{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34251, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9196227847362122e-05, "grad_norm": 1.8232885977127864, "learning_rate": 1.4594279042615296e-08, "loss": 1.1658, "step": 1 }, { "epoch": 5.8392455694724244e-05, "grad_norm": 1.900171083607522, "learning_rate": 2.918855808523059e-08, "loss": 1.0618, "step": 2 }, { "epoch": 8.758868354208637e-05, "grad_norm": 1.8158786994122884, "learning_rate": 4.3782837127845886e-08, "loss": 1.1538, "step": 3 }, { "epoch": 0.00011678491138944849, "grad_norm": 1.9276979751483978, "learning_rate": 5.837711617046118e-08, "loss": 1.2508, "step": 4 }, { "epoch": 0.0001459811392368106, "grad_norm": 1.8174637060672358, "learning_rate": 7.297139521307647e-08, "loss": 1.2495, "step": 5 }, { "epoch": 0.00017517736708417273, "grad_norm": 1.8535134738931633, "learning_rate": 8.756567425569177e-08, "loss": 1.3523, "step": 6 }, { "epoch": 0.00020437359493153485, "grad_norm": 2.089490114262868, "learning_rate": 1.0215995329830707e-07, "loss": 1.3484, "step": 7 }, { "epoch": 0.00023356982277889697, "grad_norm": 1.6895612935414044, "learning_rate": 1.1675423234092237e-07, "loss": 1.12, "step": 8 }, { "epoch": 0.0002627660506262591, "grad_norm": 1.8064853073114575, "learning_rate": 1.3134851138353765e-07, "loss": 1.1901, "step": 9 }, { "epoch": 0.0002919622784736212, "grad_norm": 2.1447189141295095, "learning_rate": 1.4594279042615295e-07, "loss": 1.2929, "step": 10 }, { "epoch": 0.00032115850632098334, "grad_norm": 1.9188251768452622, "learning_rate": 1.6053706946876824e-07, "loss": 1.0622, "step": 11 }, { "epoch": 0.00035035473416834546, "grad_norm": 1.7071759108313602, "learning_rate": 1.7513134851138354e-07, "loss": 1.1183, "step": 12 }, { "epoch": 0.0003795509620157076, "grad_norm": 2.1494462020687535, "learning_rate": 1.8972562755399884e-07, "loss": 1.2315, "step": 13 }, { "epoch": 0.0004087471898630697, "grad_norm": 2.2551127764703733, "learning_rate": 2.0431990659661414e-07, "loss": 1.213, "step": 14 }, { "epoch": 0.00043794341771043183, "grad_norm": 1.918462843794828, "learning_rate": 2.1891418563922943e-07, "loss": 1.1207, "step": 15 }, { "epoch": 0.00046713964555779395, "grad_norm": 1.7297240615463256, "learning_rate": 2.3350846468184473e-07, "loss": 1.2032, "step": 16 }, { "epoch": 0.000496335873405156, "grad_norm": 1.6709128043535746, "learning_rate": 2.4810274372446003e-07, "loss": 1.0673, "step": 17 }, { "epoch": 0.0005255321012525182, "grad_norm": 1.8827878198561043, "learning_rate": 2.626970227670753e-07, "loss": 1.1872, "step": 18 }, { "epoch": 0.0005547283290998803, "grad_norm": 1.896066921843873, "learning_rate": 2.772913018096906e-07, "loss": 1.2766, "step": 19 }, { "epoch": 0.0005839245569472424, "grad_norm": 1.8873301500781836, "learning_rate": 2.918855808523059e-07, "loss": 1.2389, "step": 20 }, { "epoch": 0.0006131207847946045, "grad_norm": 1.8303811830020165, "learning_rate": 3.064798598949212e-07, "loss": 1.2225, "step": 21 }, { "epoch": 0.0006423170126419667, "grad_norm": 2.0052350226865316, "learning_rate": 3.210741389375365e-07, "loss": 1.27, "step": 22 }, { "epoch": 0.0006715132404893287, "grad_norm": 1.8567452864165546, "learning_rate": 3.356684179801518e-07, "loss": 1.2416, "step": 23 }, { "epoch": 0.0007007094683366909, "grad_norm": 1.7350240245235253, "learning_rate": 3.502626970227671e-07, "loss": 1.1249, "step": 24 }, { "epoch": 0.000729905696184053, "grad_norm": 1.9961038714543187, "learning_rate": 3.648569760653824e-07, "loss": 1.1446, "step": 25 }, { "epoch": 0.0007591019240314152, "grad_norm": 2.1067972425798653, "learning_rate": 3.794512551079977e-07, "loss": 1.3392, "step": 26 }, { "epoch": 0.0007882981518787772, "grad_norm": 2.289343233908165, "learning_rate": 3.94045534150613e-07, "loss": 1.3642, "step": 27 }, { "epoch": 0.0008174943797261394, "grad_norm": 1.771608113005404, "learning_rate": 4.086398131932283e-07, "loss": 1.1542, "step": 28 }, { "epoch": 0.0008466906075735015, "grad_norm": 1.9680883507522284, "learning_rate": 4.232340922358436e-07, "loss": 1.1705, "step": 29 }, { "epoch": 0.0008758868354208637, "grad_norm": 1.8437601385992093, "learning_rate": 4.3782837127845887e-07, "loss": 1.2188, "step": 30 }, { "epoch": 0.0009050830632682257, "grad_norm": 1.7314389161519472, "learning_rate": 4.5242265032107414e-07, "loss": 1.1378, "step": 31 }, { "epoch": 0.0009342792911155879, "grad_norm": 1.7876140049651732, "learning_rate": 4.6701692936368946e-07, "loss": 1.202, "step": 32 }, { "epoch": 0.00096347551896295, "grad_norm": 1.9017993083000089, "learning_rate": 4.816112084063047e-07, "loss": 1.3269, "step": 33 }, { "epoch": 0.000992671746810312, "grad_norm": 1.8524473063991331, "learning_rate": 4.962054874489201e-07, "loss": 1.1891, "step": 34 }, { "epoch": 0.0010218679746576743, "grad_norm": 2.026327898514205, "learning_rate": 5.107997664915354e-07, "loss": 1.3536, "step": 35 }, { "epoch": 0.0010510642025050364, "grad_norm": 1.7851210489790204, "learning_rate": 5.253940455341506e-07, "loss": 1.1945, "step": 36 }, { "epoch": 0.0010802604303523985, "grad_norm": 2.129749214456274, "learning_rate": 5.399883245767659e-07, "loss": 1.4031, "step": 37 }, { "epoch": 0.0011094566581997605, "grad_norm": 1.8040984712709547, "learning_rate": 5.545826036193812e-07, "loss": 1.0971, "step": 38 }, { "epoch": 0.0011386528860471228, "grad_norm": 1.7576119765782996, "learning_rate": 5.691768826619966e-07, "loss": 1.216, "step": 39 }, { "epoch": 0.0011678491138944849, "grad_norm": 1.7736491639142957, "learning_rate": 5.837711617046118e-07, "loss": 1.0354, "step": 40 }, { "epoch": 0.001197045341741847, "grad_norm": 2.052712710730128, "learning_rate": 5.983654407472271e-07, "loss": 1.2291, "step": 41 }, { "epoch": 0.001226241569589209, "grad_norm": 1.7234051102785488, "learning_rate": 6.129597197898424e-07, "loss": 1.1734, "step": 42 }, { "epoch": 0.0012554377974365713, "grad_norm": 1.587072683654423, "learning_rate": 6.275539988324578e-07, "loss": 1.1672, "step": 43 }, { "epoch": 0.0012846340252839334, "grad_norm": 1.8415132713619642, "learning_rate": 6.42148277875073e-07, "loss": 1.2023, "step": 44 }, { "epoch": 0.0013138302531312954, "grad_norm": 2.1227003283920878, "learning_rate": 6.567425569176883e-07, "loss": 1.309, "step": 45 }, { "epoch": 0.0013430264809786575, "grad_norm": 2.031164868550998, "learning_rate": 6.713368359603036e-07, "loss": 1.1693, "step": 46 }, { "epoch": 0.0013722227088260198, "grad_norm": 1.768185179457972, "learning_rate": 6.859311150029188e-07, "loss": 1.0889, "step": 47 }, { "epoch": 0.0014014189366733818, "grad_norm": 1.8487527492629763, "learning_rate": 7.005253940455342e-07, "loss": 1.2569, "step": 48 }, { "epoch": 0.001430615164520744, "grad_norm": 1.6405904537977356, "learning_rate": 7.151196730881495e-07, "loss": 1.127, "step": 49 }, { "epoch": 0.001459811392368106, "grad_norm": 1.7879938173421128, "learning_rate": 7.297139521307648e-07, "loss": 1.1889, "step": 50 }, { "epoch": 0.0014890076202154683, "grad_norm": 1.9771790560824347, "learning_rate": 7.4430823117338e-07, "loss": 1.3521, "step": 51 }, { "epoch": 0.0015182038480628303, "grad_norm": 1.857003242151213, "learning_rate": 7.589025102159954e-07, "loss": 1.2528, "step": 52 }, { "epoch": 0.0015474000759101924, "grad_norm": 1.733656426103337, "learning_rate": 7.734967892586107e-07, "loss": 1.1844, "step": 53 }, { "epoch": 0.0015765963037575545, "grad_norm": 4.503741772870994, "learning_rate": 7.88091068301226e-07, "loss": 1.1908, "step": 54 }, { "epoch": 0.0016057925316049165, "grad_norm": 1.823378487739193, "learning_rate": 8.026853473438412e-07, "loss": 1.1606, "step": 55 }, { "epoch": 0.0016349887594522788, "grad_norm": 1.8064393330204211, "learning_rate": 8.172796263864565e-07, "loss": 1.2196, "step": 56 }, { "epoch": 0.0016641849872996409, "grad_norm": 1.8448387400070199, "learning_rate": 8.318739054290719e-07, "loss": 1.2654, "step": 57 }, { "epoch": 0.001693381215147003, "grad_norm": 1.8060558295431375, "learning_rate": 8.464681844716872e-07, "loss": 1.1644, "step": 58 }, { "epoch": 0.001722577442994365, "grad_norm": 1.6173894378215845, "learning_rate": 8.610624635143024e-07, "loss": 1.138, "step": 59 }, { "epoch": 0.0017517736708417273, "grad_norm": 1.7047871500944587, "learning_rate": 8.756567425569177e-07, "loss": 1.185, "step": 60 }, { "epoch": 0.0017809698986890894, "grad_norm": 1.6162503098507057, "learning_rate": 8.902510215995331e-07, "loss": 1.2075, "step": 61 }, { "epoch": 0.0018101661265364514, "grad_norm": 1.6469372700227325, "learning_rate": 9.048453006421483e-07, "loss": 1.2031, "step": 62 }, { "epoch": 0.0018393623543838135, "grad_norm": 1.7979353358827863, "learning_rate": 9.194395796847636e-07, "loss": 1.2642, "step": 63 }, { "epoch": 0.0018685585822311758, "grad_norm": 1.841104488275457, "learning_rate": 9.340338587273789e-07, "loss": 1.2171, "step": 64 }, { "epoch": 0.0018977548100785379, "grad_norm": 1.7503435558283411, "learning_rate": 9.486281377699943e-07, "loss": 1.2734, "step": 65 }, { "epoch": 0.0019269510379259, "grad_norm": 1.8771679008875295, "learning_rate": 9.632224168126095e-07, "loss": 1.1376, "step": 66 }, { "epoch": 0.001956147265773262, "grad_norm": 1.5061054937700078, "learning_rate": 9.77816695855225e-07, "loss": 1.0081, "step": 67 }, { "epoch": 0.001985343493620624, "grad_norm": 1.6494471763237337, "learning_rate": 9.924109748978401e-07, "loss": 1.1998, "step": 68 }, { "epoch": 0.002014539721467986, "grad_norm": 1.7573146672098219, "learning_rate": 1.0070052539404553e-06, "loss": 1.2938, "step": 69 }, { "epoch": 0.0020437359493153486, "grad_norm": 1.6245221437717705, "learning_rate": 1.0215995329830708e-06, "loss": 1.1416, "step": 70 }, { "epoch": 0.0020729321771627107, "grad_norm": 1.6255727556776651, "learning_rate": 1.036193812025686e-06, "loss": 1.1026, "step": 71 }, { "epoch": 0.0021021284050100728, "grad_norm": 1.4502497993487562, "learning_rate": 1.0507880910683012e-06, "loss": 1.165, "step": 72 }, { "epoch": 0.002131324632857435, "grad_norm": 1.520021166562745, "learning_rate": 1.0653823701109166e-06, "loss": 1.0909, "step": 73 }, { "epoch": 0.002160520860704797, "grad_norm": 1.4333092204719236, "learning_rate": 1.0799766491535318e-06, "loss": 1.1923, "step": 74 }, { "epoch": 0.002189717088552159, "grad_norm": 1.486936718752591, "learning_rate": 1.094570928196147e-06, "loss": 0.9723, "step": 75 }, { "epoch": 0.002218913316399521, "grad_norm": 1.38532790107885, "learning_rate": 1.1091652072387625e-06, "loss": 1.1103, "step": 76 }, { "epoch": 0.002248109544246883, "grad_norm": 1.5275687044351844, "learning_rate": 1.1237594862813777e-06, "loss": 1.1523, "step": 77 }, { "epoch": 0.0022773057720942456, "grad_norm": 1.4816711578885409, "learning_rate": 1.1383537653239931e-06, "loss": 1.1935, "step": 78 }, { "epoch": 0.0023065019999416077, "grad_norm": 1.4673615007482554, "learning_rate": 1.1529480443666084e-06, "loss": 1.1778, "step": 79 }, { "epoch": 0.0023356982277889697, "grad_norm": 1.3742075089156245, "learning_rate": 1.1675423234092236e-06, "loss": 1.0491, "step": 80 }, { "epoch": 0.002364894455636332, "grad_norm": 1.3680174092929698, "learning_rate": 1.182136602451839e-06, "loss": 1.1901, "step": 81 }, { "epoch": 0.002394090683483694, "grad_norm": 1.3181622799105481, "learning_rate": 1.1967308814944542e-06, "loss": 1.0172, "step": 82 }, { "epoch": 0.002423286911331056, "grad_norm": 1.3783776156234147, "learning_rate": 1.2113251605370694e-06, "loss": 1.0657, "step": 83 }, { "epoch": 0.002452483139178418, "grad_norm": 1.384999299767344, "learning_rate": 1.2259194395796849e-06, "loss": 1.142, "step": 84 }, { "epoch": 0.00248167936702578, "grad_norm": 1.5486512586903456, "learning_rate": 1.2405137186223e-06, "loss": 1.0465, "step": 85 }, { "epoch": 0.0025108755948731426, "grad_norm": 1.4753206636638883, "learning_rate": 1.2551079976649155e-06, "loss": 1.0834, "step": 86 }, { "epoch": 0.0025400718227205047, "grad_norm": 1.3507026405427929, "learning_rate": 1.2697022767075307e-06, "loss": 1.1652, "step": 87 }, { "epoch": 0.0025692680505678667, "grad_norm": 1.3338924303978734, "learning_rate": 1.284296555750146e-06, "loss": 1.0711, "step": 88 }, { "epoch": 0.002598464278415229, "grad_norm": 1.5955982978850813, "learning_rate": 1.2988908347927614e-06, "loss": 1.0177, "step": 89 }, { "epoch": 0.002627660506262591, "grad_norm": 1.3696204263951757, "learning_rate": 1.3134851138353766e-06, "loss": 1.1098, "step": 90 }, { "epoch": 0.002656856734109953, "grad_norm": 1.5116047842181264, "learning_rate": 1.3280793928779918e-06, "loss": 1.1937, "step": 91 }, { "epoch": 0.002686052961957315, "grad_norm": 1.5396966798425495, "learning_rate": 1.3426736719206073e-06, "loss": 1.0736, "step": 92 }, { "epoch": 0.002715249189804677, "grad_norm": 1.3526893110537448, "learning_rate": 1.3572679509632225e-06, "loss": 1.1578, "step": 93 }, { "epoch": 0.0027444454176520396, "grad_norm": 1.2875295658962032, "learning_rate": 1.3718622300058377e-06, "loss": 1.0699, "step": 94 }, { "epoch": 0.0027736416454994016, "grad_norm": 1.211946927920337, "learning_rate": 1.3864565090484531e-06, "loss": 0.991, "step": 95 }, { "epoch": 0.0028028378733467637, "grad_norm": 1.3786428214830464, "learning_rate": 1.4010507880910683e-06, "loss": 1.0645, "step": 96 }, { "epoch": 0.0028320341011941258, "grad_norm": 1.3154204059586527, "learning_rate": 1.4156450671336838e-06, "loss": 1.0445, "step": 97 }, { "epoch": 0.002861230329041488, "grad_norm": 1.184137249131547, "learning_rate": 1.430239346176299e-06, "loss": 0.9546, "step": 98 }, { "epoch": 0.00289042655688885, "grad_norm": 1.200259008109693, "learning_rate": 1.4448336252189142e-06, "loss": 0.9565, "step": 99 }, { "epoch": 0.002919622784736212, "grad_norm": 1.3760314465798176, "learning_rate": 1.4594279042615296e-06, "loss": 1.0161, "step": 100 }, { "epoch": 0.002948819012583574, "grad_norm": 1.689214203453813, "learning_rate": 1.4740221833041449e-06, "loss": 1.144, "step": 101 }, { "epoch": 0.0029780152404309365, "grad_norm": 2.239117042008185, "learning_rate": 1.48861646234676e-06, "loss": 1.1108, "step": 102 }, { "epoch": 0.0030072114682782986, "grad_norm": 1.1905850318445317, "learning_rate": 1.5032107413893755e-06, "loss": 0.931, "step": 103 }, { "epoch": 0.0030364076961256607, "grad_norm": 1.3689712915046741, "learning_rate": 1.5178050204319907e-06, "loss": 1.1904, "step": 104 }, { "epoch": 0.0030656039239730227, "grad_norm": 1.2978245767246879, "learning_rate": 1.532399299474606e-06, "loss": 1.0852, "step": 105 }, { "epoch": 0.003094800151820385, "grad_norm": 1.285931970836353, "learning_rate": 1.5469935785172214e-06, "loss": 1.0944, "step": 106 }, { "epoch": 0.003123996379667747, "grad_norm": 1.28984148320918, "learning_rate": 1.5615878575598366e-06, "loss": 1.061, "step": 107 }, { "epoch": 0.003153192607515109, "grad_norm": 1.2152433069557775, "learning_rate": 1.576182136602452e-06, "loss": 1.0812, "step": 108 }, { "epoch": 0.003182388835362471, "grad_norm": 1.1257470510925138, "learning_rate": 1.590776415645067e-06, "loss": 1.0501, "step": 109 }, { "epoch": 0.003211585063209833, "grad_norm": 1.0992766383827832, "learning_rate": 1.6053706946876824e-06, "loss": 0.9705, "step": 110 }, { "epoch": 0.0032407812910571956, "grad_norm": 1.3762817564584937, "learning_rate": 1.6199649737302977e-06, "loss": 1.0467, "step": 111 }, { "epoch": 0.0032699775189045576, "grad_norm": 1.21794156372755, "learning_rate": 1.634559252772913e-06, "loss": 1.0218, "step": 112 }, { "epoch": 0.0032991737467519197, "grad_norm": 1.231070919206575, "learning_rate": 1.6491535318155283e-06, "loss": 1.0856, "step": 113 }, { "epoch": 0.0033283699745992818, "grad_norm": 1.1007408366530145, "learning_rate": 1.6637478108581437e-06, "loss": 0.9758, "step": 114 }, { "epoch": 0.003357566202446644, "grad_norm": 1.0502895003294694, "learning_rate": 1.6783420899007588e-06, "loss": 1.0077, "step": 115 }, { "epoch": 0.003386762430294006, "grad_norm": 1.1632346723686056, "learning_rate": 1.6929363689433744e-06, "loss": 1.0112, "step": 116 }, { "epoch": 0.003415958658141368, "grad_norm": 1.2468169817196477, "learning_rate": 1.7075306479859894e-06, "loss": 1.0423, "step": 117 }, { "epoch": 0.00344515488598873, "grad_norm": 1.1267447058133897, "learning_rate": 1.7221249270286048e-06, "loss": 1.0658, "step": 118 }, { "epoch": 0.0034743511138360926, "grad_norm": 1.1938284974311977, "learning_rate": 1.73671920607122e-06, "loss": 1.049, "step": 119 }, { "epoch": 0.0035035473416834546, "grad_norm": 1.1052756004069584, "learning_rate": 1.7513134851138355e-06, "loss": 0.979, "step": 120 }, { "epoch": 0.0035327435695308167, "grad_norm": 1.6710109871707688, "learning_rate": 1.7659077641564507e-06, "loss": 0.9931, "step": 121 }, { "epoch": 0.0035619397973781788, "grad_norm": 1.0712532083373887, "learning_rate": 1.7805020431990661e-06, "loss": 1.1078, "step": 122 }, { "epoch": 0.003591136025225541, "grad_norm": 1.3024179502293871, "learning_rate": 1.7950963222416811e-06, "loss": 1.0709, "step": 123 }, { "epoch": 0.003620332253072903, "grad_norm": 1.2757382421465746, "learning_rate": 1.8096906012842966e-06, "loss": 1.0759, "step": 124 }, { "epoch": 0.003649528480920265, "grad_norm": 1.0795092059622549, "learning_rate": 1.8242848803269118e-06, "loss": 1.0274, "step": 125 }, { "epoch": 0.003678724708767627, "grad_norm": 1.132490417962277, "learning_rate": 1.8388791593695272e-06, "loss": 1.1126, "step": 126 }, { "epoch": 0.0037079209366149895, "grad_norm": 1.0463417928305834, "learning_rate": 1.8534734384121424e-06, "loss": 1.04, "step": 127 }, { "epoch": 0.0037371171644623516, "grad_norm": 1.013259809281362, "learning_rate": 1.8680677174547579e-06, "loss": 0.957, "step": 128 }, { "epoch": 0.0037663133923097137, "grad_norm": 1.1634356417716583, "learning_rate": 1.8826619964973729e-06, "loss": 1.0035, "step": 129 }, { "epoch": 0.0037955096201570757, "grad_norm": 1.0039472822483884, "learning_rate": 1.8972562755399885e-06, "loss": 0.9836, "step": 130 }, { "epoch": 0.003824705848004438, "grad_norm": 1.0473969893981843, "learning_rate": 1.9118505545826035e-06, "loss": 0.8997, "step": 131 }, { "epoch": 0.0038539020758518, "grad_norm": 1.757592089142241, "learning_rate": 1.926444833625219e-06, "loss": 1.015, "step": 132 }, { "epoch": 0.003883098303699162, "grad_norm": 0.9577553218113755, "learning_rate": 1.941039112667834e-06, "loss": 0.9293, "step": 133 }, { "epoch": 0.003912294531546524, "grad_norm": 1.3240678582482708, "learning_rate": 1.95563339171045e-06, "loss": 0.9651, "step": 134 }, { "epoch": 0.003941490759393886, "grad_norm": 1.0579004871040845, "learning_rate": 1.970227670753065e-06, "loss": 1.0137, "step": 135 }, { "epoch": 0.003970686987241248, "grad_norm": 1.5126225985636013, "learning_rate": 1.9848219497956802e-06, "loss": 1.0964, "step": 136 }, { "epoch": 0.00399988321508861, "grad_norm": 0.9465567926686593, "learning_rate": 1.9994162288382952e-06, "loss": 1.0025, "step": 137 }, { "epoch": 0.004029079442935972, "grad_norm": 1.138938969278345, "learning_rate": 2.0140105078809107e-06, "loss": 0.9464, "step": 138 }, { "epoch": 0.004058275670783335, "grad_norm": 1.0805207405007615, "learning_rate": 2.028604786923526e-06, "loss": 0.9676, "step": 139 }, { "epoch": 0.004087471898630697, "grad_norm": 1.185814630098316, "learning_rate": 2.0431990659661415e-06, "loss": 1.0481, "step": 140 }, { "epoch": 0.004116668126478059, "grad_norm": 1.031356043033653, "learning_rate": 2.0577933450087565e-06, "loss": 1.0072, "step": 141 }, { "epoch": 0.004145864354325421, "grad_norm": 0.9306247004279775, "learning_rate": 2.072387624051372e-06, "loss": 0.9902, "step": 142 }, { "epoch": 0.0041750605821727835, "grad_norm": 0.9335493241855881, "learning_rate": 2.086981903093987e-06, "loss": 0.9809, "step": 143 }, { "epoch": 0.0042042568100201455, "grad_norm": 1.1403275138478777, "learning_rate": 2.1015761821366024e-06, "loss": 1.0176, "step": 144 }, { "epoch": 0.004233453037867508, "grad_norm": 1.1228282667940361, "learning_rate": 2.116170461179218e-06, "loss": 0.9956, "step": 145 }, { "epoch": 0.00426264926571487, "grad_norm": 0.903333500822075, "learning_rate": 2.1307647402218333e-06, "loss": 0.9286, "step": 146 }, { "epoch": 0.004291845493562232, "grad_norm": 1.0632411964553037, "learning_rate": 2.1453590192644483e-06, "loss": 0.8691, "step": 147 }, { "epoch": 0.004321041721409594, "grad_norm": 0.9412529202341693, "learning_rate": 2.1599532983070637e-06, "loss": 0.9756, "step": 148 }, { "epoch": 0.004350237949256956, "grad_norm": 1.1836365559505113, "learning_rate": 2.1745475773496787e-06, "loss": 0.9624, "step": 149 }, { "epoch": 0.004379434177104318, "grad_norm": 1.3822391501209865, "learning_rate": 2.189141856392294e-06, "loss": 0.9574, "step": 150 }, { "epoch": 0.00440863040495168, "grad_norm": 0.8896180459613094, "learning_rate": 2.2037361354349096e-06, "loss": 0.8415, "step": 151 }, { "epoch": 0.004437826632799042, "grad_norm": 1.1114664393495783, "learning_rate": 2.218330414477525e-06, "loss": 1.0624, "step": 152 }, { "epoch": 0.004467022860646404, "grad_norm": 0.9644946701118168, "learning_rate": 2.23292469352014e-06, "loss": 0.9808, "step": 153 }, { "epoch": 0.004496219088493766, "grad_norm": 1.0292814497111122, "learning_rate": 2.2475189725627554e-06, "loss": 1.0183, "step": 154 }, { "epoch": 0.004525415316341129, "grad_norm": 1.165329053630331, "learning_rate": 2.2621132516053704e-06, "loss": 0.9482, "step": 155 }, { "epoch": 0.004554611544188491, "grad_norm": 1.1765703174970588, "learning_rate": 2.2767075306479863e-06, "loss": 1.0419, "step": 156 }, { "epoch": 0.004583807772035853, "grad_norm": 1.5225860779716125, "learning_rate": 2.2913018096906013e-06, "loss": 0.8919, "step": 157 }, { "epoch": 0.004613003999883215, "grad_norm": 0.9031069146602455, "learning_rate": 2.3058960887332167e-06, "loss": 0.879, "step": 158 }, { "epoch": 0.004642200227730577, "grad_norm": 1.208489977792828, "learning_rate": 2.3204903677758317e-06, "loss": 0.9806, "step": 159 }, { "epoch": 0.0046713964555779395, "grad_norm": 0.8890927162943121, "learning_rate": 2.335084646818447e-06, "loss": 0.8145, "step": 160 }, { "epoch": 0.0047005926834253016, "grad_norm": 0.9297966573083872, "learning_rate": 2.3496789258610626e-06, "loss": 0.9435, "step": 161 }, { "epoch": 0.004729788911272664, "grad_norm": 0.9090130318698475, "learning_rate": 2.364273204903678e-06, "loss": 1.0399, "step": 162 }, { "epoch": 0.004758985139120026, "grad_norm": 1.169771145183509, "learning_rate": 2.378867483946293e-06, "loss": 1.0265, "step": 163 }, { "epoch": 0.004788181366967388, "grad_norm": 0.9242332337520442, "learning_rate": 2.3934617629889085e-06, "loss": 0.9233, "step": 164 }, { "epoch": 0.00481737759481475, "grad_norm": 0.9857251844579197, "learning_rate": 2.4080560420315235e-06, "loss": 0.951, "step": 165 }, { "epoch": 0.004846573822662112, "grad_norm": 1.0126978932415271, "learning_rate": 2.422650321074139e-06, "loss": 0.977, "step": 166 }, { "epoch": 0.004875770050509474, "grad_norm": 1.0874080898381198, "learning_rate": 2.4372446001167543e-06, "loss": 0.7823, "step": 167 }, { "epoch": 0.004904966278356836, "grad_norm": 1.084382998563676, "learning_rate": 2.4518388791593698e-06, "loss": 1.0488, "step": 168 }, { "epoch": 0.004934162506204198, "grad_norm": 0.9825167091629777, "learning_rate": 2.4664331582019848e-06, "loss": 0.9644, "step": 169 }, { "epoch": 0.00496335873405156, "grad_norm": 1.0650384144787692, "learning_rate": 2.4810274372446e-06, "loss": 0.8397, "step": 170 }, { "epoch": 0.004992554961898922, "grad_norm": 0.9781291783108043, "learning_rate": 2.495621716287215e-06, "loss": 1.0033, "step": 171 }, { "epoch": 0.005021751189746285, "grad_norm": 1.1028049153780939, "learning_rate": 2.510215995329831e-06, "loss": 0.9428, "step": 172 }, { "epoch": 0.005050947417593647, "grad_norm": 0.9512484804537188, "learning_rate": 2.524810274372446e-06, "loss": 0.9353, "step": 173 }, { "epoch": 0.005080143645441009, "grad_norm": 1.2289133304656723, "learning_rate": 2.5394045534150615e-06, "loss": 0.9572, "step": 174 }, { "epoch": 0.005109339873288371, "grad_norm": 1.2715988032669254, "learning_rate": 2.5539988324576765e-06, "loss": 0.9243, "step": 175 }, { "epoch": 0.0051385361011357334, "grad_norm": 0.9031393061809833, "learning_rate": 2.568593111500292e-06, "loss": 0.9289, "step": 176 }, { "epoch": 0.0051677323289830955, "grad_norm": 0.8401268841430755, "learning_rate": 2.5831873905429073e-06, "loss": 0.8755, "step": 177 }, { "epoch": 0.005196928556830458, "grad_norm": 1.3422237720597325, "learning_rate": 2.5977816695855228e-06, "loss": 0.9006, "step": 178 }, { "epoch": 0.00522612478467782, "grad_norm": 0.9087499235978718, "learning_rate": 2.6123759486281378e-06, "loss": 0.9028, "step": 179 }, { "epoch": 0.005255321012525182, "grad_norm": 0.9120930309305735, "learning_rate": 2.6269702276707532e-06, "loss": 0.7872, "step": 180 }, { "epoch": 0.005284517240372544, "grad_norm": 0.8901924064288239, "learning_rate": 2.6415645067133682e-06, "loss": 0.7925, "step": 181 }, { "epoch": 0.005313713468219906, "grad_norm": 0.9772460963252044, "learning_rate": 2.6561587857559837e-06, "loss": 0.9263, "step": 182 }, { "epoch": 0.005342909696067268, "grad_norm": 0.9515163961105888, "learning_rate": 2.670753064798599e-06, "loss": 0.9951, "step": 183 }, { "epoch": 0.00537210592391463, "grad_norm": 0.8161563142552721, "learning_rate": 2.6853473438412145e-06, "loss": 0.7991, "step": 184 }, { "epoch": 0.005401302151761992, "grad_norm": 0.9394704806024653, "learning_rate": 2.6999416228838295e-06, "loss": 0.94, "step": 185 }, { "epoch": 0.005430498379609354, "grad_norm": 1.2886502607038306, "learning_rate": 2.714535901926445e-06, "loss": 0.9375, "step": 186 }, { "epoch": 0.005459694607456716, "grad_norm": 0.8163593678688822, "learning_rate": 2.72913018096906e-06, "loss": 0.8302, "step": 187 }, { "epoch": 0.005488890835304079, "grad_norm": 0.9189165457861252, "learning_rate": 2.7437244600116754e-06, "loss": 0.9437, "step": 188 }, { "epoch": 0.005518087063151441, "grad_norm": 0.8476594634103597, "learning_rate": 2.758318739054291e-06, "loss": 0.8587, "step": 189 }, { "epoch": 0.005547283290998803, "grad_norm": 1.04210534209269, "learning_rate": 2.7729130180969062e-06, "loss": 1.0417, "step": 190 }, { "epoch": 0.005576479518846165, "grad_norm": 0.8446724087112821, "learning_rate": 2.7875072971395212e-06, "loss": 0.841, "step": 191 }, { "epoch": 0.005605675746693527, "grad_norm": 0.8839396526506146, "learning_rate": 2.8021015761821367e-06, "loss": 0.9206, "step": 192 }, { "epoch": 0.0056348719745408895, "grad_norm": 0.9703189577552157, "learning_rate": 2.8166958552247517e-06, "loss": 0.9529, "step": 193 }, { "epoch": 0.0056640682023882515, "grad_norm": 1.0542481849712255, "learning_rate": 2.8312901342673675e-06, "loss": 0.9489, "step": 194 }, { "epoch": 0.005693264430235614, "grad_norm": 1.691901020242629, "learning_rate": 2.8458844133099825e-06, "loss": 0.8757, "step": 195 }, { "epoch": 0.005722460658082976, "grad_norm": 0.9058422301065425, "learning_rate": 2.860478692352598e-06, "loss": 0.8225, "step": 196 }, { "epoch": 0.005751656885930338, "grad_norm": 0.8041321788215342, "learning_rate": 2.875072971395213e-06, "loss": 0.8187, "step": 197 }, { "epoch": 0.0057808531137777, "grad_norm": 0.893843883007661, "learning_rate": 2.8896672504378284e-06, "loss": 0.8961, "step": 198 }, { "epoch": 0.005810049341625062, "grad_norm": 1.0321508205976806, "learning_rate": 2.904261529480444e-06, "loss": 0.8537, "step": 199 }, { "epoch": 0.005839245569472424, "grad_norm": 1.1212527829471353, "learning_rate": 2.9188558085230593e-06, "loss": 0.8952, "step": 200 }, { "epoch": 0.005868441797319786, "grad_norm": 0.9711354848378162, "learning_rate": 2.9334500875656743e-06, "loss": 1.0242, "step": 201 }, { "epoch": 0.005897638025167148, "grad_norm": 0.947493405482381, "learning_rate": 2.9480443666082897e-06, "loss": 0.9499, "step": 202 }, { "epoch": 0.00592683425301451, "grad_norm": 1.4915388762498571, "learning_rate": 2.9626386456509047e-06, "loss": 1.0055, "step": 203 }, { "epoch": 0.005956030480861873, "grad_norm": 1.0331286356669929, "learning_rate": 2.97723292469352e-06, "loss": 0.8878, "step": 204 }, { "epoch": 0.005985226708709235, "grad_norm": 0.9391768053831648, "learning_rate": 2.9918272037361356e-06, "loss": 1.0444, "step": 205 }, { "epoch": 0.006014422936556597, "grad_norm": 0.8515961324204081, "learning_rate": 3.006421482778751e-06, "loss": 0.8774, "step": 206 }, { "epoch": 0.006043619164403959, "grad_norm": 1.0475447849932602, "learning_rate": 3.021015761821366e-06, "loss": 1.0706, "step": 207 }, { "epoch": 0.006072815392251321, "grad_norm": 0.9501944831162248, "learning_rate": 3.0356100408639814e-06, "loss": 0.8994, "step": 208 }, { "epoch": 0.006102011620098683, "grad_norm": 1.0499659679195434, "learning_rate": 3.0502043199065964e-06, "loss": 0.9787, "step": 209 }, { "epoch": 0.0061312078479460455, "grad_norm": 1.0141689933027473, "learning_rate": 3.064798598949212e-06, "loss": 0.7888, "step": 210 }, { "epoch": 0.0061604040757934075, "grad_norm": 0.762090361941835, "learning_rate": 3.0793928779918273e-06, "loss": 0.7845, "step": 211 }, { "epoch": 0.00618960030364077, "grad_norm": 1.4720708148223058, "learning_rate": 3.0939871570344427e-06, "loss": 0.8966, "step": 212 }, { "epoch": 0.006218796531488132, "grad_norm": 0.9400747074218713, "learning_rate": 3.1085814360770577e-06, "loss": 0.898, "step": 213 }, { "epoch": 0.006247992759335494, "grad_norm": 0.9707690455015591, "learning_rate": 3.123175715119673e-06, "loss": 0.9419, "step": 214 }, { "epoch": 0.006277188987182856, "grad_norm": 1.1259840369847103, "learning_rate": 3.1377699941622886e-06, "loss": 0.9484, "step": 215 }, { "epoch": 0.006306385215030218, "grad_norm": 0.9839173425240251, "learning_rate": 3.152364273204904e-06, "loss": 1.0228, "step": 216 }, { "epoch": 0.00633558144287758, "grad_norm": 0.8561409051352945, "learning_rate": 3.166958552247519e-06, "loss": 0.8061, "step": 217 }, { "epoch": 0.006364777670724942, "grad_norm": 0.7849169981533636, "learning_rate": 3.181552831290134e-06, "loss": 0.7902, "step": 218 }, { "epoch": 0.006393973898572304, "grad_norm": 0.8176644789323954, "learning_rate": 3.19614711033275e-06, "loss": 0.7511, "step": 219 }, { "epoch": 0.006423170126419666, "grad_norm": 1.0467705278584805, "learning_rate": 3.210741389375365e-06, "loss": 0.996, "step": 220 }, { "epoch": 0.006452366354267029, "grad_norm": 0.902299454039246, "learning_rate": 3.2253356684179803e-06, "loss": 0.917, "step": 221 }, { "epoch": 0.006481562582114391, "grad_norm": 0.9373227607566746, "learning_rate": 3.2399299474605953e-06, "loss": 0.8408, "step": 222 }, { "epoch": 0.006510758809961753, "grad_norm": 0.8975650693385484, "learning_rate": 3.254524226503211e-06, "loss": 0.9606, "step": 223 }, { "epoch": 0.006539955037809115, "grad_norm": 0.8856844325959359, "learning_rate": 3.269118505545826e-06, "loss": 0.8179, "step": 224 }, { "epoch": 0.006569151265656477, "grad_norm": 0.8824897795156842, "learning_rate": 3.283712784588441e-06, "loss": 0.8295, "step": 225 }, { "epoch": 0.006598347493503839, "grad_norm": 0.8424964280635112, "learning_rate": 3.2983070636310566e-06, "loss": 0.8475, "step": 226 }, { "epoch": 0.0066275437213512015, "grad_norm": 0.8391175839708136, "learning_rate": 3.3129013426736725e-06, "loss": 0.8414, "step": 227 }, { "epoch": 0.0066567399491985636, "grad_norm": 1.1510754499063325, "learning_rate": 3.3274956217162875e-06, "loss": 0.9241, "step": 228 }, { "epoch": 0.006685936177045926, "grad_norm": 0.8927096171497888, "learning_rate": 3.3420899007589025e-06, "loss": 0.9774, "step": 229 }, { "epoch": 0.006715132404893288, "grad_norm": 0.9239819321203334, "learning_rate": 3.3566841798015175e-06, "loss": 0.8937, "step": 230 }, { "epoch": 0.00674432863274065, "grad_norm": 1.0450822680279939, "learning_rate": 3.3712784588441334e-06, "loss": 0.9669, "step": 231 }, { "epoch": 0.006773524860588012, "grad_norm": 0.8226390804981855, "learning_rate": 3.3858727378867488e-06, "loss": 0.8444, "step": 232 }, { "epoch": 0.006802721088435374, "grad_norm": 0.8036946524770139, "learning_rate": 3.400467016929364e-06, "loss": 0.796, "step": 233 }, { "epoch": 0.006831917316282736, "grad_norm": 0.8417045253389825, "learning_rate": 3.415061295971979e-06, "loss": 0.8151, "step": 234 }, { "epoch": 0.006861113544130098, "grad_norm": 0.8367182516570683, "learning_rate": 3.4296555750145947e-06, "loss": 0.7678, "step": 235 }, { "epoch": 0.00689030977197746, "grad_norm": 0.8883181158098531, "learning_rate": 3.4442498540572097e-06, "loss": 0.7811, "step": 236 }, { "epoch": 0.006919505999824823, "grad_norm": 0.9017570386041835, "learning_rate": 3.458844133099825e-06, "loss": 0.9243, "step": 237 }, { "epoch": 0.006948702227672185, "grad_norm": 0.9235145102714052, "learning_rate": 3.47343841214244e-06, "loss": 0.9095, "step": 238 }, { "epoch": 0.006977898455519547, "grad_norm": 0.850516892696132, "learning_rate": 3.488032691185056e-06, "loss": 0.8311, "step": 239 }, { "epoch": 0.007007094683366909, "grad_norm": 1.0109460818735487, "learning_rate": 3.502626970227671e-06, "loss": 0.9775, "step": 240 }, { "epoch": 0.007036290911214271, "grad_norm": 1.2415425955864212, "learning_rate": 3.517221249270286e-06, "loss": 0.942, "step": 241 }, { "epoch": 0.007065487139061633, "grad_norm": 0.8168862956287893, "learning_rate": 3.5318155283129014e-06, "loss": 0.7803, "step": 242 }, { "epoch": 0.0070946833669089954, "grad_norm": 0.8634415883800228, "learning_rate": 3.546409807355517e-06, "loss": 0.9141, "step": 243 }, { "epoch": 0.0071238795947563575, "grad_norm": 0.9035507985924348, "learning_rate": 3.5610040863981322e-06, "loss": 0.8429, "step": 244 }, { "epoch": 0.00715307582260372, "grad_norm": 1.5180104741790088, "learning_rate": 3.5755983654407473e-06, "loss": 1.0311, "step": 245 }, { "epoch": 0.007182272050451082, "grad_norm": 0.9333629101408514, "learning_rate": 3.5901926444833623e-06, "loss": 0.9496, "step": 246 }, { "epoch": 0.007211468278298444, "grad_norm": 0.9446703029025993, "learning_rate": 3.604786923525978e-06, "loss": 0.916, "step": 247 }, { "epoch": 0.007240664506145806, "grad_norm": 0.9075728709274778, "learning_rate": 3.619381202568593e-06, "loss": 0.7597, "step": 248 }, { "epoch": 0.007269860733993168, "grad_norm": 0.8243001494875135, "learning_rate": 3.6339754816112086e-06, "loss": 0.8439, "step": 249 }, { "epoch": 0.00729905696184053, "grad_norm": 0.8586567410882128, "learning_rate": 3.6485697606538236e-06, "loss": 0.8214, "step": 250 }, { "epoch": 0.007328253189687892, "grad_norm": 1.0680912670644116, "learning_rate": 3.6631640396964394e-06, "loss": 1.009, "step": 251 }, { "epoch": 0.007357449417535254, "grad_norm": 0.9728474409043794, "learning_rate": 3.6777583187390544e-06, "loss": 0.9883, "step": 252 }, { "epoch": 0.007386645645382617, "grad_norm": 0.84016726586601, "learning_rate": 3.6923525977816694e-06, "loss": 0.8913, "step": 253 }, { "epoch": 0.007415841873229979, "grad_norm": 1.0745249724931596, "learning_rate": 3.706946876824285e-06, "loss": 0.9142, "step": 254 }, { "epoch": 0.007445038101077341, "grad_norm": 0.7460106368653882, "learning_rate": 3.7215411558669007e-06, "loss": 0.7104, "step": 255 }, { "epoch": 0.007474234328924703, "grad_norm": 1.0232218288438573, "learning_rate": 3.7361354349095157e-06, "loss": 0.751, "step": 256 }, { "epoch": 0.007503430556772065, "grad_norm": 0.9695926456459383, "learning_rate": 3.7507297139521307e-06, "loss": 0.925, "step": 257 }, { "epoch": 0.007532626784619427, "grad_norm": 0.9262732242833117, "learning_rate": 3.7653239929947457e-06, "loss": 0.8057, "step": 258 }, { "epoch": 0.007561823012466789, "grad_norm": 0.9569848586526071, "learning_rate": 3.7799182720373616e-06, "loss": 0.8449, "step": 259 }, { "epoch": 0.0075910192403141515, "grad_norm": 0.9021896298926787, "learning_rate": 3.794512551079977e-06, "loss": 0.8799, "step": 260 }, { "epoch": 0.0076202154681615135, "grad_norm": 0.8690431976289708, "learning_rate": 3.809106830122592e-06, "loss": 0.9191, "step": 261 }, { "epoch": 0.007649411696008876, "grad_norm": 0.8682876746100805, "learning_rate": 3.823701109165207e-06, "loss": 0.8648, "step": 262 }, { "epoch": 0.007678607923856238, "grad_norm": 0.9904322724591341, "learning_rate": 3.838295388207823e-06, "loss": 1.0185, "step": 263 }, { "epoch": 0.0077078041517036, "grad_norm": 0.723962698118826, "learning_rate": 3.852889667250438e-06, "loss": 0.742, "step": 264 }, { "epoch": 0.007737000379550962, "grad_norm": 1.1668900468740813, "learning_rate": 3.867483946293053e-06, "loss": 0.7813, "step": 265 }, { "epoch": 0.007766196607398324, "grad_norm": 1.0011367631089403, "learning_rate": 3.882078225335668e-06, "loss": 1.0329, "step": 266 }, { "epoch": 0.007795392835245686, "grad_norm": 0.90956548724966, "learning_rate": 3.896672504378284e-06, "loss": 0.8342, "step": 267 }, { "epoch": 0.007824589063093048, "grad_norm": 1.051956533702692, "learning_rate": 3.9112667834209e-06, "loss": 0.7646, "step": 268 }, { "epoch": 0.007853785290940411, "grad_norm": 0.8260670860950688, "learning_rate": 3.925861062463515e-06, "loss": 0.783, "step": 269 }, { "epoch": 0.007882981518787772, "grad_norm": 0.8448938118048599, "learning_rate": 3.94045534150613e-06, "loss": 0.8588, "step": 270 }, { "epoch": 0.007912177746635135, "grad_norm": 0.8182008547249761, "learning_rate": 3.9550496205487455e-06, "loss": 0.7688, "step": 271 }, { "epoch": 0.007941373974482496, "grad_norm": 0.8225139902094145, "learning_rate": 3.9696438995913605e-06, "loss": 0.8144, "step": 272 }, { "epoch": 0.00797057020232986, "grad_norm": 0.8448491398101149, "learning_rate": 3.9842381786339755e-06, "loss": 0.8127, "step": 273 }, { "epoch": 0.00799976643017722, "grad_norm": 0.7613843280698162, "learning_rate": 3.9988324576765905e-06, "loss": 0.7673, "step": 274 }, { "epoch": 0.008028962658024583, "grad_norm": 0.9705915460359273, "learning_rate": 4.013426736719206e-06, "loss": 0.8814, "step": 275 }, { "epoch": 0.008058158885871945, "grad_norm": 1.005441844636053, "learning_rate": 4.028021015761821e-06, "loss": 0.9111, "step": 276 }, { "epoch": 0.008087355113719307, "grad_norm": 1.0183343355293049, "learning_rate": 4.042615294804436e-06, "loss": 0.898, "step": 277 }, { "epoch": 0.00811655134156667, "grad_norm": 0.9597885015998168, "learning_rate": 4.057209573847052e-06, "loss": 0.8369, "step": 278 }, { "epoch": 0.008145747569414032, "grad_norm": 1.3530582208300928, "learning_rate": 4.071803852889668e-06, "loss": 0.9209, "step": 279 }, { "epoch": 0.008174943797261395, "grad_norm": 1.040152055153395, "learning_rate": 4.086398131932283e-06, "loss": 0.8555, "step": 280 }, { "epoch": 0.008204140025108756, "grad_norm": 1.133723858062518, "learning_rate": 4.100992410974898e-06, "loss": 0.9099, "step": 281 }, { "epoch": 0.008233336252956119, "grad_norm": 0.937035591499149, "learning_rate": 4.115586690017513e-06, "loss": 0.9744, "step": 282 }, { "epoch": 0.00826253248080348, "grad_norm": 0.9249435764222048, "learning_rate": 4.130180969060129e-06, "loss": 1.0107, "step": 283 }, { "epoch": 0.008291728708650843, "grad_norm": 0.8924789472948333, "learning_rate": 4.144775248102744e-06, "loss": 0.7928, "step": 284 }, { "epoch": 0.008320924936498204, "grad_norm": 0.8520882043834624, "learning_rate": 4.159369527145359e-06, "loss": 0.903, "step": 285 }, { "epoch": 0.008350121164345567, "grad_norm": 0.8794079120538619, "learning_rate": 4.173963806187974e-06, "loss": 0.7426, "step": 286 }, { "epoch": 0.008379317392192928, "grad_norm": 0.8524427354322152, "learning_rate": 4.18855808523059e-06, "loss": 0.7805, "step": 287 }, { "epoch": 0.008408513620040291, "grad_norm": 1.0049585054550045, "learning_rate": 4.203152364273205e-06, "loss": 0.8942, "step": 288 }, { "epoch": 0.008437709847887652, "grad_norm": 1.2283618044274653, "learning_rate": 4.217746643315821e-06, "loss": 0.8538, "step": 289 }, { "epoch": 0.008466906075735015, "grad_norm": 0.8348375659707795, "learning_rate": 4.232340922358436e-06, "loss": 0.8945, "step": 290 }, { "epoch": 0.008496102303582376, "grad_norm": 0.8459516651686793, "learning_rate": 4.2469352014010515e-06, "loss": 0.7372, "step": 291 }, { "epoch": 0.00852529853142974, "grad_norm": 0.8891353962403521, "learning_rate": 4.2615294804436665e-06, "loss": 0.9086, "step": 292 }, { "epoch": 0.0085544947592771, "grad_norm": 0.8562549018712324, "learning_rate": 4.2761237594862815e-06, "loss": 0.8085, "step": 293 }, { "epoch": 0.008583690987124463, "grad_norm": 0.7418210812012043, "learning_rate": 4.2907180385288965e-06, "loss": 0.7266, "step": 294 }, { "epoch": 0.008612887214971826, "grad_norm": 0.9329815556469371, "learning_rate": 4.305312317571512e-06, "loss": 0.7885, "step": 295 }, { "epoch": 0.008642083442819188, "grad_norm": 0.9016695263954109, "learning_rate": 4.319906596614127e-06, "loss": 0.8653, "step": 296 }, { "epoch": 0.00867127967066655, "grad_norm": 0.8520458722163616, "learning_rate": 4.334500875656742e-06, "loss": 0.7835, "step": 297 }, { "epoch": 0.008700475898513912, "grad_norm": 0.8124011974404625, "learning_rate": 4.349095154699357e-06, "loss": 0.8258, "step": 298 }, { "epoch": 0.008729672126361275, "grad_norm": 0.8845126324479725, "learning_rate": 4.363689433741973e-06, "loss": 0.866, "step": 299 }, { "epoch": 0.008758868354208636, "grad_norm": 0.8621416688381851, "learning_rate": 4.378283712784588e-06, "loss": 0.8511, "step": 300 }, { "epoch": 0.008788064582055999, "grad_norm": 0.8695645987852437, "learning_rate": 4.392877991827204e-06, "loss": 0.9488, "step": 301 }, { "epoch": 0.00881726080990336, "grad_norm": 0.8051637131319952, "learning_rate": 4.407472270869819e-06, "loss": 0.8277, "step": 302 }, { "epoch": 0.008846457037750723, "grad_norm": 0.9982016569462713, "learning_rate": 4.422066549912435e-06, "loss": 0.976, "step": 303 }, { "epoch": 0.008875653265598084, "grad_norm": 0.8767666393864342, "learning_rate": 4.43666082895505e-06, "loss": 0.9224, "step": 304 }, { "epoch": 0.008904849493445447, "grad_norm": 0.9919496745518591, "learning_rate": 4.451255107997665e-06, "loss": 0.898, "step": 305 }, { "epoch": 0.008934045721292808, "grad_norm": 0.9627078967810015, "learning_rate": 4.46584938704028e-06, "loss": 0.8755, "step": 306 }, { "epoch": 0.008963241949140171, "grad_norm": 0.8751701062768387, "learning_rate": 4.480443666082896e-06, "loss": 0.8962, "step": 307 }, { "epoch": 0.008992438176987532, "grad_norm": 0.8693342674781225, "learning_rate": 4.495037945125511e-06, "loss": 0.8609, "step": 308 }, { "epoch": 0.009021634404834895, "grad_norm": 0.7475799749561598, "learning_rate": 4.509632224168126e-06, "loss": 0.7474, "step": 309 }, { "epoch": 0.009050830632682258, "grad_norm": 0.9908232263003793, "learning_rate": 4.524226503210741e-06, "loss": 0.8973, "step": 310 }, { "epoch": 0.00908002686052962, "grad_norm": 0.9296011280609134, "learning_rate": 4.538820782253357e-06, "loss": 0.8857, "step": 311 }, { "epoch": 0.009109223088376982, "grad_norm": 0.8760173001942239, "learning_rate": 4.553415061295973e-06, "loss": 0.7864, "step": 312 }, { "epoch": 0.009138419316224344, "grad_norm": 1.1417187156568478, "learning_rate": 4.568009340338588e-06, "loss": 0.9313, "step": 313 }, { "epoch": 0.009167615544071707, "grad_norm": 0.8802681277145241, "learning_rate": 4.582603619381203e-06, "loss": 0.775, "step": 314 }, { "epoch": 0.009196811771919068, "grad_norm": 0.8442180068038305, "learning_rate": 4.5971978984238184e-06, "loss": 0.9059, "step": 315 }, { "epoch": 0.00922600799976643, "grad_norm": 0.7711375074648811, "learning_rate": 4.6117921774664335e-06, "loss": 0.7851, "step": 316 }, { "epoch": 0.009255204227613792, "grad_norm": 0.8151871099162944, "learning_rate": 4.6263864565090485e-06, "loss": 0.7979, "step": 317 }, { "epoch": 0.009284400455461155, "grad_norm": 0.8271539920166249, "learning_rate": 4.6409807355516635e-06, "loss": 0.8284, "step": 318 }, { "epoch": 0.009313596683308516, "grad_norm": 0.8241292525338043, "learning_rate": 4.655575014594279e-06, "loss": 0.8953, "step": 319 }, { "epoch": 0.009342792911155879, "grad_norm": 0.8682488908155988, "learning_rate": 4.670169293636894e-06, "loss": 0.8648, "step": 320 }, { "epoch": 0.00937198913900324, "grad_norm": 0.7849300368140598, "learning_rate": 4.684763572679509e-06, "loss": 0.7512, "step": 321 }, { "epoch": 0.009401185366850603, "grad_norm": 0.9948000013653082, "learning_rate": 4.699357851722125e-06, "loss": 0.8021, "step": 322 }, { "epoch": 0.009430381594697964, "grad_norm": 0.9278946204811426, "learning_rate": 4.713952130764741e-06, "loss": 0.8399, "step": 323 }, { "epoch": 0.009459577822545327, "grad_norm": 0.8222212246840468, "learning_rate": 4.728546409807356e-06, "loss": 0.7333, "step": 324 }, { "epoch": 0.009488774050392688, "grad_norm": 0.8229603299132847, "learning_rate": 4.743140688849971e-06, "loss": 0.875, "step": 325 }, { "epoch": 0.009517970278240051, "grad_norm": 0.7702492830856307, "learning_rate": 4.757734967892586e-06, "loss": 0.7172, "step": 326 }, { "epoch": 0.009547166506087414, "grad_norm": 0.8951470343972229, "learning_rate": 4.772329246935202e-06, "loss": 0.8455, "step": 327 }, { "epoch": 0.009576362733934776, "grad_norm": 0.8403995514351357, "learning_rate": 4.786923525977817e-06, "loss": 0.8601, "step": 328 }, { "epoch": 0.009605558961782138, "grad_norm": 0.9265564745317373, "learning_rate": 4.801517805020432e-06, "loss": 0.8625, "step": 329 }, { "epoch": 0.0096347551896295, "grad_norm": 0.9522130405793988, "learning_rate": 4.816112084063047e-06, "loss": 0.8234, "step": 330 }, { "epoch": 0.009663951417476863, "grad_norm": 0.8200368220125359, "learning_rate": 4.830706363105663e-06, "loss": 0.7204, "step": 331 }, { "epoch": 0.009693147645324224, "grad_norm": 0.9405948313129032, "learning_rate": 4.845300642148278e-06, "loss": 0.8786, "step": 332 }, { "epoch": 0.009722343873171587, "grad_norm": 0.7960566390449086, "learning_rate": 4.859894921190894e-06, "loss": 0.8057, "step": 333 }, { "epoch": 0.009751540101018948, "grad_norm": 1.0532493289169278, "learning_rate": 4.874489200233509e-06, "loss": 0.9121, "step": 334 }, { "epoch": 0.00978073632886631, "grad_norm": 0.8544318532670346, "learning_rate": 4.8890834792761245e-06, "loss": 0.8205, "step": 335 }, { "epoch": 0.009809932556713672, "grad_norm": 0.8618507065972612, "learning_rate": 4.9036777583187395e-06, "loss": 0.773, "step": 336 }, { "epoch": 0.009839128784561035, "grad_norm": 0.9235823357445955, "learning_rate": 4.9182720373613545e-06, "loss": 0.7848, "step": 337 }, { "epoch": 0.009868325012408396, "grad_norm": 0.7819873694567842, "learning_rate": 4.9328663164039695e-06, "loss": 0.8385, "step": 338 }, { "epoch": 0.00989752124025576, "grad_norm": 0.8506989616019827, "learning_rate": 4.947460595446585e-06, "loss": 0.8952, "step": 339 }, { "epoch": 0.00992671746810312, "grad_norm": 0.8595941836254404, "learning_rate": 4.9620548744892e-06, "loss": 0.8409, "step": 340 }, { "epoch": 0.009955913695950483, "grad_norm": 0.8729758741046137, "learning_rate": 4.976649153531815e-06, "loss": 0.8611, "step": 341 }, { "epoch": 0.009985109923797844, "grad_norm": 0.7696712902367729, "learning_rate": 4.99124343257443e-06, "loss": 0.7634, "step": 342 }, { "epoch": 0.010014306151645207, "grad_norm": 0.7959924811519636, "learning_rate": 5.005837711617046e-06, "loss": 0.7658, "step": 343 }, { "epoch": 0.01004350237949257, "grad_norm": 0.8551381621395677, "learning_rate": 5.020431990659662e-06, "loss": 0.8075, "step": 344 }, { "epoch": 0.010072698607339932, "grad_norm": 0.8742367837242363, "learning_rate": 5.035026269702277e-06, "loss": 0.8116, "step": 345 }, { "epoch": 0.010101894835187294, "grad_norm": 0.7801545805412426, "learning_rate": 5.049620548744892e-06, "loss": 0.671, "step": 346 }, { "epoch": 0.010131091063034656, "grad_norm": 0.8342475177618855, "learning_rate": 5.064214827787508e-06, "loss": 0.9013, "step": 347 }, { "epoch": 0.010160287290882019, "grad_norm": 0.8544265195962304, "learning_rate": 5.078809106830123e-06, "loss": 0.8366, "step": 348 }, { "epoch": 0.01018948351872938, "grad_norm": 0.8150702198148964, "learning_rate": 5.093403385872738e-06, "loss": 0.789, "step": 349 }, { "epoch": 0.010218679746576743, "grad_norm": 0.8827275198649956, "learning_rate": 5.107997664915353e-06, "loss": 0.8767, "step": 350 }, { "epoch": 0.010247875974424104, "grad_norm": 0.9586341952737144, "learning_rate": 5.122591943957969e-06, "loss": 0.9065, "step": 351 }, { "epoch": 0.010277072202271467, "grad_norm": 0.932830511802231, "learning_rate": 5.137186223000584e-06, "loss": 0.8422, "step": 352 }, { "epoch": 0.010306268430118828, "grad_norm": 0.9470685437275449, "learning_rate": 5.151780502043199e-06, "loss": 0.979, "step": 353 }, { "epoch": 0.010335464657966191, "grad_norm": 0.9450887955986954, "learning_rate": 5.166374781085815e-06, "loss": 0.8564, "step": 354 }, { "epoch": 0.010364660885813552, "grad_norm": 1.0972776145229306, "learning_rate": 5.18096906012843e-06, "loss": 0.835, "step": 355 }, { "epoch": 0.010393857113660915, "grad_norm": 0.9071731927593025, "learning_rate": 5.1955633391710456e-06, "loss": 0.8357, "step": 356 }, { "epoch": 0.010423053341508276, "grad_norm": 0.7977116943202984, "learning_rate": 5.2101576182136606e-06, "loss": 0.7677, "step": 357 }, { "epoch": 0.01045224956935564, "grad_norm": 0.8597039173144809, "learning_rate": 5.2247518972562756e-06, "loss": 0.8374, "step": 358 }, { "epoch": 0.010481445797203002, "grad_norm": 1.0472336085125764, "learning_rate": 5.2393461762988914e-06, "loss": 0.857, "step": 359 }, { "epoch": 0.010510642025050363, "grad_norm": 0.8506193980945815, "learning_rate": 5.2539404553415064e-06, "loss": 0.7335, "step": 360 }, { "epoch": 0.010539838252897726, "grad_norm": 0.8119918031507282, "learning_rate": 5.2685347343841214e-06, "loss": 0.8181, "step": 361 }, { "epoch": 0.010569034480745088, "grad_norm": 0.8412134059022202, "learning_rate": 5.2831290134267364e-06, "loss": 0.8974, "step": 362 }, { "epoch": 0.01059823070859245, "grad_norm": 0.94882681741717, "learning_rate": 5.297723292469352e-06, "loss": 0.9456, "step": 363 }, { "epoch": 0.010627426936439812, "grad_norm": 0.81244111140329, "learning_rate": 5.312317571511967e-06, "loss": 0.8885, "step": 364 }, { "epoch": 0.010656623164287175, "grad_norm": 0.7442483286417065, "learning_rate": 5.326911850554582e-06, "loss": 0.6741, "step": 365 }, { "epoch": 0.010685819392134536, "grad_norm": 0.9060995603825164, "learning_rate": 5.341506129597198e-06, "loss": 0.8419, "step": 366 }, { "epoch": 0.010715015619981899, "grad_norm": 0.7866970548832669, "learning_rate": 5.356100408639814e-06, "loss": 0.6393, "step": 367 }, { "epoch": 0.01074421184782926, "grad_norm": 0.9479700544978581, "learning_rate": 5.370694687682429e-06, "loss": 0.8585, "step": 368 }, { "epoch": 0.010773408075676623, "grad_norm": 0.8419248937087273, "learning_rate": 5.385288966725044e-06, "loss": 0.8141, "step": 369 }, { "epoch": 0.010802604303523984, "grad_norm": 0.8371097441422383, "learning_rate": 5.399883245767659e-06, "loss": 0.676, "step": 370 }, { "epoch": 0.010831800531371347, "grad_norm": 0.8671311683604614, "learning_rate": 5.414477524810275e-06, "loss": 0.7679, "step": 371 }, { "epoch": 0.010860996759218708, "grad_norm": 0.8450413027266028, "learning_rate": 5.42907180385289e-06, "loss": 0.8445, "step": 372 }, { "epoch": 0.010890192987066071, "grad_norm": 0.835535395696544, "learning_rate": 5.443666082895505e-06, "loss": 0.7258, "step": 373 }, { "epoch": 0.010919389214913432, "grad_norm": 0.7984734037782524, "learning_rate": 5.45826036193812e-06, "loss": 0.8449, "step": 374 }, { "epoch": 0.010948585442760795, "grad_norm": 0.8457203783935601, "learning_rate": 5.472854640980736e-06, "loss": 0.8652, "step": 375 }, { "epoch": 0.010977781670608158, "grad_norm": 0.8657915446774359, "learning_rate": 5.487448920023351e-06, "loss": 0.9308, "step": 376 }, { "epoch": 0.01100697789845552, "grad_norm": 1.113419030077556, "learning_rate": 5.502043199065967e-06, "loss": 0.8896, "step": 377 }, { "epoch": 0.011036174126302882, "grad_norm": 0.8317580193624765, "learning_rate": 5.516637478108582e-06, "loss": 0.8242, "step": 378 }, { "epoch": 0.011065370354150244, "grad_norm": 0.9222029590718266, "learning_rate": 5.5312317571511975e-06, "loss": 0.7923, "step": 379 }, { "epoch": 0.011094566581997607, "grad_norm": 0.8468179364082461, "learning_rate": 5.5458260361938125e-06, "loss": 0.7423, "step": 380 }, { "epoch": 0.011123762809844968, "grad_norm": 1.6862432830045258, "learning_rate": 5.5604203152364275e-06, "loss": 0.8773, "step": 381 }, { "epoch": 0.01115295903769233, "grad_norm": 0.8215270587861323, "learning_rate": 5.5750145942790425e-06, "loss": 0.7012, "step": 382 }, { "epoch": 0.011182155265539692, "grad_norm": 3.0606443020711995, "learning_rate": 5.589608873321658e-06, "loss": 0.946, "step": 383 }, { "epoch": 0.011211351493387055, "grad_norm": 0.8171345225259148, "learning_rate": 5.604203152364273e-06, "loss": 0.8155, "step": 384 }, { "epoch": 0.011240547721234416, "grad_norm": 0.9805564444478623, "learning_rate": 5.618797431406888e-06, "loss": 0.8459, "step": 385 }, { "epoch": 0.011269743949081779, "grad_norm": 0.8530001581612188, "learning_rate": 5.633391710449503e-06, "loss": 0.7339, "step": 386 }, { "epoch": 0.01129894017692914, "grad_norm": 0.9432393896610005, "learning_rate": 5.647985989492119e-06, "loss": 0.8374, "step": 387 }, { "epoch": 0.011328136404776503, "grad_norm": 0.82485975604837, "learning_rate": 5.662580268534735e-06, "loss": 0.7929, "step": 388 }, { "epoch": 0.011357332632623864, "grad_norm": 0.8297583052954717, "learning_rate": 5.67717454757735e-06, "loss": 0.6877, "step": 389 }, { "epoch": 0.011386528860471227, "grad_norm": 0.9327348854604454, "learning_rate": 5.691768826619965e-06, "loss": 0.8386, "step": 390 }, { "epoch": 0.011415725088318588, "grad_norm": 0.7958490476124837, "learning_rate": 5.706363105662581e-06, "loss": 0.7919, "step": 391 }, { "epoch": 0.011444921316165951, "grad_norm": 0.7876677196307481, "learning_rate": 5.720957384705196e-06, "loss": 0.7531, "step": 392 }, { "epoch": 0.011474117544013314, "grad_norm": 1.0523391035163547, "learning_rate": 5.735551663747811e-06, "loss": 0.8339, "step": 393 }, { "epoch": 0.011503313771860675, "grad_norm": 0.9590844818308515, "learning_rate": 5.750145942790426e-06, "loss": 0.9062, "step": 394 }, { "epoch": 0.011532509999708038, "grad_norm": 0.9224763785750923, "learning_rate": 5.764740221833042e-06, "loss": 0.846, "step": 395 }, { "epoch": 0.0115617062275554, "grad_norm": 0.8435972585077722, "learning_rate": 5.779334500875657e-06, "loss": 0.7917, "step": 396 }, { "epoch": 0.011590902455402763, "grad_norm": 0.7707251428092794, "learning_rate": 5.793928779918272e-06, "loss": 0.7353, "step": 397 }, { "epoch": 0.011620098683250124, "grad_norm": 0.8301552913902144, "learning_rate": 5.808523058960888e-06, "loss": 0.8297, "step": 398 }, { "epoch": 0.011649294911097487, "grad_norm": 0.874730025190961, "learning_rate": 5.8231173380035035e-06, "loss": 0.8906, "step": 399 }, { "epoch": 0.011678491138944848, "grad_norm": 0.9039778535226379, "learning_rate": 5.8377116170461185e-06, "loss": 0.8048, "step": 400 }, { "epoch": 0.01170768736679221, "grad_norm": 0.9271539253726137, "learning_rate": 5.8523058960887335e-06, "loss": 0.7501, "step": 401 }, { "epoch": 0.011736883594639572, "grad_norm": 0.8916685230911435, "learning_rate": 5.8669001751313486e-06, "loss": 0.8452, "step": 402 }, { "epoch": 0.011766079822486935, "grad_norm": 0.8332589918662168, "learning_rate": 5.881494454173964e-06, "loss": 0.8035, "step": 403 }, { "epoch": 0.011795276050334296, "grad_norm": 0.7687184627045739, "learning_rate": 5.896088733216579e-06, "loss": 0.7008, "step": 404 }, { "epoch": 0.011824472278181659, "grad_norm": 0.8631584375030844, "learning_rate": 5.910683012259194e-06, "loss": 0.758, "step": 405 }, { "epoch": 0.01185366850602902, "grad_norm": 1.094510325252462, "learning_rate": 5.925277291301809e-06, "loss": 0.8587, "step": 406 }, { "epoch": 0.011882864733876383, "grad_norm": 0.9699873653850687, "learning_rate": 5.939871570344425e-06, "loss": 0.8495, "step": 407 }, { "epoch": 0.011912060961723746, "grad_norm": 0.7618900582231616, "learning_rate": 5.95446584938704e-06, "loss": 0.7027, "step": 408 }, { "epoch": 0.011941257189571107, "grad_norm": 0.897954459693387, "learning_rate": 5.969060128429656e-06, "loss": 0.9065, "step": 409 }, { "epoch": 0.01197045341741847, "grad_norm": 1.7547966802711898, "learning_rate": 5.983654407472271e-06, "loss": 0.8904, "step": 410 }, { "epoch": 0.011999649645265831, "grad_norm": 0.9179981213431035, "learning_rate": 5.998248686514887e-06, "loss": 0.9318, "step": 411 }, { "epoch": 0.012028845873113194, "grad_norm": 0.8111117547372665, "learning_rate": 6.012842965557502e-06, "loss": 0.78, "step": 412 }, { "epoch": 0.012058042100960556, "grad_norm": 0.7940367318348774, "learning_rate": 6.027437244600117e-06, "loss": 0.7438, "step": 413 }, { "epoch": 0.012087238328807919, "grad_norm": 0.865124175175135, "learning_rate": 6.042031523642732e-06, "loss": 0.8148, "step": 414 }, { "epoch": 0.01211643455665528, "grad_norm": 1.0164095014984953, "learning_rate": 6.056625802685348e-06, "loss": 0.8414, "step": 415 }, { "epoch": 0.012145630784502643, "grad_norm": 0.9135812785498206, "learning_rate": 6.071220081727963e-06, "loss": 0.8341, "step": 416 }, { "epoch": 0.012174827012350004, "grad_norm": 0.872303611682688, "learning_rate": 6.085814360770578e-06, "loss": 0.8043, "step": 417 }, { "epoch": 0.012204023240197367, "grad_norm": 0.7872520326860549, "learning_rate": 6.100408639813193e-06, "loss": 0.725, "step": 418 }, { "epoch": 0.012233219468044728, "grad_norm": 0.879147715470626, "learning_rate": 6.115002918855809e-06, "loss": 0.8703, "step": 419 }, { "epoch": 0.012262415695892091, "grad_norm": 0.8384438392727703, "learning_rate": 6.129597197898424e-06, "loss": 0.9258, "step": 420 }, { "epoch": 0.012291611923739452, "grad_norm": 1.0398666599194688, "learning_rate": 6.14419147694104e-06, "loss": 0.7912, "step": 421 }, { "epoch": 0.012320808151586815, "grad_norm": 0.8582496453316779, "learning_rate": 6.158785755983655e-06, "loss": 0.7406, "step": 422 }, { "epoch": 0.012350004379434176, "grad_norm": 0.8360971481791312, "learning_rate": 6.1733800350262705e-06, "loss": 0.7933, "step": 423 }, { "epoch": 0.01237920060728154, "grad_norm": 0.815061246270182, "learning_rate": 6.1879743140688855e-06, "loss": 0.7238, "step": 424 }, { "epoch": 0.012408396835128902, "grad_norm": 0.8288475560197849, "learning_rate": 6.2025685931115005e-06, "loss": 0.8204, "step": 425 }, { "epoch": 0.012437593062976263, "grad_norm": 0.7310517177544211, "learning_rate": 6.2171628721541155e-06, "loss": 0.6929, "step": 426 }, { "epoch": 0.012466789290823626, "grad_norm": 0.7880538473120846, "learning_rate": 6.231757151196731e-06, "loss": 0.7366, "step": 427 }, { "epoch": 0.012495985518670987, "grad_norm": 0.792415849498219, "learning_rate": 6.246351430239346e-06, "loss": 0.796, "step": 428 }, { "epoch": 0.01252518174651835, "grad_norm": 0.9415506678904334, "learning_rate": 6.260945709281962e-06, "loss": 0.8326, "step": 429 }, { "epoch": 0.012554377974365712, "grad_norm": 1.084048181001426, "learning_rate": 6.275539988324577e-06, "loss": 0.7521, "step": 430 }, { "epoch": 0.012583574202213075, "grad_norm": 0.9040567210530912, "learning_rate": 6.290134267367192e-06, "loss": 0.9387, "step": 431 }, { "epoch": 0.012612770430060436, "grad_norm": 0.7593937392988083, "learning_rate": 6.304728546409808e-06, "loss": 0.7267, "step": 432 }, { "epoch": 0.012641966657907799, "grad_norm": 0.826358808250186, "learning_rate": 6.319322825452423e-06, "loss": 0.7755, "step": 433 }, { "epoch": 0.01267116288575516, "grad_norm": 0.8530350097961062, "learning_rate": 6.333917104495038e-06, "loss": 0.7661, "step": 434 }, { "epoch": 0.012700359113602523, "grad_norm": 0.8127776161380069, "learning_rate": 6.348511383537653e-06, "loss": 0.7726, "step": 435 }, { "epoch": 0.012729555341449884, "grad_norm": 0.758782202151483, "learning_rate": 6.363105662580268e-06, "loss": 0.677, "step": 436 }, { "epoch": 0.012758751569297247, "grad_norm": 0.750177033349745, "learning_rate": 6.377699941622885e-06, "loss": 0.6869, "step": 437 }, { "epoch": 0.012787947797144608, "grad_norm": 0.7567978185500358, "learning_rate": 6.3922942206655e-06, "loss": 0.7409, "step": 438 }, { "epoch": 0.012817144024991971, "grad_norm": 1.0662919510395434, "learning_rate": 6.406888499708115e-06, "loss": 0.8468, "step": 439 }, { "epoch": 0.012846340252839332, "grad_norm": 0.8787532642338, "learning_rate": 6.42148277875073e-06, "loss": 0.7305, "step": 440 }, { "epoch": 0.012875536480686695, "grad_norm": 0.8180878819503029, "learning_rate": 6.436077057793345e-06, "loss": 0.7175, "step": 441 }, { "epoch": 0.012904732708534058, "grad_norm": 0.8044017090854653, "learning_rate": 6.450671336835961e-06, "loss": 0.8468, "step": 442 }, { "epoch": 0.01293392893638142, "grad_norm": 0.992351861309411, "learning_rate": 6.465265615878576e-06, "loss": 0.8639, "step": 443 }, { "epoch": 0.012963125164228782, "grad_norm": 0.8261405087878558, "learning_rate": 6.479859894921191e-06, "loss": 0.7046, "step": 444 }, { "epoch": 0.012992321392076144, "grad_norm": 0.9665793480477742, "learning_rate": 6.494454173963807e-06, "loss": 0.9306, "step": 445 }, { "epoch": 0.013021517619923506, "grad_norm": 0.9430511336770484, "learning_rate": 6.509048453006422e-06, "loss": 0.8595, "step": 446 }, { "epoch": 0.013050713847770868, "grad_norm": 0.8359964595282096, "learning_rate": 6.523642732049037e-06, "loss": 0.7047, "step": 447 }, { "epoch": 0.01307991007561823, "grad_norm": 0.8153160577303811, "learning_rate": 6.538237011091652e-06, "loss": 0.8065, "step": 448 }, { "epoch": 0.013109106303465592, "grad_norm": 0.8636363844095788, "learning_rate": 6.552831290134267e-06, "loss": 0.792, "step": 449 }, { "epoch": 0.013138302531312955, "grad_norm": 0.8424603229524844, "learning_rate": 6.567425569176882e-06, "loss": 0.8581, "step": 450 }, { "epoch": 0.013167498759160316, "grad_norm": 0.8098183114554652, "learning_rate": 6.582019848219497e-06, "loss": 0.7416, "step": 451 }, { "epoch": 0.013196694987007679, "grad_norm": 0.8364705627763533, "learning_rate": 6.596614127262113e-06, "loss": 0.732, "step": 452 }, { "epoch": 0.01322589121485504, "grad_norm": 0.8010152473964575, "learning_rate": 6.611208406304729e-06, "loss": 0.8476, "step": 453 }, { "epoch": 0.013255087442702403, "grad_norm": 0.8381215682297567, "learning_rate": 6.625802685347345e-06, "loss": 0.8356, "step": 454 }, { "epoch": 0.013284283670549764, "grad_norm": 0.7776181844150487, "learning_rate": 6.64039696438996e-06, "loss": 0.7863, "step": 455 }, { "epoch": 0.013313479898397127, "grad_norm": 0.8604912844962745, "learning_rate": 6.654991243432575e-06, "loss": 0.8088, "step": 456 }, { "epoch": 0.01334267612624449, "grad_norm": 0.9303748964554791, "learning_rate": 6.66958552247519e-06, "loss": 0.9499, "step": 457 }, { "epoch": 0.013371872354091851, "grad_norm": 0.8951252575050709, "learning_rate": 6.684179801517805e-06, "loss": 0.7814, "step": 458 }, { "epoch": 0.013401068581939214, "grad_norm": 1.0818322414356365, "learning_rate": 6.69877408056042e-06, "loss": 0.7678, "step": 459 }, { "epoch": 0.013430264809786575, "grad_norm": 0.9019739637710597, "learning_rate": 6.713368359603035e-06, "loss": 0.8767, "step": 460 }, { "epoch": 0.013459461037633938, "grad_norm": 0.9785539101292242, "learning_rate": 6.727962638645652e-06, "loss": 0.8233, "step": 461 }, { "epoch": 0.0134886572654813, "grad_norm": 0.7756969467002984, "learning_rate": 6.742556917688267e-06, "loss": 0.7668, "step": 462 }, { "epoch": 0.013517853493328662, "grad_norm": 1.9872809448761488, "learning_rate": 6.757151196730882e-06, "loss": 0.8691, "step": 463 }, { "epoch": 0.013547049721176024, "grad_norm": 0.8309239372460531, "learning_rate": 6.7717454757734976e-06, "loss": 0.7729, "step": 464 }, { "epoch": 0.013576245949023387, "grad_norm": 0.7611667800375317, "learning_rate": 6.786339754816113e-06, "loss": 0.7399, "step": 465 }, { "epoch": 0.013605442176870748, "grad_norm": 0.8322272785535335, "learning_rate": 6.800934033858728e-06, "loss": 0.7522, "step": 466 }, { "epoch": 0.01363463840471811, "grad_norm": 0.8515381384304044, "learning_rate": 6.815528312901343e-06, "loss": 0.8214, "step": 467 }, { "epoch": 0.013663834632565472, "grad_norm": 0.901392663756073, "learning_rate": 6.830122591943958e-06, "loss": 0.8162, "step": 468 }, { "epoch": 0.013693030860412835, "grad_norm": 1.2028571952567253, "learning_rate": 6.844716870986574e-06, "loss": 0.9162, "step": 469 }, { "epoch": 0.013722227088260196, "grad_norm": 0.8038985006893379, "learning_rate": 6.859311150029189e-06, "loss": 0.8478, "step": 470 }, { "epoch": 0.013751423316107559, "grad_norm": 0.8141304925305717, "learning_rate": 6.873905429071804e-06, "loss": 0.7971, "step": 471 }, { "epoch": 0.01378061954395492, "grad_norm": 0.9995525280527173, "learning_rate": 6.888499708114419e-06, "loss": 0.7311, "step": 472 }, { "epoch": 0.013809815771802283, "grad_norm": 0.8109972793727701, "learning_rate": 6.903093987157034e-06, "loss": 0.8174, "step": 473 }, { "epoch": 0.013839011999649646, "grad_norm": 0.9013136705285663, "learning_rate": 6.91768826619965e-06, "loss": 0.8788, "step": 474 }, { "epoch": 0.013868208227497007, "grad_norm": 1.468479306672836, "learning_rate": 6.932282545242265e-06, "loss": 0.7654, "step": 475 }, { "epoch": 0.01389740445534437, "grad_norm": 0.8519960355507723, "learning_rate": 6.94687682428488e-06, "loss": 0.8461, "step": 476 }, { "epoch": 0.013926600683191731, "grad_norm": 0.8100136350791158, "learning_rate": 6.961471103327497e-06, "loss": 0.7994, "step": 477 }, { "epoch": 0.013955796911039094, "grad_norm": 0.8731430165371165, "learning_rate": 6.976065382370112e-06, "loss": 0.8857, "step": 478 }, { "epoch": 0.013984993138886456, "grad_norm": 0.8944819644301827, "learning_rate": 6.990659661412727e-06, "loss": 0.7609, "step": 479 }, { "epoch": 0.014014189366733818, "grad_norm": 0.7677006250337868, "learning_rate": 7.005253940455342e-06, "loss": 0.7351, "step": 480 }, { "epoch": 0.01404338559458118, "grad_norm": 0.8421818816184733, "learning_rate": 7.019848219497957e-06, "loss": 0.769, "step": 481 }, { "epoch": 0.014072581822428543, "grad_norm": 1.0051340220737457, "learning_rate": 7.034442498540572e-06, "loss": 0.7055, "step": 482 }, { "epoch": 0.014101778050275904, "grad_norm": 0.8470067712531345, "learning_rate": 7.049036777583187e-06, "loss": 0.8329, "step": 483 }, { "epoch": 0.014130974278123267, "grad_norm": 0.9258700774971966, "learning_rate": 7.063631056625803e-06, "loss": 0.8311, "step": 484 }, { "epoch": 0.014160170505970628, "grad_norm": 0.9109676652608394, "learning_rate": 7.078225335668419e-06, "loss": 0.7729, "step": 485 }, { "epoch": 0.014189366733817991, "grad_norm": 0.7955194930100931, "learning_rate": 7.092819614711034e-06, "loss": 0.751, "step": 486 }, { "epoch": 0.014218562961665352, "grad_norm": 0.8836144563050098, "learning_rate": 7.1074138937536495e-06, "loss": 0.7964, "step": 487 }, { "epoch": 0.014247759189512715, "grad_norm": 0.8325314699674105, "learning_rate": 7.1220081727962645e-06, "loss": 0.7439, "step": 488 }, { "epoch": 0.014276955417360076, "grad_norm": 0.8063035598160699, "learning_rate": 7.1366024518388795e-06, "loss": 0.7998, "step": 489 }, { "epoch": 0.01430615164520744, "grad_norm": 0.7652775556727329, "learning_rate": 7.1511967308814945e-06, "loss": 0.7473, "step": 490 }, { "epoch": 0.014335347873054802, "grad_norm": 1.1515978305506078, "learning_rate": 7.1657910099241095e-06, "loss": 0.841, "step": 491 }, { "epoch": 0.014364544100902163, "grad_norm": 0.9251750239217053, "learning_rate": 7.1803852889667245e-06, "loss": 0.8369, "step": 492 }, { "epoch": 0.014393740328749526, "grad_norm": 0.8505644391498247, "learning_rate": 7.194979568009341e-06, "loss": 0.8322, "step": 493 }, { "epoch": 0.014422936556596887, "grad_norm": 1.1688356918597533, "learning_rate": 7.209573847051956e-06, "loss": 0.8479, "step": 494 }, { "epoch": 0.01445213278444425, "grad_norm": 0.8030650947952506, "learning_rate": 7.224168126094571e-06, "loss": 0.8417, "step": 495 }, { "epoch": 0.014481329012291612, "grad_norm": 0.8297250509219839, "learning_rate": 7.238762405137186e-06, "loss": 0.8221, "step": 496 }, { "epoch": 0.014510525240138974, "grad_norm": 0.8610953964168558, "learning_rate": 7.253356684179802e-06, "loss": 0.7015, "step": 497 }, { "epoch": 0.014539721467986336, "grad_norm": 1.0041836067504382, "learning_rate": 7.267950963222417e-06, "loss": 0.7862, "step": 498 }, { "epoch": 0.014568917695833699, "grad_norm": 0.863782061608813, "learning_rate": 7.282545242265032e-06, "loss": 0.8316, "step": 499 }, { "epoch": 0.01459811392368106, "grad_norm": 0.8143696953071274, "learning_rate": 7.297139521307647e-06, "loss": 0.7565, "step": 500 }, { "epoch": 0.014627310151528423, "grad_norm": 0.8541899408315129, "learning_rate": 7.311733800350264e-06, "loss": 0.9181, "step": 501 }, { "epoch": 0.014656506379375784, "grad_norm": 0.8028196154473604, "learning_rate": 7.326328079392879e-06, "loss": 0.7993, "step": 502 }, { "epoch": 0.014685702607223147, "grad_norm": 1.0844134592187196, "learning_rate": 7.340922358435494e-06, "loss": 0.8664, "step": 503 }, { "epoch": 0.014714898835070508, "grad_norm": 0.8374108819592305, "learning_rate": 7.355516637478109e-06, "loss": 0.8243, "step": 504 }, { "epoch": 0.014744095062917871, "grad_norm": 0.9044570581212728, "learning_rate": 7.370110916520724e-06, "loss": 0.8884, "step": 505 }, { "epoch": 0.014773291290765234, "grad_norm": 0.8387701485043484, "learning_rate": 7.384705195563339e-06, "loss": 0.7442, "step": 506 }, { "epoch": 0.014802487518612595, "grad_norm": 0.9347005717056573, "learning_rate": 7.399299474605955e-06, "loss": 0.8019, "step": 507 }, { "epoch": 0.014831683746459958, "grad_norm": 0.8485305430696336, "learning_rate": 7.41389375364857e-06, "loss": 0.79, "step": 508 }, { "epoch": 0.01486087997430732, "grad_norm": 0.8303223234562442, "learning_rate": 7.428488032691186e-06, "loss": 0.845, "step": 509 }, { "epoch": 0.014890076202154682, "grad_norm": 0.8257029725787356, "learning_rate": 7.443082311733801e-06, "loss": 0.7992, "step": 510 }, { "epoch": 0.014919272430002043, "grad_norm": 0.7928559107277576, "learning_rate": 7.457676590776416e-06, "loss": 0.7278, "step": 511 }, { "epoch": 0.014948468657849406, "grad_norm": 0.8029475678639966, "learning_rate": 7.4722708698190314e-06, "loss": 0.7851, "step": 512 }, { "epoch": 0.014977664885696768, "grad_norm": 0.8409701444680587, "learning_rate": 7.4868651488616464e-06, "loss": 0.8152, "step": 513 }, { "epoch": 0.01500686111354413, "grad_norm": 0.9624923478092925, "learning_rate": 7.5014594279042614e-06, "loss": 0.796, "step": 514 }, { "epoch": 0.015036057341391492, "grad_norm": 1.0844737625041045, "learning_rate": 7.5160537069468764e-06, "loss": 0.8719, "step": 515 }, { "epoch": 0.015065253569238855, "grad_norm": 0.8322761777955473, "learning_rate": 7.5306479859894914e-06, "loss": 0.7238, "step": 516 }, { "epoch": 0.015094449797086216, "grad_norm": 0.7970678990378725, "learning_rate": 7.545242265032108e-06, "loss": 0.8054, "step": 517 }, { "epoch": 0.015123646024933579, "grad_norm": 0.7953523900782083, "learning_rate": 7.559836544074723e-06, "loss": 0.7818, "step": 518 }, { "epoch": 0.01515284225278094, "grad_norm": 0.8816281245427665, "learning_rate": 7.574430823117339e-06, "loss": 0.888, "step": 519 }, { "epoch": 0.015182038480628303, "grad_norm": 0.8709835690565341, "learning_rate": 7.589025102159954e-06, "loss": 0.7688, "step": 520 }, { "epoch": 0.015211234708475664, "grad_norm": 1.1275540864266387, "learning_rate": 7.603619381202569e-06, "loss": 0.7565, "step": 521 }, { "epoch": 0.015240430936323027, "grad_norm": 0.8716321234478718, "learning_rate": 7.618213660245184e-06, "loss": 0.7098, "step": 522 }, { "epoch": 0.01526962716417039, "grad_norm": 0.8790915341672368, "learning_rate": 7.6328079392878e-06, "loss": 0.8226, "step": 523 }, { "epoch": 0.015298823392017751, "grad_norm": 0.7855414758684478, "learning_rate": 7.647402218330414e-06, "loss": 0.7263, "step": 524 }, { "epoch": 0.015328019619865114, "grad_norm": 0.7838338599326566, "learning_rate": 7.66199649737303e-06, "loss": 0.7188, "step": 525 }, { "epoch": 0.015357215847712475, "grad_norm": 0.8857643658878442, "learning_rate": 7.676590776415646e-06, "loss": 0.7975, "step": 526 }, { "epoch": 0.015386412075559838, "grad_norm": 0.8326705790200943, "learning_rate": 7.691185055458262e-06, "loss": 0.8064, "step": 527 }, { "epoch": 0.0154156083034072, "grad_norm": 0.7697479156616429, "learning_rate": 7.705779334500876e-06, "loss": 0.7535, "step": 528 }, { "epoch": 0.015444804531254562, "grad_norm": 1.0214711741830378, "learning_rate": 7.720373613543492e-06, "loss": 0.8314, "step": 529 }, { "epoch": 0.015474000759101924, "grad_norm": 0.8278597577743888, "learning_rate": 7.734967892586106e-06, "loss": 0.7536, "step": 530 }, { "epoch": 0.015503196986949287, "grad_norm": 0.9756376523443447, "learning_rate": 7.749562171628722e-06, "loss": 0.7493, "step": 531 }, { "epoch": 0.015532393214796648, "grad_norm": 0.8829442732248437, "learning_rate": 7.764156450671336e-06, "loss": 0.9121, "step": 532 }, { "epoch": 0.01556158944264401, "grad_norm": 0.7590715558015682, "learning_rate": 7.778750729713953e-06, "loss": 0.7352, "step": 533 }, { "epoch": 0.015590785670491372, "grad_norm": 0.8991562984770718, "learning_rate": 7.793345008756567e-06, "loss": 0.8737, "step": 534 }, { "epoch": 0.015619981898338735, "grad_norm": 0.8700381614176058, "learning_rate": 7.807939287799183e-06, "loss": 0.8247, "step": 535 }, { "epoch": 0.015649178126186096, "grad_norm": 0.7737844733508855, "learning_rate": 7.8225335668418e-06, "loss": 0.7582, "step": 536 }, { "epoch": 0.015678374354033457, "grad_norm": 0.8159074788087892, "learning_rate": 7.837127845884413e-06, "loss": 0.8245, "step": 537 }, { "epoch": 0.015707570581880822, "grad_norm": 0.7976895373467107, "learning_rate": 7.85172212492703e-06, "loss": 0.7091, "step": 538 }, { "epoch": 0.015736766809728183, "grad_norm": 0.8081154039430078, "learning_rate": 7.866316403969643e-06, "loss": 0.7714, "step": 539 }, { "epoch": 0.015765963037575544, "grad_norm": 1.3397171563283843, "learning_rate": 7.88091068301226e-06, "loss": 0.8765, "step": 540 }, { "epoch": 0.01579515926542291, "grad_norm": 0.8115170188883838, "learning_rate": 7.895504962054875e-06, "loss": 0.753, "step": 541 }, { "epoch": 0.01582435549327027, "grad_norm": 0.7845290556844985, "learning_rate": 7.910099241097491e-06, "loss": 0.75, "step": 542 }, { "epoch": 0.01585355172111763, "grad_norm": 0.7666518605013929, "learning_rate": 7.924693520140105e-06, "loss": 0.6608, "step": 543 }, { "epoch": 0.015882747948964993, "grad_norm": 1.4406929553784336, "learning_rate": 7.939287799182721e-06, "loss": 0.7461, "step": 544 }, { "epoch": 0.015911944176812357, "grad_norm": 0.8018149130387848, "learning_rate": 7.953882078225335e-06, "loss": 0.8055, "step": 545 }, { "epoch": 0.01594114040465972, "grad_norm": 0.8575023057587424, "learning_rate": 7.968476357267951e-06, "loss": 0.813, "step": 546 }, { "epoch": 0.01597033663250708, "grad_norm": 0.8049922255239257, "learning_rate": 7.983070636310567e-06, "loss": 0.7666, "step": 547 }, { "epoch": 0.01599953286035444, "grad_norm": 0.859350719909196, "learning_rate": 7.997664915353181e-06, "loss": 0.833, "step": 548 }, { "epoch": 0.016028729088201805, "grad_norm": 0.8728658264312983, "learning_rate": 8.012259194395799e-06, "loss": 0.7626, "step": 549 }, { "epoch": 0.016057925316049167, "grad_norm": 0.8317502010580616, "learning_rate": 8.026853473438413e-06, "loss": 0.7764, "step": 550 }, { "epoch": 0.016087121543896528, "grad_norm": 0.7948008215016056, "learning_rate": 8.041447752481029e-06, "loss": 0.7611, "step": 551 }, { "epoch": 0.01611631777174389, "grad_norm": 0.8087659024435759, "learning_rate": 8.056042031523643e-06, "loss": 0.8046, "step": 552 }, { "epoch": 0.016145513999591254, "grad_norm": 0.809427057225677, "learning_rate": 8.070636310566259e-06, "loss": 0.7991, "step": 553 }, { "epoch": 0.016174710227438615, "grad_norm": 0.9313060577402515, "learning_rate": 8.085230589608873e-06, "loss": 0.7459, "step": 554 }, { "epoch": 0.016203906455285976, "grad_norm": 0.835382775953264, "learning_rate": 8.099824868651489e-06, "loss": 0.816, "step": 555 }, { "epoch": 0.01623310268313334, "grad_norm": 1.2982266874466284, "learning_rate": 8.114419147694104e-06, "loss": 0.8144, "step": 556 }, { "epoch": 0.016262298910980702, "grad_norm": 0.9296791208260067, "learning_rate": 8.12901342673672e-06, "loss": 0.9404, "step": 557 }, { "epoch": 0.016291495138828063, "grad_norm": 0.8014370235386262, "learning_rate": 8.143607705779336e-06, "loss": 0.7665, "step": 558 }, { "epoch": 0.016320691366675424, "grad_norm": 0.9365122754412731, "learning_rate": 8.15820198482195e-06, "loss": 0.7995, "step": 559 }, { "epoch": 0.01634988759452279, "grad_norm": 0.8284876206089664, "learning_rate": 8.172796263864566e-06, "loss": 0.85, "step": 560 }, { "epoch": 0.01637908382237015, "grad_norm": 0.9212654492764597, "learning_rate": 8.18739054290718e-06, "loss": 0.8342, "step": 561 }, { "epoch": 0.01640828005021751, "grad_norm": 0.85532587088767, "learning_rate": 8.201984821949796e-06, "loss": 0.8027, "step": 562 }, { "epoch": 0.016437476278064873, "grad_norm": 2.267707850751453, "learning_rate": 8.21657910099241e-06, "loss": 0.7907, "step": 563 }, { "epoch": 0.016466672505912237, "grad_norm": 0.8752736654533048, "learning_rate": 8.231173380035026e-06, "loss": 0.8847, "step": 564 }, { "epoch": 0.0164958687337596, "grad_norm": 0.8816485040589828, "learning_rate": 8.245767659077642e-06, "loss": 0.7381, "step": 565 }, { "epoch": 0.01652506496160696, "grad_norm": 0.9451516915275848, "learning_rate": 8.260361938120258e-06, "loss": 0.8187, "step": 566 }, { "epoch": 0.01655426118945432, "grad_norm": 0.7426163058113081, "learning_rate": 8.274956217162872e-06, "loss": 0.7013, "step": 567 }, { "epoch": 0.016583457417301686, "grad_norm": 0.8147614831954206, "learning_rate": 8.289550496205488e-06, "loss": 0.7965, "step": 568 }, { "epoch": 0.016612653645149047, "grad_norm": 0.9056115408025686, "learning_rate": 8.304144775248104e-06, "loss": 0.8004, "step": 569 }, { "epoch": 0.016641849872996408, "grad_norm": 0.8071695876758715, "learning_rate": 8.318739054290718e-06, "loss": 0.723, "step": 570 }, { "epoch": 0.016671046100843773, "grad_norm": 0.7983833484068659, "learning_rate": 8.333333333333334e-06, "loss": 0.759, "step": 571 }, { "epoch": 0.016700242328691134, "grad_norm": 1.1097426575945761, "learning_rate": 8.347927612375948e-06, "loss": 0.8655, "step": 572 }, { "epoch": 0.016729438556538495, "grad_norm": 0.849256012155054, "learning_rate": 8.362521891418565e-06, "loss": 0.8035, "step": 573 }, { "epoch": 0.016758634784385856, "grad_norm": 0.7452405967127155, "learning_rate": 8.37711617046118e-06, "loss": 0.7465, "step": 574 }, { "epoch": 0.01678783101223322, "grad_norm": 0.9585084700876852, "learning_rate": 8.391710449503795e-06, "loss": 0.9089, "step": 575 }, { "epoch": 0.016817027240080582, "grad_norm": 0.9155331397279827, "learning_rate": 8.40630472854641e-06, "loss": 0.7439, "step": 576 }, { "epoch": 0.016846223467927943, "grad_norm": 0.8424197072538657, "learning_rate": 8.420899007589025e-06, "loss": 0.7928, "step": 577 }, { "epoch": 0.016875419695775305, "grad_norm": 0.7630030419360571, "learning_rate": 8.435493286631641e-06, "loss": 0.7234, "step": 578 }, { "epoch": 0.01690461592362267, "grad_norm": 1.327231863145676, "learning_rate": 8.450087565674255e-06, "loss": 0.7033, "step": 579 }, { "epoch": 0.01693381215147003, "grad_norm": 0.8149958131096201, "learning_rate": 8.464681844716871e-06, "loss": 0.8449, "step": 580 }, { "epoch": 0.01696300837931739, "grad_norm": 0.8325166781218155, "learning_rate": 8.479276123759487e-06, "loss": 0.7499, "step": 581 }, { "epoch": 0.016992204607164753, "grad_norm": 0.8379519970480773, "learning_rate": 8.493870402802103e-06, "loss": 0.8664, "step": 582 }, { "epoch": 0.017021400835012118, "grad_norm": 0.8611369190528833, "learning_rate": 8.508464681844717e-06, "loss": 0.8032, "step": 583 }, { "epoch": 0.01705059706285948, "grad_norm": 0.8623946436355945, "learning_rate": 8.523058960887333e-06, "loss": 0.831, "step": 584 }, { "epoch": 0.01707979329070684, "grad_norm": 0.8167759007034684, "learning_rate": 8.537653239929947e-06, "loss": 0.7721, "step": 585 }, { "epoch": 0.0171089895185542, "grad_norm": 0.8596137254536323, "learning_rate": 8.552247518972563e-06, "loss": 0.7738, "step": 586 }, { "epoch": 0.017138185746401566, "grad_norm": 0.7969536556497773, "learning_rate": 8.566841798015177e-06, "loss": 0.7086, "step": 587 }, { "epoch": 0.017167381974248927, "grad_norm": 0.9134983657684816, "learning_rate": 8.581436077057793e-06, "loss": 0.9017, "step": 588 }, { "epoch": 0.017196578202096288, "grad_norm": 0.7449123847645509, "learning_rate": 8.596030356100409e-06, "loss": 0.7367, "step": 589 }, { "epoch": 0.017225774429943653, "grad_norm": 0.8767236369891878, "learning_rate": 8.610624635143025e-06, "loss": 0.7914, "step": 590 }, { "epoch": 0.017254970657791014, "grad_norm": 0.8505140523954989, "learning_rate": 8.62521891418564e-06, "loss": 0.7969, "step": 591 }, { "epoch": 0.017284166885638375, "grad_norm": 1.2401440960412249, "learning_rate": 8.639813193228255e-06, "loss": 0.6963, "step": 592 }, { "epoch": 0.017313363113485736, "grad_norm": 0.8164325920658987, "learning_rate": 8.65440747227087e-06, "loss": 0.8261, "step": 593 }, { "epoch": 0.0173425593413331, "grad_norm": 0.9060940847417437, "learning_rate": 8.669001751313485e-06, "loss": 0.872, "step": 594 }, { "epoch": 0.017371755569180462, "grad_norm": 0.7612485897135461, "learning_rate": 8.6835960303561e-06, "loss": 0.7132, "step": 595 }, { "epoch": 0.017400951797027824, "grad_norm": 0.7540418472094303, "learning_rate": 8.698190309398715e-06, "loss": 0.7012, "step": 596 }, { "epoch": 0.017430148024875185, "grad_norm": 0.8315141012440093, "learning_rate": 8.712784588441332e-06, "loss": 0.7087, "step": 597 }, { "epoch": 0.01745934425272255, "grad_norm": 0.8770735880702969, "learning_rate": 8.727378867483947e-06, "loss": 0.7274, "step": 598 }, { "epoch": 0.01748854048056991, "grad_norm": 0.7872473759283206, "learning_rate": 8.741973146526562e-06, "loss": 0.7312, "step": 599 }, { "epoch": 0.017517736708417272, "grad_norm": 0.9611933582842012, "learning_rate": 8.756567425569177e-06, "loss": 0.756, "step": 600 }, { "epoch": 0.017546932936264633, "grad_norm": 0.8374547976343869, "learning_rate": 8.771161704611792e-06, "loss": 0.8071, "step": 601 }, { "epoch": 0.017576129164111998, "grad_norm": 0.7540378824721486, "learning_rate": 8.785755983654408e-06, "loss": 0.6923, "step": 602 }, { "epoch": 0.01760532539195936, "grad_norm": 1.0823177115446458, "learning_rate": 8.800350262697022e-06, "loss": 0.6727, "step": 603 }, { "epoch": 0.01763452161980672, "grad_norm": 0.8675647396585536, "learning_rate": 8.814944541739638e-06, "loss": 0.8263, "step": 604 }, { "epoch": 0.017663717847654085, "grad_norm": 0.9483776877788361, "learning_rate": 8.829538820782254e-06, "loss": 0.9016, "step": 605 }, { "epoch": 0.017692914075501446, "grad_norm": 0.8024593520274003, "learning_rate": 8.84413309982487e-06, "loss": 0.7809, "step": 606 }, { "epoch": 0.017722110303348807, "grad_norm": 0.8530077385618818, "learning_rate": 8.858727378867484e-06, "loss": 0.8012, "step": 607 }, { "epoch": 0.01775130653119617, "grad_norm": 0.7327700599443592, "learning_rate": 8.8733216579101e-06, "loss": 0.635, "step": 608 }, { "epoch": 0.017780502759043533, "grad_norm": 1.1863671263034423, "learning_rate": 8.887915936952714e-06, "loss": 0.6704, "step": 609 }, { "epoch": 0.017809698986890894, "grad_norm": 0.8201422556980857, "learning_rate": 8.90251021599533e-06, "loss": 0.7715, "step": 610 }, { "epoch": 0.017838895214738255, "grad_norm": 0.8180621484990043, "learning_rate": 8.917104495037946e-06, "loss": 0.7871, "step": 611 }, { "epoch": 0.017868091442585617, "grad_norm": 0.8937431057629416, "learning_rate": 8.93169877408056e-06, "loss": 0.7645, "step": 612 }, { "epoch": 0.01789728767043298, "grad_norm": 0.8418379574173963, "learning_rate": 8.946293053123178e-06, "loss": 0.79, "step": 613 }, { "epoch": 0.017926483898280342, "grad_norm": 0.7624796093890419, "learning_rate": 8.960887332165792e-06, "loss": 0.7747, "step": 614 }, { "epoch": 0.017955680126127704, "grad_norm": 0.7859696088242373, "learning_rate": 8.975481611208408e-06, "loss": 0.7529, "step": 615 }, { "epoch": 0.017984876353975065, "grad_norm": 0.834719352637345, "learning_rate": 8.990075890251022e-06, "loss": 0.7986, "step": 616 }, { "epoch": 0.01801407258182243, "grad_norm": 1.0391207852835493, "learning_rate": 9.004670169293638e-06, "loss": 0.835, "step": 617 }, { "epoch": 0.01804326880966979, "grad_norm": 0.7242888637713629, "learning_rate": 9.019264448336252e-06, "loss": 0.668, "step": 618 }, { "epoch": 0.018072465037517152, "grad_norm": 0.836483633929588, "learning_rate": 9.033858727378868e-06, "loss": 0.7712, "step": 619 }, { "epoch": 0.018101661265364517, "grad_norm": 0.9520631670602614, "learning_rate": 9.048453006421482e-06, "loss": 0.8094, "step": 620 }, { "epoch": 0.018130857493211878, "grad_norm": 0.8142385452343305, "learning_rate": 9.0630472854641e-06, "loss": 0.7752, "step": 621 }, { "epoch": 0.01816005372105924, "grad_norm": 0.7608926118179921, "learning_rate": 9.077641564506713e-06, "loss": 0.7074, "step": 622 }, { "epoch": 0.0181892499489066, "grad_norm": 0.8894004110574044, "learning_rate": 9.09223584354933e-06, "loss": 0.8552, "step": 623 }, { "epoch": 0.018218446176753965, "grad_norm": 1.1419385054845452, "learning_rate": 9.106830122591945e-06, "loss": 0.8151, "step": 624 }, { "epoch": 0.018247642404601326, "grad_norm": 0.7919792898595684, "learning_rate": 9.12142440163456e-06, "loss": 0.7718, "step": 625 }, { "epoch": 0.018276838632448687, "grad_norm": 0.7569308287954357, "learning_rate": 9.136018680677175e-06, "loss": 0.6636, "step": 626 }, { "epoch": 0.01830603486029605, "grad_norm": 0.7658692505410292, "learning_rate": 9.15061295971979e-06, "loss": 0.733, "step": 627 }, { "epoch": 0.018335231088143413, "grad_norm": 0.9730588671593331, "learning_rate": 9.165207238762405e-06, "loss": 0.8669, "step": 628 }, { "epoch": 0.018364427315990774, "grad_norm": 0.9153324324054174, "learning_rate": 9.179801517805021e-06, "loss": 0.778, "step": 629 }, { "epoch": 0.018393623543838136, "grad_norm": 1.0795872277454577, "learning_rate": 9.194395796847637e-06, "loss": 0.7419, "step": 630 }, { "epoch": 0.018422819771685497, "grad_norm": 0.7917650902438748, "learning_rate": 9.208990075890251e-06, "loss": 0.728, "step": 631 }, { "epoch": 0.01845201599953286, "grad_norm": 0.8860323125316973, "learning_rate": 9.223584354932867e-06, "loss": 0.8191, "step": 632 }, { "epoch": 0.018481212227380223, "grad_norm": 0.8187415215427231, "learning_rate": 9.238178633975483e-06, "loss": 0.8068, "step": 633 }, { "epoch": 0.018510408455227584, "grad_norm": 0.8613128126758388, "learning_rate": 9.252772913018097e-06, "loss": 0.7539, "step": 634 }, { "epoch": 0.018539604683074945, "grad_norm": 0.9710785667192292, "learning_rate": 9.267367192060713e-06, "loss": 0.7711, "step": 635 }, { "epoch": 0.01856880091092231, "grad_norm": 0.842263529436469, "learning_rate": 9.281961471103327e-06, "loss": 0.7711, "step": 636 }, { "epoch": 0.01859799713876967, "grad_norm": 0.9069672039155837, "learning_rate": 9.296555750145944e-06, "loss": 0.739, "step": 637 }, { "epoch": 0.018627193366617032, "grad_norm": 0.8211229905509478, "learning_rate": 9.311150029188559e-06, "loss": 0.7246, "step": 638 }, { "epoch": 0.018656389594464397, "grad_norm": 1.3467482201731718, "learning_rate": 9.325744308231174e-06, "loss": 0.7927, "step": 639 }, { "epoch": 0.018685585822311758, "grad_norm": 0.8405575443668835, "learning_rate": 9.340338587273789e-06, "loss": 0.8125, "step": 640 }, { "epoch": 0.01871478205015912, "grad_norm": 0.817421257947357, "learning_rate": 9.354932866316404e-06, "loss": 0.7645, "step": 641 }, { "epoch": 0.01874397827800648, "grad_norm": 0.779479690813212, "learning_rate": 9.369527145359019e-06, "loss": 0.6723, "step": 642 }, { "epoch": 0.018773174505853845, "grad_norm": 0.7783415327129061, "learning_rate": 9.384121424401635e-06, "loss": 0.7498, "step": 643 }, { "epoch": 0.018802370733701206, "grad_norm": 0.7446793730503085, "learning_rate": 9.39871570344425e-06, "loss": 0.6544, "step": 644 }, { "epoch": 0.018831566961548567, "grad_norm": 0.7555758200561367, "learning_rate": 9.413309982486866e-06, "loss": 0.7237, "step": 645 }, { "epoch": 0.01886076318939593, "grad_norm": 0.8447043745929378, "learning_rate": 9.427904261529482e-06, "loss": 0.7576, "step": 646 }, { "epoch": 0.018889959417243293, "grad_norm": 0.8538720849265726, "learning_rate": 9.442498540572096e-06, "loss": 0.789, "step": 647 }, { "epoch": 0.018919155645090655, "grad_norm": 0.938311950923278, "learning_rate": 9.457092819614712e-06, "loss": 0.8365, "step": 648 }, { "epoch": 0.018948351872938016, "grad_norm": 0.8587327069221992, "learning_rate": 9.471687098657326e-06, "loss": 0.7445, "step": 649 }, { "epoch": 0.018977548100785377, "grad_norm": 0.9347578037778514, "learning_rate": 9.486281377699942e-06, "loss": 0.9565, "step": 650 }, { "epoch": 0.01900674432863274, "grad_norm": 1.1798411797897554, "learning_rate": 9.500875656742556e-06, "loss": 0.7932, "step": 651 }, { "epoch": 0.019035940556480103, "grad_norm": 0.7723609247191806, "learning_rate": 9.515469935785172e-06, "loss": 0.7512, "step": 652 }, { "epoch": 0.019065136784327464, "grad_norm": 0.7422118568868089, "learning_rate": 9.530064214827788e-06, "loss": 0.6731, "step": 653 }, { "epoch": 0.01909433301217483, "grad_norm": 0.8487124755078377, "learning_rate": 9.544658493870404e-06, "loss": 0.7644, "step": 654 }, { "epoch": 0.01912352924002219, "grad_norm": 0.781598865440857, "learning_rate": 9.559252772913018e-06, "loss": 0.7552, "step": 655 }, { "epoch": 0.01915272546786955, "grad_norm": 1.158215716266514, "learning_rate": 9.573847051955634e-06, "loss": 0.7413, "step": 656 }, { "epoch": 0.019181921695716912, "grad_norm": 0.9957827554915899, "learning_rate": 9.58844133099825e-06, "loss": 0.8382, "step": 657 }, { "epoch": 0.019211117923564277, "grad_norm": 0.8221145578479233, "learning_rate": 9.603035610040864e-06, "loss": 0.8205, "step": 658 }, { "epoch": 0.019240314151411638, "grad_norm": 0.7840370349634176, "learning_rate": 9.61762988908348e-06, "loss": 0.7601, "step": 659 }, { "epoch": 0.019269510379259, "grad_norm": 0.8336234954757087, "learning_rate": 9.632224168126094e-06, "loss": 0.8014, "step": 660 }, { "epoch": 0.01929870660710636, "grad_norm": 0.9091483724794135, "learning_rate": 9.646818447168711e-06, "loss": 0.7824, "step": 661 }, { "epoch": 0.019327902834953725, "grad_norm": 0.8186166742915808, "learning_rate": 9.661412726211326e-06, "loss": 0.7157, "step": 662 }, { "epoch": 0.019357099062801086, "grad_norm": 0.991356852526856, "learning_rate": 9.676007005253941e-06, "loss": 0.7393, "step": 663 }, { "epoch": 0.019386295290648448, "grad_norm": 0.8043752840304061, "learning_rate": 9.690601284296556e-06, "loss": 0.851, "step": 664 }, { "epoch": 0.01941549151849581, "grad_norm": 0.7825714927037061, "learning_rate": 9.705195563339171e-06, "loss": 0.6772, "step": 665 }, { "epoch": 0.019444687746343173, "grad_norm": 0.8677645471360574, "learning_rate": 9.719789842381787e-06, "loss": 0.7236, "step": 666 }, { "epoch": 0.019473883974190535, "grad_norm": 0.7920603030116495, "learning_rate": 9.734384121424401e-06, "loss": 0.7708, "step": 667 }, { "epoch": 0.019503080202037896, "grad_norm": 0.8112878393045472, "learning_rate": 9.748978400467017e-06, "loss": 0.76, "step": 668 }, { "epoch": 0.01953227642988526, "grad_norm": 0.7632500687789732, "learning_rate": 9.763572679509633e-06, "loss": 0.6654, "step": 669 }, { "epoch": 0.01956147265773262, "grad_norm": 0.9780377302112456, "learning_rate": 9.778166958552249e-06, "loss": 0.7663, "step": 670 }, { "epoch": 0.019590668885579983, "grad_norm": 0.9114284825207085, "learning_rate": 9.792761237594863e-06, "loss": 0.7302, "step": 671 }, { "epoch": 0.019619865113427344, "grad_norm": 0.8449622328050449, "learning_rate": 9.807355516637479e-06, "loss": 0.786, "step": 672 }, { "epoch": 0.01964906134127471, "grad_norm": 0.8579111455603136, "learning_rate": 9.821949795680093e-06, "loss": 0.7896, "step": 673 }, { "epoch": 0.01967825756912207, "grad_norm": 0.8111662090517859, "learning_rate": 9.836544074722709e-06, "loss": 0.8018, "step": 674 }, { "epoch": 0.01970745379696943, "grad_norm": 0.8436208349944609, "learning_rate": 9.851138353765323e-06, "loss": 0.742, "step": 675 }, { "epoch": 0.019736650024816792, "grad_norm": 0.7868126644242017, "learning_rate": 9.865732632807939e-06, "loss": 0.7159, "step": 676 }, { "epoch": 0.019765846252664157, "grad_norm": 0.7136343811849274, "learning_rate": 9.880326911850555e-06, "loss": 0.6059, "step": 677 }, { "epoch": 0.01979504248051152, "grad_norm": 0.8270402637328361, "learning_rate": 9.89492119089317e-06, "loss": 0.7909, "step": 678 }, { "epoch": 0.01982423870835888, "grad_norm": 0.8064891843504342, "learning_rate": 9.909515469935787e-06, "loss": 0.7509, "step": 679 }, { "epoch": 0.01985343493620624, "grad_norm": 0.7892436998208296, "learning_rate": 9.9241097489784e-06, "loss": 0.686, "step": 680 }, { "epoch": 0.019882631164053605, "grad_norm": 0.8005598361095204, "learning_rate": 9.938704028021017e-06, "loss": 0.7177, "step": 681 }, { "epoch": 0.019911827391900967, "grad_norm": 0.8878689398866408, "learning_rate": 9.95329830706363e-06, "loss": 0.7365, "step": 682 }, { "epoch": 0.019941023619748328, "grad_norm": 0.9454223257590064, "learning_rate": 9.967892586106247e-06, "loss": 0.715, "step": 683 }, { "epoch": 0.01997021984759569, "grad_norm": 0.9625210592787234, "learning_rate": 9.98248686514886e-06, "loss": 0.7976, "step": 684 }, { "epoch": 0.019999416075443054, "grad_norm": 0.7727436934307387, "learning_rate": 9.997081144191478e-06, "loss": 0.7474, "step": 685 }, { "epoch": 0.020028612303290415, "grad_norm": 1.320979835964976, "learning_rate": 1.0011675423234092e-05, "loss": 0.8932, "step": 686 }, { "epoch": 0.020057808531137776, "grad_norm": 0.8297859405463436, "learning_rate": 1.0026269702276708e-05, "loss": 0.7643, "step": 687 }, { "epoch": 0.02008700475898514, "grad_norm": 0.7833656973919241, "learning_rate": 1.0040863981319324e-05, "loss": 0.7605, "step": 688 }, { "epoch": 0.020116200986832502, "grad_norm": 0.8599667704179055, "learning_rate": 1.0055458260361938e-05, "loss": 0.8236, "step": 689 }, { "epoch": 0.020145397214679863, "grad_norm": 0.7652750456227542, "learning_rate": 1.0070052539404554e-05, "loss": 0.6946, "step": 690 }, { "epoch": 0.020174593442527224, "grad_norm": 0.7730794142305338, "learning_rate": 1.0084646818447168e-05, "loss": 0.7229, "step": 691 }, { "epoch": 0.02020378967037459, "grad_norm": 0.9478368281964781, "learning_rate": 1.0099241097489784e-05, "loss": 0.8457, "step": 692 }, { "epoch": 0.02023298589822195, "grad_norm": 0.850378301916952, "learning_rate": 1.01138353765324e-05, "loss": 0.8333, "step": 693 }, { "epoch": 0.02026218212606931, "grad_norm": 0.7958488110768579, "learning_rate": 1.0128429655575016e-05, "loss": 0.7212, "step": 694 }, { "epoch": 0.020291378353916673, "grad_norm": 0.8377414311342465, "learning_rate": 1.014302393461763e-05, "loss": 0.7407, "step": 695 }, { "epoch": 0.020320574581764037, "grad_norm": 0.8523283926271088, "learning_rate": 1.0157618213660246e-05, "loss": 0.8496, "step": 696 }, { "epoch": 0.0203497708096114, "grad_norm": 0.8392876789766193, "learning_rate": 1.017221249270286e-05, "loss": 0.7463, "step": 697 }, { "epoch": 0.02037896703745876, "grad_norm": 0.9955583538481875, "learning_rate": 1.0186806771745476e-05, "loss": 0.7082, "step": 698 }, { "epoch": 0.02040816326530612, "grad_norm": 1.0270542835724317, "learning_rate": 1.0201401050788092e-05, "loss": 0.7925, "step": 699 }, { "epoch": 0.020437359493153485, "grad_norm": 0.7635555067136856, "learning_rate": 1.0215995329830706e-05, "loss": 0.7049, "step": 700 }, { "epoch": 0.020466555721000847, "grad_norm": 0.9102421592023339, "learning_rate": 1.0230589608873324e-05, "loss": 0.7966, "step": 701 }, { "epoch": 0.020495751948848208, "grad_norm": 0.783441736215896, "learning_rate": 1.0245183887915938e-05, "loss": 0.7178, "step": 702 }, { "epoch": 0.020524948176695573, "grad_norm": 0.7671551626463976, "learning_rate": 1.0259778166958554e-05, "loss": 0.7479, "step": 703 }, { "epoch": 0.020554144404542934, "grad_norm": 0.8848968722274839, "learning_rate": 1.0274372446001168e-05, "loss": 0.7952, "step": 704 }, { "epoch": 0.020583340632390295, "grad_norm": 0.864708448246383, "learning_rate": 1.0288966725043784e-05, "loss": 0.7688, "step": 705 }, { "epoch": 0.020612536860237656, "grad_norm": 0.7993952985215935, "learning_rate": 1.0303561004086398e-05, "loss": 0.7735, "step": 706 }, { "epoch": 0.02064173308808502, "grad_norm": 0.8021764361334132, "learning_rate": 1.0318155283129014e-05, "loss": 0.7975, "step": 707 }, { "epoch": 0.020670929315932382, "grad_norm": 0.876286349636028, "learning_rate": 1.033274956217163e-05, "loss": 0.8035, "step": 708 }, { "epoch": 0.020700125543779743, "grad_norm": 0.8358628009280568, "learning_rate": 1.0347343841214245e-05, "loss": 0.7452, "step": 709 }, { "epoch": 0.020729321771627104, "grad_norm": 0.8354450387391867, "learning_rate": 1.036193812025686e-05, "loss": 0.7778, "step": 710 }, { "epoch": 0.02075851799947447, "grad_norm": 0.8043456191433463, "learning_rate": 1.0376532399299475e-05, "loss": 0.7658, "step": 711 }, { "epoch": 0.02078771422732183, "grad_norm": 0.7782935336535277, "learning_rate": 1.0391126678342091e-05, "loss": 0.738, "step": 712 }, { "epoch": 0.02081691045516919, "grad_norm": 0.950741782359852, "learning_rate": 1.0405720957384705e-05, "loss": 0.6989, "step": 713 }, { "epoch": 0.020846106683016553, "grad_norm": 0.7724776664202916, "learning_rate": 1.0420315236427321e-05, "loss": 0.7161, "step": 714 }, { "epoch": 0.020875302910863917, "grad_norm": 0.7084657511054202, "learning_rate": 1.0434909515469935e-05, "loss": 0.6908, "step": 715 }, { "epoch": 0.02090449913871128, "grad_norm": 0.9123530560224686, "learning_rate": 1.0449503794512551e-05, "loss": 0.8151, "step": 716 }, { "epoch": 0.02093369536655864, "grad_norm": 0.8510386019662187, "learning_rate": 1.0464098073555167e-05, "loss": 0.7772, "step": 717 }, { "epoch": 0.020962891594406004, "grad_norm": 0.7994591509239294, "learning_rate": 1.0478692352597783e-05, "loss": 0.7185, "step": 718 }, { "epoch": 0.020992087822253366, "grad_norm": 1.5836041495910713, "learning_rate": 1.0493286631640397e-05, "loss": 0.6551, "step": 719 }, { "epoch": 0.021021284050100727, "grad_norm": 0.7226006731215496, "learning_rate": 1.0507880910683013e-05, "loss": 0.6501, "step": 720 }, { "epoch": 0.021050480277948088, "grad_norm": 0.8638962018957159, "learning_rate": 1.0522475189725629e-05, "loss": 0.8704, "step": 721 }, { "epoch": 0.021079676505795453, "grad_norm": 0.7570951570342519, "learning_rate": 1.0537069468768243e-05, "loss": 0.688, "step": 722 }, { "epoch": 0.021108872733642814, "grad_norm": 0.7676166286190714, "learning_rate": 1.0551663747810859e-05, "loss": 0.7416, "step": 723 }, { "epoch": 0.021138068961490175, "grad_norm": 0.7661766283443254, "learning_rate": 1.0566258026853473e-05, "loss": 0.7017, "step": 724 }, { "epoch": 0.021167265189337536, "grad_norm": 0.7844363096170969, "learning_rate": 1.058085230589609e-05, "loss": 0.7564, "step": 725 }, { "epoch": 0.0211964614171849, "grad_norm": 0.8906634563431519, "learning_rate": 1.0595446584938705e-05, "loss": 0.7976, "step": 726 }, { "epoch": 0.021225657645032262, "grad_norm": 0.843614696419501, "learning_rate": 1.061004086398132e-05, "loss": 0.731, "step": 727 }, { "epoch": 0.021254853872879623, "grad_norm": 0.913530507702044, "learning_rate": 1.0624635143023935e-05, "loss": 0.7652, "step": 728 }, { "epoch": 0.021284050100726985, "grad_norm": 0.9910591327616888, "learning_rate": 1.063922942206655e-05, "loss": 0.7978, "step": 729 }, { "epoch": 0.02131324632857435, "grad_norm": 0.7783337609932499, "learning_rate": 1.0653823701109165e-05, "loss": 0.737, "step": 730 }, { "epoch": 0.02134244255642171, "grad_norm": 0.8041355698311187, "learning_rate": 1.066841798015178e-05, "loss": 0.7396, "step": 731 }, { "epoch": 0.02137163878426907, "grad_norm": 0.7256554083399631, "learning_rate": 1.0683012259194396e-05, "loss": 0.6821, "step": 732 }, { "epoch": 0.021400835012116433, "grad_norm": 0.8888773297983651, "learning_rate": 1.0697606538237012e-05, "loss": 0.8092, "step": 733 }, { "epoch": 0.021430031239963798, "grad_norm": 1.2794255669356132, "learning_rate": 1.0712200817279628e-05, "loss": 0.7962, "step": 734 }, { "epoch": 0.02145922746781116, "grad_norm": 0.8990390210569825, "learning_rate": 1.0726795096322242e-05, "loss": 0.7693, "step": 735 }, { "epoch": 0.02148842369565852, "grad_norm": 0.828878117639223, "learning_rate": 1.0741389375364858e-05, "loss": 0.7749, "step": 736 }, { "epoch": 0.021517619923505885, "grad_norm": 0.9190125007467207, "learning_rate": 1.0755983654407472e-05, "loss": 0.7453, "step": 737 }, { "epoch": 0.021546816151353246, "grad_norm": 0.8030068925373491, "learning_rate": 1.0770577933450088e-05, "loss": 0.711, "step": 738 }, { "epoch": 0.021576012379200607, "grad_norm": 0.7774646435928751, "learning_rate": 1.0785172212492702e-05, "loss": 0.7273, "step": 739 }, { "epoch": 0.021605208607047968, "grad_norm": 0.862592379412912, "learning_rate": 1.0799766491535318e-05, "loss": 0.7448, "step": 740 }, { "epoch": 0.021634404834895333, "grad_norm": 0.7668140500012556, "learning_rate": 1.0814360770577934e-05, "loss": 0.7173, "step": 741 }, { "epoch": 0.021663601062742694, "grad_norm": 0.82523489613105, "learning_rate": 1.082895504962055e-05, "loss": 0.7243, "step": 742 }, { "epoch": 0.021692797290590055, "grad_norm": 0.806765145061638, "learning_rate": 1.0843549328663166e-05, "loss": 0.7301, "step": 743 }, { "epoch": 0.021721993518437416, "grad_norm": 0.8296630725577586, "learning_rate": 1.085814360770578e-05, "loss": 0.792, "step": 744 }, { "epoch": 0.02175118974628478, "grad_norm": 0.8058066162103276, "learning_rate": 1.0872737886748396e-05, "loss": 0.7481, "step": 745 }, { "epoch": 0.021780385974132142, "grad_norm": 1.1051959042669424, "learning_rate": 1.088733216579101e-05, "loss": 0.6753, "step": 746 }, { "epoch": 0.021809582201979504, "grad_norm": 1.1853913069167685, "learning_rate": 1.0901926444833626e-05, "loss": 0.8751, "step": 747 }, { "epoch": 0.021838778429826865, "grad_norm": 0.7408608973934768, "learning_rate": 1.091652072387624e-05, "loss": 0.6612, "step": 748 }, { "epoch": 0.02186797465767423, "grad_norm": 0.8576125036609653, "learning_rate": 1.0931115002918857e-05, "loss": 0.7865, "step": 749 }, { "epoch": 0.02189717088552159, "grad_norm": 0.8102913816839683, "learning_rate": 1.0945709281961472e-05, "loss": 0.7428, "step": 750 }, { "epoch": 0.021926367113368952, "grad_norm": 0.8849385063144041, "learning_rate": 1.0960303561004087e-05, "loss": 0.7987, "step": 751 }, { "epoch": 0.021955563341216316, "grad_norm": 0.809416842905973, "learning_rate": 1.0974897840046702e-05, "loss": 0.7931, "step": 752 }, { "epoch": 0.021984759569063678, "grad_norm": 0.9340605498862024, "learning_rate": 1.0989492119089317e-05, "loss": 0.7529, "step": 753 }, { "epoch": 0.02201395579691104, "grad_norm": 0.8976784064923744, "learning_rate": 1.1004086398131933e-05, "loss": 0.7934, "step": 754 }, { "epoch": 0.0220431520247584, "grad_norm": 0.9064330032905444, "learning_rate": 1.1018680677174547e-05, "loss": 0.8339, "step": 755 }, { "epoch": 0.022072348252605765, "grad_norm": 0.8587422651932958, "learning_rate": 1.1033274956217163e-05, "loss": 0.7756, "step": 756 }, { "epoch": 0.022101544480453126, "grad_norm": 0.8114157988648175, "learning_rate": 1.1047869235259779e-05, "loss": 0.6639, "step": 757 }, { "epoch": 0.022130740708300487, "grad_norm": 0.818622072040373, "learning_rate": 1.1062463514302395e-05, "loss": 0.7873, "step": 758 }, { "epoch": 0.02215993693614785, "grad_norm": 0.6976860330570138, "learning_rate": 1.1077057793345009e-05, "loss": 0.5783, "step": 759 }, { "epoch": 0.022189133163995213, "grad_norm": 0.7863077706089083, "learning_rate": 1.1091652072387625e-05, "loss": 0.7054, "step": 760 }, { "epoch": 0.022218329391842574, "grad_norm": 2.2010108582474195, "learning_rate": 1.1106246351430239e-05, "loss": 0.8183, "step": 761 }, { "epoch": 0.022247525619689935, "grad_norm": 0.7842924734894671, "learning_rate": 1.1120840630472855e-05, "loss": 0.7031, "step": 762 }, { "epoch": 0.022276721847537297, "grad_norm": 1.0121165832056318, "learning_rate": 1.113543490951547e-05, "loss": 0.8142, "step": 763 }, { "epoch": 0.02230591807538466, "grad_norm": 0.8081931688867838, "learning_rate": 1.1150029188558085e-05, "loss": 0.7716, "step": 764 }, { "epoch": 0.022335114303232022, "grad_norm": 0.8912342192262145, "learning_rate": 1.11646234676007e-05, "loss": 0.7226, "step": 765 }, { "epoch": 0.022364310531079384, "grad_norm": 0.8439337071309798, "learning_rate": 1.1179217746643317e-05, "loss": 0.8021, "step": 766 }, { "epoch": 0.02239350675892675, "grad_norm": 0.7808900233215916, "learning_rate": 1.1193812025685933e-05, "loss": 0.7418, "step": 767 }, { "epoch": 0.02242270298677411, "grad_norm": 0.8485746726507475, "learning_rate": 1.1208406304728547e-05, "loss": 0.7703, "step": 768 }, { "epoch": 0.02245189921462147, "grad_norm": 0.8043251876180845, "learning_rate": 1.1223000583771163e-05, "loss": 0.7136, "step": 769 }, { "epoch": 0.022481095442468832, "grad_norm": 0.8545277252050953, "learning_rate": 1.1237594862813777e-05, "loss": 0.8345, "step": 770 }, { "epoch": 0.022510291670316197, "grad_norm": 0.9678662238021429, "learning_rate": 1.1252189141856393e-05, "loss": 0.8437, "step": 771 }, { "epoch": 0.022539487898163558, "grad_norm": 0.7822412128659845, "learning_rate": 1.1266783420899007e-05, "loss": 0.6995, "step": 772 }, { "epoch": 0.02256868412601092, "grad_norm": 0.8021038626243774, "learning_rate": 1.1281377699941624e-05, "loss": 0.7248, "step": 773 }, { "epoch": 0.02259788035385828, "grad_norm": 0.8577966170727743, "learning_rate": 1.1295971978984238e-05, "loss": 0.7811, "step": 774 }, { "epoch": 0.022627076581705645, "grad_norm": 0.8228749871776091, "learning_rate": 1.1310566258026854e-05, "loss": 0.7074, "step": 775 }, { "epoch": 0.022656272809553006, "grad_norm": 0.7653369874103316, "learning_rate": 1.132516053706947e-05, "loss": 0.7138, "step": 776 }, { "epoch": 0.022685469037400367, "grad_norm": 0.8992605042899048, "learning_rate": 1.1339754816112084e-05, "loss": 0.7664, "step": 777 }, { "epoch": 0.02271466526524773, "grad_norm": 0.8111204131682987, "learning_rate": 1.13543490951547e-05, "loss": 0.7824, "step": 778 }, { "epoch": 0.022743861493095093, "grad_norm": 0.8058575940250734, "learning_rate": 1.1368943374197314e-05, "loss": 0.8204, "step": 779 }, { "epoch": 0.022773057720942454, "grad_norm": 0.7305556662867178, "learning_rate": 1.138353765323993e-05, "loss": 0.6274, "step": 780 }, { "epoch": 0.022802253948789816, "grad_norm": 0.9250341073749678, "learning_rate": 1.1398131932282546e-05, "loss": 0.7666, "step": 781 }, { "epoch": 0.022831450176637177, "grad_norm": 1.055205584326094, "learning_rate": 1.1412726211325162e-05, "loss": 0.8708, "step": 782 }, { "epoch": 0.02286064640448454, "grad_norm": 0.8869896561774263, "learning_rate": 1.1427320490367776e-05, "loss": 0.7858, "step": 783 }, { "epoch": 0.022889842632331903, "grad_norm": 0.7945206839285065, "learning_rate": 1.1441914769410392e-05, "loss": 0.7204, "step": 784 }, { "epoch": 0.022919038860179264, "grad_norm": 0.7914313773372031, "learning_rate": 1.1456509048453006e-05, "loss": 0.744, "step": 785 }, { "epoch": 0.02294823508802663, "grad_norm": 0.8332790419651228, "learning_rate": 1.1471103327495622e-05, "loss": 0.7988, "step": 786 }, { "epoch": 0.02297743131587399, "grad_norm": 0.9746544639852469, "learning_rate": 1.1485697606538238e-05, "loss": 0.8565, "step": 787 }, { "epoch": 0.02300662754372135, "grad_norm": 1.1703752000192198, "learning_rate": 1.1500291885580852e-05, "loss": 0.8371, "step": 788 }, { "epoch": 0.023035823771568712, "grad_norm": 0.908750054785642, "learning_rate": 1.151488616462347e-05, "loss": 0.7649, "step": 789 }, { "epoch": 0.023065019999416077, "grad_norm": 0.795032834904694, "learning_rate": 1.1529480443666084e-05, "loss": 0.765, "step": 790 }, { "epoch": 0.023094216227263438, "grad_norm": 0.8488053718103618, "learning_rate": 1.15440747227087e-05, "loss": 0.7621, "step": 791 }, { "epoch": 0.0231234124551108, "grad_norm": 0.7829319603971279, "learning_rate": 1.1558669001751314e-05, "loss": 0.7884, "step": 792 }, { "epoch": 0.02315260868295816, "grad_norm": 0.8824011655027314, "learning_rate": 1.157326328079393e-05, "loss": 0.8697, "step": 793 }, { "epoch": 0.023181804910805525, "grad_norm": 0.7356738016489296, "learning_rate": 1.1587857559836544e-05, "loss": 0.6174, "step": 794 }, { "epoch": 0.023211001138652886, "grad_norm": 0.7818579319458865, "learning_rate": 1.160245183887916e-05, "loss": 0.7294, "step": 795 }, { "epoch": 0.023240197366500247, "grad_norm": 1.3007275652563817, "learning_rate": 1.1617046117921775e-05, "loss": 0.8019, "step": 796 }, { "epoch": 0.02326939359434761, "grad_norm": 0.8372667830825246, "learning_rate": 1.1631640396964391e-05, "loss": 0.7699, "step": 797 }, { "epoch": 0.023298589822194973, "grad_norm": 0.8023286101445561, "learning_rate": 1.1646234676007007e-05, "loss": 0.7586, "step": 798 }, { "epoch": 0.023327786050042335, "grad_norm": 0.765056164371347, "learning_rate": 1.1660828955049621e-05, "loss": 0.7555, "step": 799 }, { "epoch": 0.023356982277889696, "grad_norm": 0.8341680660294056, "learning_rate": 1.1675423234092237e-05, "loss": 0.7783, "step": 800 }, { "epoch": 0.02338617850573706, "grad_norm": 0.8626106900967051, "learning_rate": 1.1690017513134851e-05, "loss": 0.6625, "step": 801 }, { "epoch": 0.02341537473358442, "grad_norm": 0.8222036554105132, "learning_rate": 1.1704611792177467e-05, "loss": 0.8062, "step": 802 }, { "epoch": 0.023444570961431783, "grad_norm": 0.8717210943189191, "learning_rate": 1.1719206071220081e-05, "loss": 0.7581, "step": 803 }, { "epoch": 0.023473767189279144, "grad_norm": 0.9449462216539305, "learning_rate": 1.1733800350262697e-05, "loss": 0.7946, "step": 804 }, { "epoch": 0.02350296341712651, "grad_norm": 0.7642472205798388, "learning_rate": 1.1748394629305313e-05, "loss": 0.6738, "step": 805 }, { "epoch": 0.02353215964497387, "grad_norm": 0.8356485783305497, "learning_rate": 1.1762988908347929e-05, "loss": 0.7765, "step": 806 }, { "epoch": 0.02356135587282123, "grad_norm": 0.7608232349688774, "learning_rate": 1.1777583187390543e-05, "loss": 0.7271, "step": 807 }, { "epoch": 0.023590552100668592, "grad_norm": 0.7725756590576593, "learning_rate": 1.1792177466433159e-05, "loss": 0.6535, "step": 808 }, { "epoch": 0.023619748328515957, "grad_norm": 0.9852790429665532, "learning_rate": 1.1806771745475775e-05, "loss": 0.757, "step": 809 }, { "epoch": 0.023648944556363318, "grad_norm": 0.8892711634343561, "learning_rate": 1.1821366024518389e-05, "loss": 0.827, "step": 810 }, { "epoch": 0.02367814078421068, "grad_norm": 0.8333339084341469, "learning_rate": 1.1835960303561005e-05, "loss": 0.8424, "step": 811 }, { "epoch": 0.02370733701205804, "grad_norm": 0.7716714061592658, "learning_rate": 1.1850554582603619e-05, "loss": 0.6972, "step": 812 }, { "epoch": 0.023736533239905405, "grad_norm": 0.7809967465920167, "learning_rate": 1.1865148861646236e-05, "loss": 0.7035, "step": 813 }, { "epoch": 0.023765729467752766, "grad_norm": 0.8014818515247051, "learning_rate": 1.187974314068885e-05, "loss": 0.785, "step": 814 }, { "epoch": 0.023794925695600128, "grad_norm": 0.7712666809158579, "learning_rate": 1.1894337419731466e-05, "loss": 0.7322, "step": 815 }, { "epoch": 0.023824121923447492, "grad_norm": 0.8534660618647479, "learning_rate": 1.190893169877408e-05, "loss": 0.7616, "step": 816 }, { "epoch": 0.023853318151294853, "grad_norm": 1.0909596783450246, "learning_rate": 1.1923525977816696e-05, "loss": 0.7729, "step": 817 }, { "epoch": 0.023882514379142215, "grad_norm": 0.7762685068543278, "learning_rate": 1.1938120256859312e-05, "loss": 0.7143, "step": 818 }, { "epoch": 0.023911710606989576, "grad_norm": 0.7860452095492124, "learning_rate": 1.1952714535901926e-05, "loss": 0.7632, "step": 819 }, { "epoch": 0.02394090683483694, "grad_norm": 0.7232437060879015, "learning_rate": 1.1967308814944542e-05, "loss": 0.6065, "step": 820 }, { "epoch": 0.023970103062684302, "grad_norm": 0.7580166391676819, "learning_rate": 1.1981903093987158e-05, "loss": 0.6044, "step": 821 }, { "epoch": 0.023999299290531663, "grad_norm": 0.8585640976465648, "learning_rate": 1.1996497373029774e-05, "loss": 0.7471, "step": 822 }, { "epoch": 0.024028495518379024, "grad_norm": 0.8532952307805952, "learning_rate": 1.2011091652072388e-05, "loss": 0.7368, "step": 823 }, { "epoch": 0.02405769174622639, "grad_norm": 0.744169375517655, "learning_rate": 1.2025685931115004e-05, "loss": 0.7282, "step": 824 }, { "epoch": 0.02408688797407375, "grad_norm": 0.729024398863182, "learning_rate": 1.2040280210157618e-05, "loss": 0.6971, "step": 825 }, { "epoch": 0.02411608420192111, "grad_norm": 0.8411178940079045, "learning_rate": 1.2054874489200234e-05, "loss": 0.7738, "step": 826 }, { "epoch": 0.024145280429768472, "grad_norm": 0.7838292980675825, "learning_rate": 1.2069468768242848e-05, "loss": 0.7514, "step": 827 }, { "epoch": 0.024174476657615837, "grad_norm": 0.8015482052745424, "learning_rate": 1.2084063047285464e-05, "loss": 0.7473, "step": 828 }, { "epoch": 0.0242036728854632, "grad_norm": 0.8009901995048929, "learning_rate": 1.209865732632808e-05, "loss": 0.7749, "step": 829 }, { "epoch": 0.02423286911331056, "grad_norm": 0.7918957116746289, "learning_rate": 1.2113251605370696e-05, "loss": 0.7139, "step": 830 }, { "epoch": 0.02426206534115792, "grad_norm": 0.7750848528745886, "learning_rate": 1.2127845884413312e-05, "loss": 0.7033, "step": 831 }, { "epoch": 0.024291261569005285, "grad_norm": 0.7972005715064111, "learning_rate": 1.2142440163455926e-05, "loss": 0.7312, "step": 832 }, { "epoch": 0.024320457796852647, "grad_norm": 0.7343425317238879, "learning_rate": 1.2157034442498542e-05, "loss": 0.6833, "step": 833 }, { "epoch": 0.024349654024700008, "grad_norm": 0.8483088652616778, "learning_rate": 1.2171628721541156e-05, "loss": 0.7037, "step": 834 }, { "epoch": 0.024378850252547372, "grad_norm": 0.7853142037944628, "learning_rate": 1.2186223000583772e-05, "loss": 0.8009, "step": 835 }, { "epoch": 0.024408046480394734, "grad_norm": 0.8070308346180859, "learning_rate": 1.2200817279626386e-05, "loss": 0.7282, "step": 836 }, { "epoch": 0.024437242708242095, "grad_norm": 0.8167738916939659, "learning_rate": 1.2215411558669003e-05, "loss": 0.8057, "step": 837 }, { "epoch": 0.024466438936089456, "grad_norm": 1.0719739983058154, "learning_rate": 1.2230005837711617e-05, "loss": 0.7709, "step": 838 }, { "epoch": 0.02449563516393682, "grad_norm": 1.0152962886736105, "learning_rate": 1.2244600116754233e-05, "loss": 0.8427, "step": 839 }, { "epoch": 0.024524831391784182, "grad_norm": 0.8949642737586997, "learning_rate": 1.2259194395796847e-05, "loss": 0.776, "step": 840 }, { "epoch": 0.024554027619631543, "grad_norm": 1.4279670751649527, "learning_rate": 1.2273788674839463e-05, "loss": 0.8516, "step": 841 }, { "epoch": 0.024583223847478904, "grad_norm": 0.8269737822849591, "learning_rate": 1.228838295388208e-05, "loss": 0.7961, "step": 842 }, { "epoch": 0.02461242007532627, "grad_norm": 0.7940685270236418, "learning_rate": 1.2302977232924693e-05, "loss": 0.7844, "step": 843 }, { "epoch": 0.02464161630317363, "grad_norm": 0.7873527350482047, "learning_rate": 1.231757151196731e-05, "loss": 0.7672, "step": 844 }, { "epoch": 0.02467081253102099, "grad_norm": 0.8005205269320226, "learning_rate": 1.2332165791009925e-05, "loss": 0.8028, "step": 845 }, { "epoch": 0.024700008758868353, "grad_norm": 0.8105763130729926, "learning_rate": 1.2346760070052541e-05, "loss": 0.7366, "step": 846 }, { "epoch": 0.024729204986715717, "grad_norm": 0.742715048830473, "learning_rate": 1.2361354349095155e-05, "loss": 0.6733, "step": 847 }, { "epoch": 0.02475840121456308, "grad_norm": 0.949946440425362, "learning_rate": 1.2375948628137771e-05, "loss": 0.8552, "step": 848 }, { "epoch": 0.02478759744241044, "grad_norm": 0.9295009629823174, "learning_rate": 1.2390542907180385e-05, "loss": 0.82, "step": 849 }, { "epoch": 0.024816793670257804, "grad_norm": 0.7776881592856939, "learning_rate": 1.2405137186223001e-05, "loss": 0.7213, "step": 850 }, { "epoch": 0.024845989898105166, "grad_norm": 0.8791332026329269, "learning_rate": 1.2419731465265617e-05, "loss": 0.756, "step": 851 }, { "epoch": 0.024875186125952527, "grad_norm": 0.8676023439102957, "learning_rate": 1.2434325744308231e-05, "loss": 0.7406, "step": 852 }, { "epoch": 0.024904382353799888, "grad_norm": 0.9695006305594757, "learning_rate": 1.2448920023350849e-05, "loss": 0.9026, "step": 853 }, { "epoch": 0.024933578581647253, "grad_norm": 0.7980192819968348, "learning_rate": 1.2463514302393463e-05, "loss": 0.7864, "step": 854 }, { "epoch": 0.024962774809494614, "grad_norm": 0.8007052718106907, "learning_rate": 1.2478108581436079e-05, "loss": 0.7037, "step": 855 }, { "epoch": 0.024991971037341975, "grad_norm": 0.9657006100147916, "learning_rate": 1.2492702860478693e-05, "loss": 0.8057, "step": 856 }, { "epoch": 0.025021167265189336, "grad_norm": 0.9676042784663494, "learning_rate": 1.250729713952131e-05, "loss": 0.8742, "step": 857 }, { "epoch": 0.0250503634930367, "grad_norm": 0.796549844121491, "learning_rate": 1.2521891418563924e-05, "loss": 0.6941, "step": 858 }, { "epoch": 0.025079559720884062, "grad_norm": 0.76982746613907, "learning_rate": 1.253648569760654e-05, "loss": 0.7012, "step": 859 }, { "epoch": 0.025108755948731423, "grad_norm": 0.7724220038405294, "learning_rate": 1.2551079976649154e-05, "loss": 0.7707, "step": 860 }, { "epoch": 0.025137952176578784, "grad_norm": 0.7760027587195546, "learning_rate": 1.256567425569177e-05, "loss": 0.6909, "step": 861 }, { "epoch": 0.02516714840442615, "grad_norm": 0.7495944739671899, "learning_rate": 1.2580268534734384e-05, "loss": 0.7533, "step": 862 }, { "epoch": 0.02519634463227351, "grad_norm": 0.7318643084573618, "learning_rate": 1.2594862813777e-05, "loss": 0.6817, "step": 863 }, { "epoch": 0.02522554086012087, "grad_norm": 0.93302205904551, "learning_rate": 1.2609457092819616e-05, "loss": 0.7951, "step": 864 }, { "epoch": 0.025254737087968236, "grad_norm": 0.8310951060891442, "learning_rate": 1.262405137186223e-05, "loss": 0.6944, "step": 865 }, { "epoch": 0.025283933315815597, "grad_norm": 0.8235710692841189, "learning_rate": 1.2638645650904846e-05, "loss": 0.7754, "step": 866 }, { "epoch": 0.02531312954366296, "grad_norm": 0.9457843345927637, "learning_rate": 1.265323992994746e-05, "loss": 0.9369, "step": 867 }, { "epoch": 0.02534232577151032, "grad_norm": 0.7404694850484914, "learning_rate": 1.2667834208990076e-05, "loss": 0.7716, "step": 868 }, { "epoch": 0.025371521999357684, "grad_norm": 0.8611016066281394, "learning_rate": 1.268242848803269e-05, "loss": 0.6806, "step": 869 }, { "epoch": 0.025400718227205046, "grad_norm": 0.9020559256831163, "learning_rate": 1.2697022767075306e-05, "loss": 0.8085, "step": 870 }, { "epoch": 0.025429914455052407, "grad_norm": 0.9735783912209663, "learning_rate": 1.2711617046117922e-05, "loss": 0.7332, "step": 871 }, { "epoch": 0.025459110682899768, "grad_norm": 0.8092796164409228, "learning_rate": 1.2726211325160536e-05, "loss": 0.7309, "step": 872 }, { "epoch": 0.025488306910747133, "grad_norm": 0.7732077895509657, "learning_rate": 1.2740805604203154e-05, "loss": 0.7088, "step": 873 }, { "epoch": 0.025517503138594494, "grad_norm": 0.7336110169380771, "learning_rate": 1.275539988324577e-05, "loss": 0.6728, "step": 874 }, { "epoch": 0.025546699366441855, "grad_norm": 0.8503519759793587, "learning_rate": 1.2769994162288384e-05, "loss": 0.7773, "step": 875 }, { "epoch": 0.025575895594289216, "grad_norm": 0.751216048114801, "learning_rate": 1.2784588441331e-05, "loss": 0.6829, "step": 876 }, { "epoch": 0.02560509182213658, "grad_norm": 0.8785859078971012, "learning_rate": 1.2799182720373615e-05, "loss": 0.7838, "step": 877 }, { "epoch": 0.025634288049983942, "grad_norm": 0.7715076635420379, "learning_rate": 1.281377699941623e-05, "loss": 0.682, "step": 878 }, { "epoch": 0.025663484277831303, "grad_norm": 0.8288392547994865, "learning_rate": 1.2828371278458845e-05, "loss": 0.7738, "step": 879 }, { "epoch": 0.025692680505678665, "grad_norm": 0.7642776185237286, "learning_rate": 1.284296555750146e-05, "loss": 0.7024, "step": 880 }, { "epoch": 0.02572187673352603, "grad_norm": 0.6718446037582384, "learning_rate": 1.2857559836544075e-05, "loss": 0.5818, "step": 881 }, { "epoch": 0.02575107296137339, "grad_norm": 0.8764243223753936, "learning_rate": 1.287215411558669e-05, "loss": 0.785, "step": 882 }, { "epoch": 0.02578026918922075, "grad_norm": 0.738425599168615, "learning_rate": 1.2886748394629305e-05, "loss": 0.6467, "step": 883 }, { "epoch": 0.025809465417068116, "grad_norm": 0.8121490705172574, "learning_rate": 1.2901342673671921e-05, "loss": 0.7572, "step": 884 }, { "epoch": 0.025838661644915478, "grad_norm": 0.8890554958760256, "learning_rate": 1.2915936952714535e-05, "loss": 0.7068, "step": 885 }, { "epoch": 0.02586785787276284, "grad_norm": 0.7950566241927689, "learning_rate": 1.2930531231757151e-05, "loss": 0.6718, "step": 886 }, { "epoch": 0.0258970541006102, "grad_norm": 0.7953757808205985, "learning_rate": 1.2945125510799765e-05, "loss": 0.7014, "step": 887 }, { "epoch": 0.025926250328457565, "grad_norm": 0.7475038012148322, "learning_rate": 1.2959719789842381e-05, "loss": 0.689, "step": 888 }, { "epoch": 0.025955446556304926, "grad_norm": 0.7259864975542925, "learning_rate": 1.2974314068884999e-05, "loss": 0.6607, "step": 889 }, { "epoch": 0.025984642784152287, "grad_norm": 0.7797731947321136, "learning_rate": 1.2988908347927615e-05, "loss": 0.6997, "step": 890 }, { "epoch": 0.026013839011999648, "grad_norm": 0.9606282233489428, "learning_rate": 1.3003502626970229e-05, "loss": 0.7458, "step": 891 }, { "epoch": 0.026043035239847013, "grad_norm": 0.8093828211151342, "learning_rate": 1.3018096906012845e-05, "loss": 0.776, "step": 892 }, { "epoch": 0.026072231467694374, "grad_norm": 0.7587020778015765, "learning_rate": 1.3032691185055459e-05, "loss": 0.7154, "step": 893 }, { "epoch": 0.026101427695541735, "grad_norm": 0.7796018183503156, "learning_rate": 1.3047285464098075e-05, "loss": 0.708, "step": 894 }, { "epoch": 0.026130623923389096, "grad_norm": 1.1310669770966006, "learning_rate": 1.3061879743140689e-05, "loss": 0.8023, "step": 895 }, { "epoch": 0.02615982015123646, "grad_norm": 0.7686887643426125, "learning_rate": 1.3076474022183305e-05, "loss": 0.7545, "step": 896 }, { "epoch": 0.026189016379083822, "grad_norm": 0.9852232473060869, "learning_rate": 1.309106830122592e-05, "loss": 0.8019, "step": 897 }, { "epoch": 0.026218212606931184, "grad_norm": 0.7443164367336491, "learning_rate": 1.3105662580268535e-05, "loss": 0.681, "step": 898 }, { "epoch": 0.026247408834778548, "grad_norm": 0.780972852758699, "learning_rate": 1.312025685931115e-05, "loss": 0.6272, "step": 899 }, { "epoch": 0.02627660506262591, "grad_norm": 0.7815916794118366, "learning_rate": 1.3134851138353765e-05, "loss": 0.7218, "step": 900 }, { "epoch": 0.02630580129047327, "grad_norm": 1.421307370295616, "learning_rate": 1.314944541739638e-05, "loss": 0.8576, "step": 901 }, { "epoch": 0.026334997518320632, "grad_norm": 0.7731122441826651, "learning_rate": 1.3164039696438995e-05, "loss": 0.7044, "step": 902 }, { "epoch": 0.026364193746167996, "grad_norm": 0.8781878271662531, "learning_rate": 1.317863397548161e-05, "loss": 0.7415, "step": 903 }, { "epoch": 0.026393389974015358, "grad_norm": 0.827515746145212, "learning_rate": 1.3193228254524227e-05, "loss": 0.732, "step": 904 }, { "epoch": 0.02642258620186272, "grad_norm": 0.823367870618862, "learning_rate": 1.3207822533566844e-05, "loss": 0.7708, "step": 905 }, { "epoch": 0.02645178242971008, "grad_norm": 0.7959434779264146, "learning_rate": 1.3222416812609458e-05, "loss": 0.728, "step": 906 }, { "epoch": 0.026480978657557445, "grad_norm": 0.8787810036694244, "learning_rate": 1.3237011091652074e-05, "loss": 0.698, "step": 907 }, { "epoch": 0.026510174885404806, "grad_norm": 0.7862611849941853, "learning_rate": 1.325160537069469e-05, "loss": 0.7738, "step": 908 }, { "epoch": 0.026539371113252167, "grad_norm": 1.081783125268956, "learning_rate": 1.3266199649737304e-05, "loss": 0.9132, "step": 909 }, { "epoch": 0.02656856734109953, "grad_norm": 0.8358051475759259, "learning_rate": 1.328079392877992e-05, "loss": 0.6416, "step": 910 }, { "epoch": 0.026597763568946893, "grad_norm": 0.7355062336682386, "learning_rate": 1.3295388207822534e-05, "loss": 0.6696, "step": 911 }, { "epoch": 0.026626959796794254, "grad_norm": 1.192618282096854, "learning_rate": 1.330998248686515e-05, "loss": 0.7328, "step": 912 }, { "epoch": 0.026656156024641615, "grad_norm": 0.7724449620883177, "learning_rate": 1.3324576765907764e-05, "loss": 0.7606, "step": 913 }, { "epoch": 0.02668535225248898, "grad_norm": 0.8015639224785795, "learning_rate": 1.333917104495038e-05, "loss": 0.8209, "step": 914 }, { "epoch": 0.02671454848033634, "grad_norm": 1.0890643615033675, "learning_rate": 1.3353765323992994e-05, "loss": 0.7453, "step": 915 }, { "epoch": 0.026743744708183703, "grad_norm": 1.119458547125154, "learning_rate": 1.336835960303561e-05, "loss": 0.7829, "step": 916 }, { "epoch": 0.026772940936031064, "grad_norm": 1.0599010641668702, "learning_rate": 1.3382953882078226e-05, "loss": 0.7269, "step": 917 }, { "epoch": 0.02680213716387843, "grad_norm": 0.8537809154862255, "learning_rate": 1.339754816112084e-05, "loss": 0.7823, "step": 918 }, { "epoch": 0.02683133339172579, "grad_norm": 0.7913836776918222, "learning_rate": 1.3412142440163456e-05, "loss": 0.7871, "step": 919 }, { "epoch": 0.02686052961957315, "grad_norm": 0.7582086391047321, "learning_rate": 1.342673671920607e-05, "loss": 0.634, "step": 920 }, { "epoch": 0.026889725847420512, "grad_norm": 0.7264236942173412, "learning_rate": 1.344133099824869e-05, "loss": 0.6465, "step": 921 }, { "epoch": 0.026918922075267877, "grad_norm": 0.8414066519529957, "learning_rate": 1.3455925277291303e-05, "loss": 0.8552, "step": 922 }, { "epoch": 0.026948118303115238, "grad_norm": 0.7544648653201159, "learning_rate": 1.347051955633392e-05, "loss": 0.7221, "step": 923 }, { "epoch": 0.0269773145309626, "grad_norm": 0.720038600595701, "learning_rate": 1.3485113835376533e-05, "loss": 0.679, "step": 924 }, { "epoch": 0.02700651075880996, "grad_norm": 0.7835142488106633, "learning_rate": 1.349970811441915e-05, "loss": 0.7558, "step": 925 }, { "epoch": 0.027035706986657325, "grad_norm": 0.8776320625168369, "learning_rate": 1.3514302393461763e-05, "loss": 0.6987, "step": 926 }, { "epoch": 0.027064903214504686, "grad_norm": 0.7834662982957269, "learning_rate": 1.352889667250438e-05, "loss": 0.7243, "step": 927 }, { "epoch": 0.027094099442352047, "grad_norm": 0.788304225072381, "learning_rate": 1.3543490951546995e-05, "loss": 0.7542, "step": 928 }, { "epoch": 0.02712329567019941, "grad_norm": 0.7575701483590666, "learning_rate": 1.355808523058961e-05, "loss": 0.7536, "step": 929 }, { "epoch": 0.027152491898046773, "grad_norm": 3.116646790035353, "learning_rate": 1.3572679509632225e-05, "loss": 0.8712, "step": 930 }, { "epoch": 0.027181688125894134, "grad_norm": 0.8678427022079123, "learning_rate": 1.358727378867484e-05, "loss": 0.8345, "step": 931 }, { "epoch": 0.027210884353741496, "grad_norm": 0.882460433675293, "learning_rate": 1.3601868067717455e-05, "loss": 0.7319, "step": 932 }, { "epoch": 0.02724008058158886, "grad_norm": 0.7133515079079473, "learning_rate": 1.361646234676007e-05, "loss": 0.6327, "step": 933 }, { "epoch": 0.02726927680943622, "grad_norm": 0.8914139763072105, "learning_rate": 1.3631056625802685e-05, "loss": 0.8416, "step": 934 }, { "epoch": 0.027298473037283583, "grad_norm": 0.7755631560258744, "learning_rate": 1.36456509048453e-05, "loss": 0.7075, "step": 935 }, { "epoch": 0.027327669265130944, "grad_norm": 0.8987458580371546, "learning_rate": 1.3660245183887915e-05, "loss": 0.7995, "step": 936 }, { "epoch": 0.02735686549297831, "grad_norm": 0.7507053899354617, "learning_rate": 1.3674839462930533e-05, "loss": 0.5969, "step": 937 }, { "epoch": 0.02738606172082567, "grad_norm": 0.9631711007153521, "learning_rate": 1.3689433741973149e-05, "loss": 0.7779, "step": 938 }, { "epoch": 0.02741525794867303, "grad_norm": 0.9580609363808129, "learning_rate": 1.3704028021015763e-05, "loss": 0.7686, "step": 939 }, { "epoch": 0.027444454176520392, "grad_norm": 0.8132901046006111, "learning_rate": 1.3718622300058379e-05, "loss": 0.6921, "step": 940 }, { "epoch": 0.027473650404367757, "grad_norm": 0.8535567185697958, "learning_rate": 1.3733216579100994e-05, "loss": 0.7359, "step": 941 }, { "epoch": 0.027502846632215118, "grad_norm": 0.9636922827745855, "learning_rate": 1.3747810858143609e-05, "loss": 0.8592, "step": 942 }, { "epoch": 0.02753204286006248, "grad_norm": 0.8000823229810773, "learning_rate": 1.3762405137186224e-05, "loss": 0.8085, "step": 943 }, { "epoch": 0.02756123908790984, "grad_norm": 0.7973747842038004, "learning_rate": 1.3776999416228839e-05, "loss": 0.7096, "step": 944 }, { "epoch": 0.027590435315757205, "grad_norm": 0.8082036356229073, "learning_rate": 1.3791593695271454e-05, "loss": 0.7561, "step": 945 }, { "epoch": 0.027619631543604566, "grad_norm": 0.8279784809814691, "learning_rate": 1.3806187974314069e-05, "loss": 0.7403, "step": 946 }, { "epoch": 0.027648827771451927, "grad_norm": 0.774662656019682, "learning_rate": 1.3820782253356684e-05, "loss": 0.7642, "step": 947 }, { "epoch": 0.027678023999299292, "grad_norm": 0.8377058608893306, "learning_rate": 1.38353765323993e-05, "loss": 0.754, "step": 948 }, { "epoch": 0.027707220227146653, "grad_norm": 0.7221516455870887, "learning_rate": 1.3849970811441915e-05, "loss": 0.6256, "step": 949 }, { "epoch": 0.027736416454994015, "grad_norm": 0.8439853369211395, "learning_rate": 1.386456509048453e-05, "loss": 0.8252, "step": 950 }, { "epoch": 0.027765612682841376, "grad_norm": 0.7481123412701063, "learning_rate": 1.3879159369527145e-05, "loss": 0.6604, "step": 951 }, { "epoch": 0.02779480891068874, "grad_norm": 0.8372365848024196, "learning_rate": 1.389375364856976e-05, "loss": 0.8239, "step": 952 }, { "epoch": 0.0278240051385361, "grad_norm": 0.8263625487840682, "learning_rate": 1.3908347927612378e-05, "loss": 0.7921, "step": 953 }, { "epoch": 0.027853201366383463, "grad_norm": 1.0155498099339086, "learning_rate": 1.3922942206654994e-05, "loss": 0.8856, "step": 954 }, { "epoch": 0.027882397594230824, "grad_norm": 0.8507690221839398, "learning_rate": 1.3937536485697608e-05, "loss": 0.8146, "step": 955 }, { "epoch": 0.02791159382207819, "grad_norm": 0.7957716253914399, "learning_rate": 1.3952130764740224e-05, "loss": 0.7268, "step": 956 }, { "epoch": 0.02794079004992555, "grad_norm": 0.7882440059983257, "learning_rate": 1.3966725043782838e-05, "loss": 0.716, "step": 957 }, { "epoch": 0.02796998627777291, "grad_norm": 0.756415082612536, "learning_rate": 1.3981319322825454e-05, "loss": 0.7389, "step": 958 }, { "epoch": 0.027999182505620272, "grad_norm": 0.7884591027709038, "learning_rate": 1.3995913601868068e-05, "loss": 0.7088, "step": 959 }, { "epoch": 0.028028378733467637, "grad_norm": 0.8771117111979043, "learning_rate": 1.4010507880910684e-05, "loss": 0.7124, "step": 960 }, { "epoch": 0.028057574961314998, "grad_norm": 0.7861355346928525, "learning_rate": 1.40251021599533e-05, "loss": 0.7145, "step": 961 }, { "epoch": 0.02808677118916236, "grad_norm": 0.8024545159745831, "learning_rate": 1.4039696438995914e-05, "loss": 0.7705, "step": 962 }, { "epoch": 0.028115967417009724, "grad_norm": 0.8256040623856743, "learning_rate": 1.405429071803853e-05, "loss": 0.7245, "step": 963 }, { "epoch": 0.028145163644857085, "grad_norm": 0.8099472297053458, "learning_rate": 1.4068884997081144e-05, "loss": 0.7609, "step": 964 }, { "epoch": 0.028174359872704446, "grad_norm": 0.8018325589771238, "learning_rate": 1.408347927612376e-05, "loss": 0.7206, "step": 965 }, { "epoch": 0.028203556100551808, "grad_norm": 0.845930826335637, "learning_rate": 1.4098073555166374e-05, "loss": 0.8275, "step": 966 }, { "epoch": 0.028232752328399172, "grad_norm": 0.7815547030514668, "learning_rate": 1.411266783420899e-05, "loss": 0.7505, "step": 967 }, { "epoch": 0.028261948556246533, "grad_norm": 0.9031566105144541, "learning_rate": 1.4127262113251606e-05, "loss": 0.7835, "step": 968 }, { "epoch": 0.028291144784093895, "grad_norm": 0.8633430516169884, "learning_rate": 1.4141856392294223e-05, "loss": 0.7665, "step": 969 }, { "epoch": 0.028320341011941256, "grad_norm": 0.8462781877450374, "learning_rate": 1.4156450671336837e-05, "loss": 0.7514, "step": 970 }, { "epoch": 0.02834953723978862, "grad_norm": 0.7962415857555741, "learning_rate": 1.4171044950379453e-05, "loss": 0.7145, "step": 971 }, { "epoch": 0.028378733467635982, "grad_norm": 0.8726687574283934, "learning_rate": 1.4185639229422067e-05, "loss": 0.8215, "step": 972 }, { "epoch": 0.028407929695483343, "grad_norm": 0.813307245418182, "learning_rate": 1.4200233508464683e-05, "loss": 0.8033, "step": 973 }, { "epoch": 0.028437125923330704, "grad_norm": 0.8349813203523778, "learning_rate": 1.4214827787507299e-05, "loss": 0.766, "step": 974 }, { "epoch": 0.02846632215117807, "grad_norm": 0.803959315143778, "learning_rate": 1.4229422066549913e-05, "loss": 0.7756, "step": 975 }, { "epoch": 0.02849551837902543, "grad_norm": 0.7571099832493837, "learning_rate": 1.4244016345592529e-05, "loss": 0.6858, "step": 976 }, { "epoch": 0.02852471460687279, "grad_norm": 0.9575171364520659, "learning_rate": 1.4258610624635143e-05, "loss": 0.7763, "step": 977 }, { "epoch": 0.028553910834720152, "grad_norm": 0.8114088133208709, "learning_rate": 1.4273204903677759e-05, "loss": 0.7491, "step": 978 }, { "epoch": 0.028583107062567517, "grad_norm": 0.9638611897161351, "learning_rate": 1.4287799182720373e-05, "loss": 0.8791, "step": 979 }, { "epoch": 0.02861230329041488, "grad_norm": 0.7726471445526808, "learning_rate": 1.4302393461762989e-05, "loss": 0.6604, "step": 980 }, { "epoch": 0.02864149951826224, "grad_norm": 0.7822568160585552, "learning_rate": 1.4316987740805605e-05, "loss": 0.7454, "step": 981 }, { "epoch": 0.028670695746109604, "grad_norm": 0.8537472520570492, "learning_rate": 1.4331582019848219e-05, "loss": 0.8279, "step": 982 }, { "epoch": 0.028699891973956965, "grad_norm": 0.9096984789065148, "learning_rate": 1.4346176298890835e-05, "loss": 0.7095, "step": 983 }, { "epoch": 0.028729088201804327, "grad_norm": 0.9057670293613972, "learning_rate": 1.4360770577933449e-05, "loss": 0.7053, "step": 984 }, { "epoch": 0.028758284429651688, "grad_norm": 0.8695499205573753, "learning_rate": 1.4375364856976067e-05, "loss": 0.7761, "step": 985 }, { "epoch": 0.028787480657499052, "grad_norm": 0.799624190742578, "learning_rate": 1.4389959136018682e-05, "loss": 0.7509, "step": 986 }, { "epoch": 0.028816676885346414, "grad_norm": 0.7179257484174055, "learning_rate": 1.4404553415061298e-05, "loss": 0.6561, "step": 987 }, { "epoch": 0.028845873113193775, "grad_norm": 0.7877476861856806, "learning_rate": 1.4419147694103912e-05, "loss": 0.8118, "step": 988 }, { "epoch": 0.028875069341041136, "grad_norm": 0.7348794845039002, "learning_rate": 1.4433741973146528e-05, "loss": 0.6454, "step": 989 }, { "epoch": 0.0289042655688885, "grad_norm": 0.8006311758502258, "learning_rate": 1.4448336252189142e-05, "loss": 0.7555, "step": 990 }, { "epoch": 0.028933461796735862, "grad_norm": 0.7991300237168334, "learning_rate": 1.4462930531231758e-05, "loss": 0.6826, "step": 991 }, { "epoch": 0.028962658024583223, "grad_norm": 0.7585046507586862, "learning_rate": 1.4477524810274372e-05, "loss": 0.7324, "step": 992 }, { "epoch": 0.028991854252430584, "grad_norm": 0.7677568364298626, "learning_rate": 1.4492119089316988e-05, "loss": 0.7527, "step": 993 }, { "epoch": 0.02902105048027795, "grad_norm": 0.8467396075451957, "learning_rate": 1.4506713368359604e-05, "loss": 0.7001, "step": 994 }, { "epoch": 0.02905024670812531, "grad_norm": 0.818756238125478, "learning_rate": 1.4521307647402218e-05, "loss": 0.7478, "step": 995 }, { "epoch": 0.02907944293597267, "grad_norm": 1.005492245559951, "learning_rate": 1.4535901926444834e-05, "loss": 0.8064, "step": 996 }, { "epoch": 0.029108639163820036, "grad_norm": 0.8776407134576788, "learning_rate": 1.4550496205487448e-05, "loss": 0.7646, "step": 997 }, { "epoch": 0.029137835391667397, "grad_norm": 0.8057445553590407, "learning_rate": 1.4565090484530064e-05, "loss": 0.8146, "step": 998 }, { "epoch": 0.02916703161951476, "grad_norm": 0.8691203717491548, "learning_rate": 1.4579684763572678e-05, "loss": 0.7641, "step": 999 }, { "epoch": 0.02919622784736212, "grad_norm": 0.7397166398805538, "learning_rate": 1.4594279042615294e-05, "loss": 0.6156, "step": 1000 }, { "epoch": 0.029225424075209484, "grad_norm": 0.7538556818599083, "learning_rate": 1.4608873321657912e-05, "loss": 0.6119, "step": 1001 }, { "epoch": 0.029254620303056846, "grad_norm": 1.8618798700330124, "learning_rate": 1.4623467600700528e-05, "loss": 0.7618, "step": 1002 }, { "epoch": 0.029283816530904207, "grad_norm": 0.7891686995002639, "learning_rate": 1.4638061879743142e-05, "loss": 0.6915, "step": 1003 }, { "epoch": 0.029313012758751568, "grad_norm": 0.8218082827357581, "learning_rate": 1.4652656158785758e-05, "loss": 0.7526, "step": 1004 }, { "epoch": 0.029342208986598933, "grad_norm": 0.7299410479803826, "learning_rate": 1.4667250437828372e-05, "loss": 0.6282, "step": 1005 }, { "epoch": 0.029371405214446294, "grad_norm": 0.7629115404771191, "learning_rate": 1.4681844716870988e-05, "loss": 0.7106, "step": 1006 }, { "epoch": 0.029400601442293655, "grad_norm": 0.9451073325212596, "learning_rate": 1.4696438995913604e-05, "loss": 0.6918, "step": 1007 }, { "epoch": 0.029429797670141016, "grad_norm": 0.8571710180694413, "learning_rate": 1.4711033274956218e-05, "loss": 0.7959, "step": 1008 }, { "epoch": 0.02945899389798838, "grad_norm": 0.7244200857069784, "learning_rate": 1.4725627553998834e-05, "loss": 0.6198, "step": 1009 }, { "epoch": 0.029488190125835742, "grad_norm": 0.8634667554203884, "learning_rate": 1.4740221833041448e-05, "loss": 0.7826, "step": 1010 }, { "epoch": 0.029517386353683103, "grad_norm": 0.9012443222195884, "learning_rate": 1.4754816112084064e-05, "loss": 0.7023, "step": 1011 }, { "epoch": 0.029546582581530468, "grad_norm": 0.9356273193830233, "learning_rate": 1.4769410391126678e-05, "loss": 0.8677, "step": 1012 }, { "epoch": 0.02957577880937783, "grad_norm": 0.8083700331845987, "learning_rate": 1.4784004670169294e-05, "loss": 0.7419, "step": 1013 }, { "epoch": 0.02960497503722519, "grad_norm": 1.6459159278371118, "learning_rate": 1.479859894921191e-05, "loss": 0.8938, "step": 1014 }, { "epoch": 0.02963417126507255, "grad_norm": 0.7850049361450294, "learning_rate": 1.4813193228254524e-05, "loss": 0.764, "step": 1015 }, { "epoch": 0.029663367492919916, "grad_norm": 0.8551558498759846, "learning_rate": 1.482778750729714e-05, "loss": 0.7246, "step": 1016 }, { "epoch": 0.029692563720767277, "grad_norm": 0.7731711809272691, "learning_rate": 1.4842381786339757e-05, "loss": 0.6123, "step": 1017 }, { "epoch": 0.02972175994861464, "grad_norm": 0.8369979205308115, "learning_rate": 1.4856976065382373e-05, "loss": 0.7984, "step": 1018 }, { "epoch": 0.029750956176462, "grad_norm": 0.7578115396189274, "learning_rate": 1.4871570344424987e-05, "loss": 0.7312, "step": 1019 }, { "epoch": 0.029780152404309364, "grad_norm": 0.7686097641652604, "learning_rate": 1.4886164623467603e-05, "loss": 0.7139, "step": 1020 }, { "epoch": 0.029809348632156726, "grad_norm": 0.7233325407935021, "learning_rate": 1.4900758902510217e-05, "loss": 0.6405, "step": 1021 }, { "epoch": 0.029838544860004087, "grad_norm": 0.9984921885417087, "learning_rate": 1.4915353181552833e-05, "loss": 0.7553, "step": 1022 }, { "epoch": 0.029867741087851448, "grad_norm": 0.7754698940882396, "learning_rate": 1.4929947460595447e-05, "loss": 0.7489, "step": 1023 }, { "epoch": 0.029896937315698813, "grad_norm": 0.8861580625808204, "learning_rate": 1.4944541739638063e-05, "loss": 0.7969, "step": 1024 }, { "epoch": 0.029926133543546174, "grad_norm": 0.8025748594572538, "learning_rate": 1.4959136018680677e-05, "loss": 0.6798, "step": 1025 }, { "epoch": 0.029955329771393535, "grad_norm": 0.8589614292644068, "learning_rate": 1.4973730297723293e-05, "loss": 0.74, "step": 1026 }, { "epoch": 0.029984525999240896, "grad_norm": 1.001515599878186, "learning_rate": 1.4988324576765909e-05, "loss": 0.8362, "step": 1027 }, { "epoch": 0.03001372222708826, "grad_norm": 0.8359661308847481, "learning_rate": 1.5002918855808523e-05, "loss": 0.7085, "step": 1028 }, { "epoch": 0.030042918454935622, "grad_norm": 0.7387732660461381, "learning_rate": 1.5017513134851139e-05, "loss": 0.6599, "step": 1029 }, { "epoch": 0.030072114682782983, "grad_norm": 0.9176569905015722, "learning_rate": 1.5032107413893753e-05, "loss": 0.7798, "step": 1030 }, { "epoch": 0.030101310910630348, "grad_norm": 0.7508328718484354, "learning_rate": 1.5046701692936369e-05, "loss": 0.6548, "step": 1031 }, { "epoch": 0.03013050713847771, "grad_norm": 0.8114502780854808, "learning_rate": 1.5061295971978983e-05, "loss": 0.7221, "step": 1032 }, { "epoch": 0.03015970336632507, "grad_norm": 0.9252609851752547, "learning_rate": 1.5075890251021602e-05, "loss": 0.8783, "step": 1033 }, { "epoch": 0.03018889959417243, "grad_norm": 0.7435244755303095, "learning_rate": 1.5090484530064216e-05, "loss": 0.6682, "step": 1034 }, { "epoch": 0.030218095822019796, "grad_norm": 0.8125897844082441, "learning_rate": 1.5105078809106832e-05, "loss": 0.7659, "step": 1035 }, { "epoch": 0.030247292049867158, "grad_norm": 0.8533341367000602, "learning_rate": 1.5119673088149446e-05, "loss": 0.7079, "step": 1036 }, { "epoch": 0.03027648827771452, "grad_norm": 0.8378422717035955, "learning_rate": 1.5134267367192062e-05, "loss": 0.7782, "step": 1037 }, { "epoch": 0.03030568450556188, "grad_norm": 0.8848094336603682, "learning_rate": 1.5148861646234678e-05, "loss": 0.7311, "step": 1038 }, { "epoch": 0.030334880733409245, "grad_norm": 0.759413714560793, "learning_rate": 1.5163455925277292e-05, "loss": 0.6575, "step": 1039 }, { "epoch": 0.030364076961256606, "grad_norm": 0.8108411782357964, "learning_rate": 1.5178050204319908e-05, "loss": 0.7513, "step": 1040 }, { "epoch": 0.030393273189103967, "grad_norm": 0.908721742267341, "learning_rate": 1.5192644483362522e-05, "loss": 0.7434, "step": 1041 }, { "epoch": 0.030422469416951328, "grad_norm": 1.0279247100300435, "learning_rate": 1.5207238762405138e-05, "loss": 0.8354, "step": 1042 }, { "epoch": 0.030451665644798693, "grad_norm": 0.9145975484478731, "learning_rate": 1.5221833041447752e-05, "loss": 0.7196, "step": 1043 }, { "epoch": 0.030480861872646054, "grad_norm": 0.8960592894678149, "learning_rate": 1.5236427320490368e-05, "loss": 0.8298, "step": 1044 }, { "epoch": 0.030510058100493415, "grad_norm": 0.8226431987615727, "learning_rate": 1.5251021599532982e-05, "loss": 0.7841, "step": 1045 }, { "epoch": 0.03053925432834078, "grad_norm": 0.7628289565410097, "learning_rate": 1.52656158785756e-05, "loss": 0.645, "step": 1046 }, { "epoch": 0.03056845055618814, "grad_norm": 0.7710462103959982, "learning_rate": 1.5280210157618212e-05, "loss": 0.6833, "step": 1047 }, { "epoch": 0.030597646784035502, "grad_norm": 0.8054466112783031, "learning_rate": 1.5294804436660828e-05, "loss": 0.7656, "step": 1048 }, { "epoch": 0.030626843011882864, "grad_norm": 0.7642654844370578, "learning_rate": 1.5309398715703447e-05, "loss": 0.7854, "step": 1049 }, { "epoch": 0.030656039239730228, "grad_norm": 0.887100158880595, "learning_rate": 1.532399299474606e-05, "loss": 0.7134, "step": 1050 }, { "epoch": 0.03068523546757759, "grad_norm": 0.7184977135981038, "learning_rate": 1.5338587273788676e-05, "loss": 0.6154, "step": 1051 }, { "epoch": 0.03071443169542495, "grad_norm": 1.0099762939675565, "learning_rate": 1.535318155283129e-05, "loss": 0.7992, "step": 1052 }, { "epoch": 0.030743627923272312, "grad_norm": 0.7983421806024514, "learning_rate": 1.5367775831873907e-05, "loss": 0.796, "step": 1053 }, { "epoch": 0.030772824151119677, "grad_norm": 0.9093512271018332, "learning_rate": 1.5382370110916523e-05, "loss": 0.8276, "step": 1054 }, { "epoch": 0.030802020378967038, "grad_norm": 0.8844163192652438, "learning_rate": 1.5396964389959136e-05, "loss": 0.8211, "step": 1055 }, { "epoch": 0.0308312166068144, "grad_norm": 0.8270701807379819, "learning_rate": 1.541155866900175e-05, "loss": 0.7554, "step": 1056 }, { "epoch": 0.03086041283466176, "grad_norm": 0.7403365604400822, "learning_rate": 1.5426152948044367e-05, "loss": 0.6708, "step": 1057 }, { "epoch": 0.030889609062509125, "grad_norm": 0.6937483565561, "learning_rate": 1.5440747227086983e-05, "loss": 0.6103, "step": 1058 }, { "epoch": 0.030918805290356486, "grad_norm": 0.7945054325312141, "learning_rate": 1.54553415061296e-05, "loss": 0.7788, "step": 1059 }, { "epoch": 0.030948001518203847, "grad_norm": 0.9289723771435329, "learning_rate": 1.546993578517221e-05, "loss": 0.6984, "step": 1060 }, { "epoch": 0.030977197746051212, "grad_norm": 0.9599705455208536, "learning_rate": 1.5484530064214827e-05, "loss": 0.6527, "step": 1061 }, { "epoch": 0.031006393973898573, "grad_norm": 0.7892656743555844, "learning_rate": 1.5499124343257443e-05, "loss": 0.6895, "step": 1062 }, { "epoch": 0.031035590201745934, "grad_norm": 0.7342033280300858, "learning_rate": 1.551371862230006e-05, "loss": 0.6497, "step": 1063 }, { "epoch": 0.031064786429593295, "grad_norm": 0.8857742799675147, "learning_rate": 1.552831290134267e-05, "loss": 0.8154, "step": 1064 }, { "epoch": 0.03109398265744066, "grad_norm": 0.8048856718204219, "learning_rate": 1.554290718038529e-05, "loss": 0.7059, "step": 1065 }, { "epoch": 0.03112317888528802, "grad_norm": 0.7682013201872074, "learning_rate": 1.5557501459427907e-05, "loss": 0.6647, "step": 1066 }, { "epoch": 0.031152375113135383, "grad_norm": 0.8117918850348284, "learning_rate": 1.5572095738470523e-05, "loss": 0.7757, "step": 1067 }, { "epoch": 0.031181571340982744, "grad_norm": 0.7724406613251691, "learning_rate": 1.5586690017513135e-05, "loss": 0.7475, "step": 1068 }, { "epoch": 0.03121076756883011, "grad_norm": 0.7964899960208414, "learning_rate": 1.560128429655575e-05, "loss": 0.7992, "step": 1069 }, { "epoch": 0.03123996379667747, "grad_norm": 0.831278168408104, "learning_rate": 1.5615878575598367e-05, "loss": 0.7774, "step": 1070 }, { "epoch": 0.03126916002452483, "grad_norm": 0.8725894838464107, "learning_rate": 1.5630472854640983e-05, "loss": 0.6921, "step": 1071 }, { "epoch": 0.03129835625237219, "grad_norm": 0.8342956216377501, "learning_rate": 1.56450671336836e-05, "loss": 0.6776, "step": 1072 }, { "epoch": 0.03132755248021955, "grad_norm": 0.9762232306334302, "learning_rate": 1.565966141272621e-05, "loss": 0.7974, "step": 1073 }, { "epoch": 0.031356748708066914, "grad_norm": 0.8987930661759275, "learning_rate": 1.5674255691768827e-05, "loss": 0.7208, "step": 1074 }, { "epoch": 0.03138594493591428, "grad_norm": 0.8093272671735131, "learning_rate": 1.5688849970811443e-05, "loss": 0.7465, "step": 1075 }, { "epoch": 0.031415141163761644, "grad_norm": 0.7291505971941875, "learning_rate": 1.570344424985406e-05, "loss": 0.6788, "step": 1076 }, { "epoch": 0.031444337391609005, "grad_norm": 0.8399416930483556, "learning_rate": 1.571803852889667e-05, "loss": 0.8565, "step": 1077 }, { "epoch": 0.031473533619456366, "grad_norm": 0.7357676415661243, "learning_rate": 1.5732632807939287e-05, "loss": 0.6202, "step": 1078 }, { "epoch": 0.03150272984730373, "grad_norm": 0.8162432451182939, "learning_rate": 1.5747227086981903e-05, "loss": 0.7794, "step": 1079 }, { "epoch": 0.03153192607515109, "grad_norm": 0.8142877397917281, "learning_rate": 1.576182136602452e-05, "loss": 0.6872, "step": 1080 }, { "epoch": 0.03156112230299845, "grad_norm": 0.9235280899211695, "learning_rate": 1.5776415645067134e-05, "loss": 0.7532, "step": 1081 }, { "epoch": 0.03159031853084582, "grad_norm": 0.7943737348889897, "learning_rate": 1.579100992410975e-05, "loss": 0.7458, "step": 1082 }, { "epoch": 0.03161951475869318, "grad_norm": 0.7995479750785169, "learning_rate": 1.5805604203152366e-05, "loss": 0.6948, "step": 1083 }, { "epoch": 0.03164871098654054, "grad_norm": 1.0653002019729056, "learning_rate": 1.5820198482194982e-05, "loss": 0.8055, "step": 1084 }, { "epoch": 0.0316779072143879, "grad_norm": 0.8485510773794118, "learning_rate": 1.5834792761237598e-05, "loss": 0.6953, "step": 1085 }, { "epoch": 0.03170710344223526, "grad_norm": 0.7306363446018284, "learning_rate": 1.584938704028021e-05, "loss": 0.6993, "step": 1086 }, { "epoch": 0.031736299670082624, "grad_norm": 0.7591181711375024, "learning_rate": 1.5863981319322826e-05, "loss": 0.7662, "step": 1087 }, { "epoch": 0.031765495897929985, "grad_norm": 0.7655777037525227, "learning_rate": 1.5878575598365442e-05, "loss": 0.7312, "step": 1088 }, { "epoch": 0.031794692125777346, "grad_norm": 0.7577236780270729, "learning_rate": 1.5893169877408058e-05, "loss": 0.7342, "step": 1089 }, { "epoch": 0.031823888353624714, "grad_norm": 0.8460862319577088, "learning_rate": 1.590776415645067e-05, "loss": 0.7815, "step": 1090 }, { "epoch": 0.031853084581472076, "grad_norm": 0.7674497782209236, "learning_rate": 1.5922358435493286e-05, "loss": 0.7141, "step": 1091 }, { "epoch": 0.03188228080931944, "grad_norm": 0.8511615527858862, "learning_rate": 1.5936952714535902e-05, "loss": 0.7491, "step": 1092 }, { "epoch": 0.0319114770371668, "grad_norm": 0.7937701670947714, "learning_rate": 1.5951546993578518e-05, "loss": 0.746, "step": 1093 }, { "epoch": 0.03194067326501416, "grad_norm": 0.7687999348529359, "learning_rate": 1.5966141272621134e-05, "loss": 0.6942, "step": 1094 }, { "epoch": 0.03196986949286152, "grad_norm": 0.8620908070205616, "learning_rate": 1.5980735551663746e-05, "loss": 0.7508, "step": 1095 }, { "epoch": 0.03199906572070888, "grad_norm": 0.7388767249588686, "learning_rate": 1.5995329830706362e-05, "loss": 0.6443, "step": 1096 }, { "epoch": 0.03202826194855625, "grad_norm": 0.8398932056446011, "learning_rate": 1.600992410974898e-05, "loss": 0.6808, "step": 1097 }, { "epoch": 0.03205745817640361, "grad_norm": 0.8058098878955982, "learning_rate": 1.6024518388791597e-05, "loss": 0.7168, "step": 1098 }, { "epoch": 0.03208665440425097, "grad_norm": 0.8537848648219868, "learning_rate": 1.603911266783421e-05, "loss": 0.7095, "step": 1099 }, { "epoch": 0.03211585063209833, "grad_norm": 1.6552187827741467, "learning_rate": 1.6053706946876825e-05, "loss": 0.7983, "step": 1100 }, { "epoch": 0.032145046859945695, "grad_norm": 0.7526621530206603, "learning_rate": 1.606830122591944e-05, "loss": 0.6761, "step": 1101 }, { "epoch": 0.032174243087793056, "grad_norm": 0.819817006799288, "learning_rate": 1.6082895504962057e-05, "loss": 0.7418, "step": 1102 }, { "epoch": 0.03220343931564042, "grad_norm": 0.7986324572634547, "learning_rate": 1.6097489784004673e-05, "loss": 0.7204, "step": 1103 }, { "epoch": 0.03223263554348778, "grad_norm": 0.7587068557616558, "learning_rate": 1.6112084063047285e-05, "loss": 0.6844, "step": 1104 }, { "epoch": 0.032261831771335146, "grad_norm": 0.8230840828096126, "learning_rate": 1.61266783420899e-05, "loss": 0.7757, "step": 1105 }, { "epoch": 0.03229102799918251, "grad_norm": 0.7407088986317414, "learning_rate": 1.6141272621132517e-05, "loss": 0.6834, "step": 1106 }, { "epoch": 0.03232022422702987, "grad_norm": 0.863174260694166, "learning_rate": 1.6155866900175133e-05, "loss": 0.7963, "step": 1107 }, { "epoch": 0.03234942045487723, "grad_norm": 1.8596476654114902, "learning_rate": 1.6170461179217745e-05, "loss": 0.7778, "step": 1108 }, { "epoch": 0.03237861668272459, "grad_norm": 0.7621091156089006, "learning_rate": 1.618505545826036e-05, "loss": 0.6869, "step": 1109 }, { "epoch": 0.03240781291057195, "grad_norm": 0.8604010126370539, "learning_rate": 1.6199649737302977e-05, "loss": 0.7175, "step": 1110 }, { "epoch": 0.032437009138419313, "grad_norm": 0.7670911593311046, "learning_rate": 1.6214244016345593e-05, "loss": 0.6154, "step": 1111 }, { "epoch": 0.03246620536626668, "grad_norm": 0.8113957105298736, "learning_rate": 1.622883829538821e-05, "loss": 0.7561, "step": 1112 }, { "epoch": 0.03249540159411404, "grad_norm": 0.7497260602388373, "learning_rate": 1.6243432574430825e-05, "loss": 0.6611, "step": 1113 }, { "epoch": 0.032524597821961404, "grad_norm": 1.1011803263260715, "learning_rate": 1.625802685347344e-05, "loss": 0.7176, "step": 1114 }, { "epoch": 0.032553794049808765, "grad_norm": 0.7909425273716449, "learning_rate": 1.6272621132516056e-05, "loss": 0.7139, "step": 1115 }, { "epoch": 0.032582990277656126, "grad_norm": 0.8274796692077195, "learning_rate": 1.6287215411558672e-05, "loss": 0.7498, "step": 1116 }, { "epoch": 0.03261218650550349, "grad_norm": 0.847194774775256, "learning_rate": 1.6301809690601285e-05, "loss": 0.7305, "step": 1117 }, { "epoch": 0.03264138273335085, "grad_norm": 0.7434692283876604, "learning_rate": 1.63164039696439e-05, "loss": 0.7174, "step": 1118 }, { "epoch": 0.03267057896119821, "grad_norm": 0.7358801946230268, "learning_rate": 1.6330998248686516e-05, "loss": 0.6386, "step": 1119 }, { "epoch": 0.03269977518904558, "grad_norm": 0.7647688036883802, "learning_rate": 1.6345592527729132e-05, "loss": 0.7325, "step": 1120 }, { "epoch": 0.03272897141689294, "grad_norm": 0.7687070193367977, "learning_rate": 1.6360186806771745e-05, "loss": 0.7583, "step": 1121 }, { "epoch": 0.0327581676447403, "grad_norm": 0.7544965972516606, "learning_rate": 1.637478108581436e-05, "loss": 0.6715, "step": 1122 }, { "epoch": 0.03278736387258766, "grad_norm": 0.8581282186581193, "learning_rate": 1.6389375364856976e-05, "loss": 0.7504, "step": 1123 }, { "epoch": 0.03281656010043502, "grad_norm": 0.7759102644181687, "learning_rate": 1.6403969643899592e-05, "loss": 0.7691, "step": 1124 }, { "epoch": 0.032845756328282384, "grad_norm": 0.7739232635758776, "learning_rate": 1.6418563922942208e-05, "loss": 0.6993, "step": 1125 }, { "epoch": 0.032874952556129745, "grad_norm": 0.7695657852289526, "learning_rate": 1.643315820198482e-05, "loss": 0.743, "step": 1126 }, { "epoch": 0.032904148783977114, "grad_norm": 0.8036871198065723, "learning_rate": 1.6447752481027436e-05, "loss": 0.7554, "step": 1127 }, { "epoch": 0.032933345011824475, "grad_norm": 0.7712544308736409, "learning_rate": 1.6462346760070052e-05, "loss": 0.7042, "step": 1128 }, { "epoch": 0.032962541239671836, "grad_norm": 0.8270803579640724, "learning_rate": 1.647694103911267e-05, "loss": 0.7162, "step": 1129 }, { "epoch": 0.0329917374675192, "grad_norm": 0.7674810101849376, "learning_rate": 1.6491535318155284e-05, "loss": 0.6338, "step": 1130 }, { "epoch": 0.03302093369536656, "grad_norm": 0.8424668169916254, "learning_rate": 1.65061295971979e-05, "loss": 0.7299, "step": 1131 }, { "epoch": 0.03305012992321392, "grad_norm": 0.7290206075403889, "learning_rate": 1.6520723876240516e-05, "loss": 0.6836, "step": 1132 }, { "epoch": 0.03307932615106128, "grad_norm": 0.7339835786492152, "learning_rate": 1.653531815528313e-05, "loss": 0.6187, "step": 1133 }, { "epoch": 0.03310852237890864, "grad_norm": 0.8358629810025445, "learning_rate": 1.6549912434325744e-05, "loss": 0.8057, "step": 1134 }, { "epoch": 0.03313771860675601, "grad_norm": 0.7332321814465806, "learning_rate": 1.656450671336836e-05, "loss": 0.6728, "step": 1135 }, { "epoch": 0.03316691483460337, "grad_norm": 0.7286166378166907, "learning_rate": 1.6579100992410976e-05, "loss": 0.7163, "step": 1136 }, { "epoch": 0.03319611106245073, "grad_norm": 0.7697356510289722, "learning_rate": 1.659369527145359e-05, "loss": 0.6681, "step": 1137 }, { "epoch": 0.033225307290298094, "grad_norm": 0.8760043210825642, "learning_rate": 1.6608289550496207e-05, "loss": 0.7921, "step": 1138 }, { "epoch": 0.033254503518145455, "grad_norm": 0.9074965703840653, "learning_rate": 1.662288382953882e-05, "loss": 0.8368, "step": 1139 }, { "epoch": 0.033283699745992816, "grad_norm": 0.7766364325263007, "learning_rate": 1.6637478108581436e-05, "loss": 0.6911, "step": 1140 }, { "epoch": 0.03331289597384018, "grad_norm": 0.8545405769478409, "learning_rate": 1.665207238762405e-05, "loss": 0.8015, "step": 1141 }, { "epoch": 0.033342092201687545, "grad_norm": 0.8056171103993213, "learning_rate": 1.6666666666666667e-05, "loss": 0.7265, "step": 1142 }, { "epoch": 0.03337128842953491, "grad_norm": 0.9321699413121343, "learning_rate": 1.6681260945709283e-05, "loss": 0.7014, "step": 1143 }, { "epoch": 0.03340048465738227, "grad_norm": 0.8982554707396295, "learning_rate": 1.6695855224751896e-05, "loss": 0.7464, "step": 1144 }, { "epoch": 0.03342968088522963, "grad_norm": 0.7976042510680048, "learning_rate": 1.6710449503794515e-05, "loss": 0.6604, "step": 1145 }, { "epoch": 0.03345887711307699, "grad_norm": 0.840878252508458, "learning_rate": 1.672504378283713e-05, "loss": 0.856, "step": 1146 }, { "epoch": 0.03348807334092435, "grad_norm": 0.8181628821196367, "learning_rate": 1.6739638061879743e-05, "loss": 0.7876, "step": 1147 }, { "epoch": 0.03351726956877171, "grad_norm": 0.7717319791212007, "learning_rate": 1.675423234092236e-05, "loss": 0.7293, "step": 1148 }, { "epoch": 0.033546465796619074, "grad_norm": 0.7395654473173723, "learning_rate": 1.6768826619964975e-05, "loss": 0.6447, "step": 1149 }, { "epoch": 0.03357566202446644, "grad_norm": 0.7862430365909249, "learning_rate": 1.678342089900759e-05, "loss": 0.6995, "step": 1150 }, { "epoch": 0.0336048582523138, "grad_norm": 0.8329256356106632, "learning_rate": 1.6798015178050207e-05, "loss": 0.7577, "step": 1151 }, { "epoch": 0.033634054480161164, "grad_norm": 0.814311719817982, "learning_rate": 1.681260945709282e-05, "loss": 0.6986, "step": 1152 }, { "epoch": 0.033663250708008526, "grad_norm": 0.7633194229509348, "learning_rate": 1.6827203736135435e-05, "loss": 0.7197, "step": 1153 }, { "epoch": 0.03369244693585589, "grad_norm": 0.7849029375600532, "learning_rate": 1.684179801517805e-05, "loss": 0.7105, "step": 1154 }, { "epoch": 0.03372164316370325, "grad_norm": 0.7376830669030765, "learning_rate": 1.6856392294220667e-05, "loss": 0.646, "step": 1155 }, { "epoch": 0.03375083939155061, "grad_norm": 0.7863020873993062, "learning_rate": 1.6870986573263283e-05, "loss": 0.8, "step": 1156 }, { "epoch": 0.03378003561939797, "grad_norm": 0.8510840681429543, "learning_rate": 1.6885580852305895e-05, "loss": 0.808, "step": 1157 }, { "epoch": 0.03380923184724534, "grad_norm": 0.7809623091408855, "learning_rate": 1.690017513134851e-05, "loss": 0.7056, "step": 1158 }, { "epoch": 0.0338384280750927, "grad_norm": 0.7368153835305776, "learning_rate": 1.6914769410391127e-05, "loss": 0.6877, "step": 1159 }, { "epoch": 0.03386762430294006, "grad_norm": 0.8891678924673261, "learning_rate": 1.6929363689433743e-05, "loss": 0.7871, "step": 1160 }, { "epoch": 0.03389682053078742, "grad_norm": 0.7762506098645369, "learning_rate": 1.694395796847636e-05, "loss": 0.7157, "step": 1161 }, { "epoch": 0.03392601675863478, "grad_norm": 0.8582620504945125, "learning_rate": 1.6958552247518974e-05, "loss": 0.8104, "step": 1162 }, { "epoch": 0.033955212986482144, "grad_norm": 0.788905441656707, "learning_rate": 1.697314652656159e-05, "loss": 0.7685, "step": 1163 }, { "epoch": 0.033984409214329506, "grad_norm": 0.8231802236926723, "learning_rate": 1.6987740805604206e-05, "loss": 0.8536, "step": 1164 }, { "epoch": 0.034013605442176874, "grad_norm": 0.8440412257937094, "learning_rate": 1.700233508464682e-05, "loss": 0.7825, "step": 1165 }, { "epoch": 0.034042801670024235, "grad_norm": 0.9318292430146891, "learning_rate": 1.7016929363689434e-05, "loss": 0.7698, "step": 1166 }, { "epoch": 0.034071997897871596, "grad_norm": 0.8492119723549786, "learning_rate": 1.703152364273205e-05, "loss": 0.74, "step": 1167 }, { "epoch": 0.03410119412571896, "grad_norm": 0.738874917224918, "learning_rate": 1.7046117921774666e-05, "loss": 0.6858, "step": 1168 }, { "epoch": 0.03413039035356632, "grad_norm": 0.7608897569853078, "learning_rate": 1.7060712200817282e-05, "loss": 0.6771, "step": 1169 }, { "epoch": 0.03415958658141368, "grad_norm": 0.7712372459654376, "learning_rate": 1.7075306479859894e-05, "loss": 0.6682, "step": 1170 }, { "epoch": 0.03418878280926104, "grad_norm": 0.8645126892724078, "learning_rate": 1.708990075890251e-05, "loss": 0.8179, "step": 1171 }, { "epoch": 0.0342179790371084, "grad_norm": 0.8532800180999002, "learning_rate": 1.7104495037945126e-05, "loss": 0.8358, "step": 1172 }, { "epoch": 0.03424717526495577, "grad_norm": 0.7259881150427536, "learning_rate": 1.7119089316987742e-05, "loss": 0.6189, "step": 1173 }, { "epoch": 0.03427637149280313, "grad_norm": 0.7814641518310925, "learning_rate": 1.7133683596030354e-05, "loss": 0.7311, "step": 1174 }, { "epoch": 0.03430556772065049, "grad_norm": 0.8701643606327023, "learning_rate": 1.714827787507297e-05, "loss": 0.7384, "step": 1175 }, { "epoch": 0.034334763948497854, "grad_norm": 0.7734793991982006, "learning_rate": 1.7162872154115586e-05, "loss": 0.745, "step": 1176 }, { "epoch": 0.034363960176345215, "grad_norm": 0.8173021975597471, "learning_rate": 1.7177466433158205e-05, "loss": 0.7954, "step": 1177 }, { "epoch": 0.034393156404192576, "grad_norm": 0.7513132737564806, "learning_rate": 1.7192060712200818e-05, "loss": 0.6663, "step": 1178 }, { "epoch": 0.03442235263203994, "grad_norm": 0.7679196806186035, "learning_rate": 1.7206654991243434e-05, "loss": 0.7256, "step": 1179 }, { "epoch": 0.034451548859887306, "grad_norm": 0.7489802229854203, "learning_rate": 1.722124927028605e-05, "loss": 0.6862, "step": 1180 }, { "epoch": 0.03448074508773467, "grad_norm": 0.844489794152567, "learning_rate": 1.7235843549328665e-05, "loss": 0.7893, "step": 1181 }, { "epoch": 0.03450994131558203, "grad_norm": 0.7809356521050681, "learning_rate": 1.725043782837128e-05, "loss": 0.7559, "step": 1182 }, { "epoch": 0.03453913754342939, "grad_norm": 0.7594263245178138, "learning_rate": 1.7265032107413894e-05, "loss": 0.7589, "step": 1183 }, { "epoch": 0.03456833377127675, "grad_norm": 0.79514295486183, "learning_rate": 1.727962638645651e-05, "loss": 0.7427, "step": 1184 }, { "epoch": 0.03459752999912411, "grad_norm": 0.7741521138118655, "learning_rate": 1.7294220665499125e-05, "loss": 0.7448, "step": 1185 }, { "epoch": 0.03462672622697147, "grad_norm": 0.91864274727771, "learning_rate": 1.730881494454174e-05, "loss": 0.7511, "step": 1186 }, { "epoch": 0.034655922454818834, "grad_norm": 0.7073616584922703, "learning_rate": 1.7323409223584354e-05, "loss": 0.6672, "step": 1187 }, { "epoch": 0.0346851186826662, "grad_norm": 0.979232106181946, "learning_rate": 1.733800350262697e-05, "loss": 0.7888, "step": 1188 }, { "epoch": 0.03471431491051356, "grad_norm": 0.8208913924059718, "learning_rate": 1.7352597781669585e-05, "loss": 0.7785, "step": 1189 }, { "epoch": 0.034743511138360925, "grad_norm": 0.7985373780642784, "learning_rate": 1.73671920607122e-05, "loss": 0.8192, "step": 1190 }, { "epoch": 0.034772707366208286, "grad_norm": 0.7074566269368546, "learning_rate": 1.7381786339754817e-05, "loss": 0.6069, "step": 1191 }, { "epoch": 0.03480190359405565, "grad_norm": 0.8123154566567935, "learning_rate": 1.739638061879743e-05, "loss": 0.8086, "step": 1192 }, { "epoch": 0.03483109982190301, "grad_norm": 0.8764643367908593, "learning_rate": 1.741097489784005e-05, "loss": 0.7738, "step": 1193 }, { "epoch": 0.03486029604975037, "grad_norm": 0.7936801552729622, "learning_rate": 1.7425569176882665e-05, "loss": 0.6918, "step": 1194 }, { "epoch": 0.03488949227759774, "grad_norm": 0.7974870044291668, "learning_rate": 1.744016345592528e-05, "loss": 0.7323, "step": 1195 }, { "epoch": 0.0349186885054451, "grad_norm": 0.8750548258095403, "learning_rate": 1.7454757734967893e-05, "loss": 0.9261, "step": 1196 }, { "epoch": 0.03494788473329246, "grad_norm": 0.7674119776079451, "learning_rate": 1.746935201401051e-05, "loss": 0.7015, "step": 1197 }, { "epoch": 0.03497708096113982, "grad_norm": 0.7469199096938066, "learning_rate": 1.7483946293053125e-05, "loss": 0.6835, "step": 1198 }, { "epoch": 0.03500627718898718, "grad_norm": 0.9779901813407346, "learning_rate": 1.749854057209574e-05, "loss": 0.7109, "step": 1199 }, { "epoch": 0.035035473416834544, "grad_norm": 0.7761201881971553, "learning_rate": 1.7513134851138353e-05, "loss": 0.7198, "step": 1200 }, { "epoch": 0.035064669644681905, "grad_norm": 0.8013535300593639, "learning_rate": 1.752772913018097e-05, "loss": 0.8136, "step": 1201 }, { "epoch": 0.035093865872529266, "grad_norm": 0.7345302691345817, "learning_rate": 1.7542323409223585e-05, "loss": 0.6703, "step": 1202 }, { "epoch": 0.035123062100376634, "grad_norm": 0.802896136083181, "learning_rate": 1.75569176882662e-05, "loss": 0.635, "step": 1203 }, { "epoch": 0.035152258328223995, "grad_norm": 0.8349899816072733, "learning_rate": 1.7571511967308816e-05, "loss": 0.8058, "step": 1204 }, { "epoch": 0.035181454556071357, "grad_norm": 0.7879207598065924, "learning_rate": 1.758610624635143e-05, "loss": 0.7316, "step": 1205 }, { "epoch": 0.03521065078391872, "grad_norm": 0.7855336746026412, "learning_rate": 1.7600700525394045e-05, "loss": 0.8246, "step": 1206 }, { "epoch": 0.03523984701176608, "grad_norm": 0.7272867701911447, "learning_rate": 1.761529480443666e-05, "loss": 0.7282, "step": 1207 }, { "epoch": 0.03526904323961344, "grad_norm": 0.8904198324809259, "learning_rate": 1.7629889083479277e-05, "loss": 0.8244, "step": 1208 }, { "epoch": 0.0352982394674608, "grad_norm": 0.8459149476017659, "learning_rate": 1.7644483362521892e-05, "loss": 0.8427, "step": 1209 }, { "epoch": 0.03532743569530817, "grad_norm": 0.708755427681074, "learning_rate": 1.7659077641564508e-05, "loss": 0.6522, "step": 1210 }, { "epoch": 0.03535663192315553, "grad_norm": 0.7301889439511523, "learning_rate": 1.7673671920607124e-05, "loss": 0.691, "step": 1211 }, { "epoch": 0.03538582815100289, "grad_norm": 0.8696712112696806, "learning_rate": 1.768826619964974e-05, "loss": 0.8185, "step": 1212 }, { "epoch": 0.03541502437885025, "grad_norm": 0.8428016335491778, "learning_rate": 1.7702860478692356e-05, "loss": 0.8064, "step": 1213 }, { "epoch": 0.035444220606697614, "grad_norm": 1.5395727225741933, "learning_rate": 1.7717454757734968e-05, "loss": 0.7042, "step": 1214 }, { "epoch": 0.035473416834544975, "grad_norm": 0.7868118554048663, "learning_rate": 1.7732049036777584e-05, "loss": 0.7513, "step": 1215 }, { "epoch": 0.03550261306239234, "grad_norm": 0.8646026532351855, "learning_rate": 1.77466433158202e-05, "loss": 0.7721, "step": 1216 }, { "epoch": 0.0355318092902397, "grad_norm": 0.8604576971007848, "learning_rate": 1.7761237594862816e-05, "loss": 0.7365, "step": 1217 }, { "epoch": 0.035561005518087066, "grad_norm": 0.9243650065737427, "learning_rate": 1.7775831873905428e-05, "loss": 0.6737, "step": 1218 }, { "epoch": 0.03559020174593443, "grad_norm": 0.9263172163019666, "learning_rate": 1.7790426152948044e-05, "loss": 0.7211, "step": 1219 }, { "epoch": 0.03561939797378179, "grad_norm": 0.8710322239356828, "learning_rate": 1.780502043199066e-05, "loss": 0.7837, "step": 1220 }, { "epoch": 0.03564859420162915, "grad_norm": 0.8537139675678432, "learning_rate": 1.7819614711033276e-05, "loss": 0.6629, "step": 1221 }, { "epoch": 0.03567779042947651, "grad_norm": 0.7201009360494726, "learning_rate": 1.783420899007589e-05, "loss": 0.6764, "step": 1222 }, { "epoch": 0.03570698665732387, "grad_norm": 0.8561716581094899, "learning_rate": 1.7848803269118504e-05, "loss": 0.7737, "step": 1223 }, { "epoch": 0.03573618288517123, "grad_norm": 0.75174130344512, "learning_rate": 1.786339754816112e-05, "loss": 0.6623, "step": 1224 }, { "epoch": 0.0357653791130186, "grad_norm": 0.7292592575861788, "learning_rate": 1.787799182720374e-05, "loss": 0.6717, "step": 1225 }, { "epoch": 0.03579457534086596, "grad_norm": 0.8167042729153846, "learning_rate": 1.7892586106246355e-05, "loss": 0.7163, "step": 1226 }, { "epoch": 0.035823771568713324, "grad_norm": 0.8222708803281332, "learning_rate": 1.7907180385288968e-05, "loss": 0.7643, "step": 1227 }, { "epoch": 0.035852967796560685, "grad_norm": 0.8711387157236318, "learning_rate": 1.7921774664331583e-05, "loss": 0.8285, "step": 1228 }, { "epoch": 0.035882164024408046, "grad_norm": 0.7397214162283591, "learning_rate": 1.79363689433742e-05, "loss": 0.6988, "step": 1229 }, { "epoch": 0.03591136025225541, "grad_norm": 0.7482728371918812, "learning_rate": 1.7950963222416815e-05, "loss": 0.7269, "step": 1230 }, { "epoch": 0.03594055648010277, "grad_norm": 0.8469018527890989, "learning_rate": 1.7965557501459428e-05, "loss": 0.7595, "step": 1231 }, { "epoch": 0.03596975270795013, "grad_norm": 0.7864110186724188, "learning_rate": 1.7980151780502043e-05, "loss": 0.7226, "step": 1232 }, { "epoch": 0.0359989489357975, "grad_norm": 0.8373628941227266, "learning_rate": 1.799474605954466e-05, "loss": 0.7506, "step": 1233 }, { "epoch": 0.03602814516364486, "grad_norm": 0.8220754589424433, "learning_rate": 1.8009340338587275e-05, "loss": 0.7637, "step": 1234 }, { "epoch": 0.03605734139149222, "grad_norm": 1.6010985569499943, "learning_rate": 1.802393461762989e-05, "loss": 0.798, "step": 1235 }, { "epoch": 0.03608653761933958, "grad_norm": 0.7243440401521283, "learning_rate": 1.8038528896672503e-05, "loss": 0.7055, "step": 1236 }, { "epoch": 0.03611573384718694, "grad_norm": 0.8505317732998319, "learning_rate": 1.805312317571512e-05, "loss": 0.7041, "step": 1237 }, { "epoch": 0.036144930075034304, "grad_norm": 0.7836099130734664, "learning_rate": 1.8067717454757735e-05, "loss": 0.668, "step": 1238 }, { "epoch": 0.036174126302881665, "grad_norm": 0.8064272987638492, "learning_rate": 1.808231173380035e-05, "loss": 0.7859, "step": 1239 }, { "epoch": 0.03620332253072903, "grad_norm": 0.7689789732424761, "learning_rate": 1.8096906012842963e-05, "loss": 0.7595, "step": 1240 }, { "epoch": 0.036232518758576394, "grad_norm": 0.9164090282652868, "learning_rate": 1.8111500291885583e-05, "loss": 0.6901, "step": 1241 }, { "epoch": 0.036261714986423756, "grad_norm": 0.7299459916308653, "learning_rate": 1.81260945709282e-05, "loss": 0.6657, "step": 1242 }, { "epoch": 0.03629091121427112, "grad_norm": 0.7959520295485358, "learning_rate": 1.8140688849970814e-05, "loss": 0.7099, "step": 1243 }, { "epoch": 0.03632010744211848, "grad_norm": 0.7432382754615018, "learning_rate": 1.8155283129013427e-05, "loss": 0.661, "step": 1244 }, { "epoch": 0.03634930366996584, "grad_norm": 0.7526769536616734, "learning_rate": 1.8169877408056043e-05, "loss": 0.7065, "step": 1245 }, { "epoch": 0.0363784998978132, "grad_norm": 0.8259619413296013, "learning_rate": 1.818447168709866e-05, "loss": 0.7582, "step": 1246 }, { "epoch": 0.03640769612566056, "grad_norm": 0.9436686813618905, "learning_rate": 1.8199065966141274e-05, "loss": 0.781, "step": 1247 }, { "epoch": 0.03643689235350793, "grad_norm": 0.7908446242742855, "learning_rate": 1.821366024518389e-05, "loss": 0.7171, "step": 1248 }, { "epoch": 0.03646608858135529, "grad_norm": 0.7593632628899385, "learning_rate": 1.8228254524226503e-05, "loss": 0.6368, "step": 1249 }, { "epoch": 0.03649528480920265, "grad_norm": 0.8115783446261309, "learning_rate": 1.824284880326912e-05, "loss": 0.668, "step": 1250 }, { "epoch": 0.03652448103705001, "grad_norm": 0.8025601730961781, "learning_rate": 1.8257443082311734e-05, "loss": 0.7022, "step": 1251 }, { "epoch": 0.036553677264897375, "grad_norm": 0.8211879707917378, "learning_rate": 1.827203736135435e-05, "loss": 0.7468, "step": 1252 }, { "epoch": 0.036582873492744736, "grad_norm": 0.8436178535239732, "learning_rate": 1.8286631640396966e-05, "loss": 0.7435, "step": 1253 }, { "epoch": 0.0366120697205921, "grad_norm": 0.7995081882201241, "learning_rate": 1.830122591943958e-05, "loss": 0.7777, "step": 1254 }, { "epoch": 0.03664126594843946, "grad_norm": 0.7991851375985194, "learning_rate": 1.8315820198482195e-05, "loss": 0.7935, "step": 1255 }, { "epoch": 0.036670462176286826, "grad_norm": 0.7245139867309409, "learning_rate": 1.833041447752481e-05, "loss": 0.6681, "step": 1256 }, { "epoch": 0.03669965840413419, "grad_norm": 0.9063038965662179, "learning_rate": 1.8345008756567426e-05, "loss": 0.7953, "step": 1257 }, { "epoch": 0.03672885463198155, "grad_norm": 0.7502280237362439, "learning_rate": 1.8359603035610042e-05, "loss": 0.6445, "step": 1258 }, { "epoch": 0.03675805085982891, "grad_norm": 0.826202275518878, "learning_rate": 1.8374197314652658e-05, "loss": 0.7968, "step": 1259 }, { "epoch": 0.03678724708767627, "grad_norm": 0.8140771526672126, "learning_rate": 1.8388791593695274e-05, "loss": 0.7559, "step": 1260 }, { "epoch": 0.03681644331552363, "grad_norm": 0.8508341501727489, "learning_rate": 1.840338587273789e-05, "loss": 0.734, "step": 1261 }, { "epoch": 0.036845639543370994, "grad_norm": 0.7446280757075406, "learning_rate": 1.8417980151780502e-05, "loss": 0.6725, "step": 1262 }, { "epoch": 0.03687483577121836, "grad_norm": 0.7900960086862372, "learning_rate": 1.8432574430823118e-05, "loss": 0.7269, "step": 1263 }, { "epoch": 0.03690403199906572, "grad_norm": 0.7234600648970795, "learning_rate": 1.8447168709865734e-05, "loss": 0.6426, "step": 1264 }, { "epoch": 0.036933228226913084, "grad_norm": 0.8945343604880659, "learning_rate": 1.846176298890835e-05, "loss": 0.7628, "step": 1265 }, { "epoch": 0.036962424454760445, "grad_norm": 0.8523446945515961, "learning_rate": 1.8476357267950966e-05, "loss": 0.7091, "step": 1266 }, { "epoch": 0.036991620682607806, "grad_norm": 0.7736262748487935, "learning_rate": 1.8490951546993578e-05, "loss": 0.7605, "step": 1267 }, { "epoch": 0.03702081691045517, "grad_norm": 0.8115441295217717, "learning_rate": 1.8505545826036194e-05, "loss": 0.7958, "step": 1268 }, { "epoch": 0.03705001313830253, "grad_norm": 1.1575285902163397, "learning_rate": 1.852014010507881e-05, "loss": 0.7003, "step": 1269 }, { "epoch": 0.03707920936614989, "grad_norm": 0.7765867628728119, "learning_rate": 1.8534734384121426e-05, "loss": 0.6641, "step": 1270 }, { "epoch": 0.03710840559399726, "grad_norm": 0.9136191299766033, "learning_rate": 1.8549328663164038e-05, "loss": 0.8003, "step": 1271 }, { "epoch": 0.03713760182184462, "grad_norm": 0.8212308667654489, "learning_rate": 1.8563922942206654e-05, "loss": 0.7189, "step": 1272 }, { "epoch": 0.03716679804969198, "grad_norm": 0.808607889481714, "learning_rate": 1.8578517221249273e-05, "loss": 0.6258, "step": 1273 }, { "epoch": 0.03719599427753934, "grad_norm": 0.7869134710136245, "learning_rate": 1.859311150029189e-05, "loss": 0.7305, "step": 1274 }, { "epoch": 0.0372251905053867, "grad_norm": 0.693666127020218, "learning_rate": 1.86077057793345e-05, "loss": 0.6076, "step": 1275 }, { "epoch": 0.037254386733234064, "grad_norm": 0.9985500572016377, "learning_rate": 1.8622300058377117e-05, "loss": 0.7909, "step": 1276 }, { "epoch": 0.037283582961081425, "grad_norm": 0.8938574988150922, "learning_rate": 1.8636894337419733e-05, "loss": 0.7685, "step": 1277 }, { "epoch": 0.037312779188928794, "grad_norm": 1.0219328966634142, "learning_rate": 1.865148861646235e-05, "loss": 0.7434, "step": 1278 }, { "epoch": 0.037341975416776155, "grad_norm": 0.7927201665080544, "learning_rate": 1.8666082895504965e-05, "loss": 0.7278, "step": 1279 }, { "epoch": 0.037371171644623516, "grad_norm": 0.7922699362647185, "learning_rate": 1.8680677174547577e-05, "loss": 0.6632, "step": 1280 }, { "epoch": 0.03740036787247088, "grad_norm": 0.8985090649917473, "learning_rate": 1.8695271453590193e-05, "loss": 0.7408, "step": 1281 }, { "epoch": 0.03742956410031824, "grad_norm": 0.8130988885225234, "learning_rate": 1.870986573263281e-05, "loss": 0.7463, "step": 1282 }, { "epoch": 0.0374587603281656, "grad_norm": 0.8201095471644487, "learning_rate": 1.8724460011675425e-05, "loss": 0.7132, "step": 1283 }, { "epoch": 0.03748795655601296, "grad_norm": 0.7552139858614509, "learning_rate": 1.8739054290718037e-05, "loss": 0.6704, "step": 1284 }, { "epoch": 0.03751715278386032, "grad_norm": 0.7291820465650876, "learning_rate": 1.8753648569760653e-05, "loss": 0.6836, "step": 1285 }, { "epoch": 0.03754634901170769, "grad_norm": 0.7638696685693592, "learning_rate": 1.876824284880327e-05, "loss": 0.7534, "step": 1286 }, { "epoch": 0.03757554523955505, "grad_norm": 0.8010500484135863, "learning_rate": 1.8782837127845885e-05, "loss": 0.6764, "step": 1287 }, { "epoch": 0.03760474146740241, "grad_norm": 0.8094041785794327, "learning_rate": 1.87974314068885e-05, "loss": 0.7065, "step": 1288 }, { "epoch": 0.037633937695249774, "grad_norm": 0.7415144303482097, "learning_rate": 1.8812025685931117e-05, "loss": 0.6714, "step": 1289 }, { "epoch": 0.037663133923097135, "grad_norm": 0.7637713640572998, "learning_rate": 1.8826619964973732e-05, "loss": 0.7309, "step": 1290 }, { "epoch": 0.037692330150944496, "grad_norm": 0.744680657230386, "learning_rate": 1.8841214244016348e-05, "loss": 0.6668, "step": 1291 }, { "epoch": 0.03772152637879186, "grad_norm": 0.7771690370544453, "learning_rate": 1.8855808523058964e-05, "loss": 0.7379, "step": 1292 }, { "epoch": 0.037750722606639225, "grad_norm": 1.3311367239432652, "learning_rate": 1.8870402802101577e-05, "loss": 0.7951, "step": 1293 }, { "epoch": 0.03777991883448659, "grad_norm": 0.8570896334196986, "learning_rate": 1.8884997081144192e-05, "loss": 0.6587, "step": 1294 }, { "epoch": 0.03780911506233395, "grad_norm": 0.7681231352574924, "learning_rate": 1.889959136018681e-05, "loss": 0.7157, "step": 1295 }, { "epoch": 0.03783831129018131, "grad_norm": 0.7205919874394896, "learning_rate": 1.8914185639229424e-05, "loss": 0.6393, "step": 1296 }, { "epoch": 0.03786750751802867, "grad_norm": 0.7829454779368478, "learning_rate": 1.8928779918272037e-05, "loss": 0.7617, "step": 1297 }, { "epoch": 0.03789670374587603, "grad_norm": 0.7292031334205424, "learning_rate": 1.8943374197314652e-05, "loss": 0.6996, "step": 1298 }, { "epoch": 0.03792589997372339, "grad_norm": 0.726850561726081, "learning_rate": 1.895796847635727e-05, "loss": 0.6518, "step": 1299 }, { "epoch": 0.037955096201570754, "grad_norm": 0.7048144465274457, "learning_rate": 1.8972562755399884e-05, "loss": 0.6313, "step": 1300 }, { "epoch": 0.03798429242941812, "grad_norm": 0.8795755375138115, "learning_rate": 1.89871570344425e-05, "loss": 0.7732, "step": 1301 }, { "epoch": 0.03801348865726548, "grad_norm": 0.8751389083879193, "learning_rate": 1.9001751313485113e-05, "loss": 0.7866, "step": 1302 }, { "epoch": 0.038042684885112844, "grad_norm": 0.8028406667079805, "learning_rate": 1.901634559252773e-05, "loss": 0.7259, "step": 1303 }, { "epoch": 0.038071881112960206, "grad_norm": 0.8307628878520475, "learning_rate": 1.9030939871570344e-05, "loss": 0.6282, "step": 1304 }, { "epoch": 0.03810107734080757, "grad_norm": 0.7909543485663486, "learning_rate": 1.9045534150612963e-05, "loss": 0.7772, "step": 1305 }, { "epoch": 0.03813027356865493, "grad_norm": 0.8976161494431222, "learning_rate": 1.9060128429655576e-05, "loss": 0.7817, "step": 1306 }, { "epoch": 0.03815946979650229, "grad_norm": 0.7357346327697568, "learning_rate": 1.9074722708698192e-05, "loss": 0.6264, "step": 1307 }, { "epoch": 0.03818866602434966, "grad_norm": 0.7390553904996159, "learning_rate": 1.9089316987740808e-05, "loss": 0.6947, "step": 1308 }, { "epoch": 0.03821786225219702, "grad_norm": 0.8107454202524311, "learning_rate": 1.9103911266783423e-05, "loss": 0.747, "step": 1309 }, { "epoch": 0.03824705848004438, "grad_norm": 0.8252324668806136, "learning_rate": 1.9118505545826036e-05, "loss": 0.7107, "step": 1310 }, { "epoch": 0.03827625470789174, "grad_norm": 1.1296820407935435, "learning_rate": 1.9133099824868652e-05, "loss": 0.7663, "step": 1311 }, { "epoch": 0.0383054509357391, "grad_norm": 0.8220262190545998, "learning_rate": 1.9147694103911268e-05, "loss": 0.6552, "step": 1312 }, { "epoch": 0.03833464716358646, "grad_norm": 0.9327684923979873, "learning_rate": 1.9162288382953884e-05, "loss": 0.7607, "step": 1313 }, { "epoch": 0.038363843391433824, "grad_norm": 0.7671343865040792, "learning_rate": 1.91768826619965e-05, "loss": 0.6645, "step": 1314 }, { "epoch": 0.038393039619281186, "grad_norm": 1.4050153790441382, "learning_rate": 1.9191476941039112e-05, "loss": 0.7433, "step": 1315 }, { "epoch": 0.038422235847128554, "grad_norm": 0.773390038188858, "learning_rate": 1.9206071220081728e-05, "loss": 0.7189, "step": 1316 }, { "epoch": 0.038451432074975915, "grad_norm": 0.8511693064446622, "learning_rate": 1.9220665499124344e-05, "loss": 0.8205, "step": 1317 }, { "epoch": 0.038480628302823276, "grad_norm": 0.7231422810302323, "learning_rate": 1.923525977816696e-05, "loss": 0.6645, "step": 1318 }, { "epoch": 0.03850982453067064, "grad_norm": 0.7758770279778232, "learning_rate": 1.9249854057209575e-05, "loss": 0.6634, "step": 1319 }, { "epoch": 0.038539020758518, "grad_norm": 0.8102743711743808, "learning_rate": 1.9264448336252188e-05, "loss": 0.7352, "step": 1320 }, { "epoch": 0.03856821698636536, "grad_norm": 0.8851793209273632, "learning_rate": 1.9279042615294807e-05, "loss": 0.7329, "step": 1321 }, { "epoch": 0.03859741321421272, "grad_norm": 0.798064142376525, "learning_rate": 1.9293636894337423e-05, "loss": 0.6914, "step": 1322 }, { "epoch": 0.03862660944206009, "grad_norm": 0.7713341684615779, "learning_rate": 1.9308231173380035e-05, "loss": 0.7092, "step": 1323 }, { "epoch": 0.03865580566990745, "grad_norm": 0.8038051933142374, "learning_rate": 1.932282545242265e-05, "loss": 0.7521, "step": 1324 }, { "epoch": 0.03868500189775481, "grad_norm": 0.7753359917367555, "learning_rate": 1.9337419731465267e-05, "loss": 0.759, "step": 1325 }, { "epoch": 0.03871419812560217, "grad_norm": 0.8704842864684715, "learning_rate": 1.9352014010507883e-05, "loss": 0.7885, "step": 1326 }, { "epoch": 0.038743394353449534, "grad_norm": 0.7926237436893501, "learning_rate": 1.93666082895505e-05, "loss": 0.7817, "step": 1327 }, { "epoch": 0.038772590581296895, "grad_norm": 0.8463870061835008, "learning_rate": 1.938120256859311e-05, "loss": 0.7939, "step": 1328 }, { "epoch": 0.038801786809144256, "grad_norm": 0.7249093105144145, "learning_rate": 1.9395796847635727e-05, "loss": 0.7144, "step": 1329 }, { "epoch": 0.03883098303699162, "grad_norm": 0.7692463667750031, "learning_rate": 1.9410391126678343e-05, "loss": 0.6251, "step": 1330 }, { "epoch": 0.038860179264838986, "grad_norm": 0.7839899051083491, "learning_rate": 1.942498540572096e-05, "loss": 0.7107, "step": 1331 }, { "epoch": 0.03888937549268635, "grad_norm": 1.0054268053055158, "learning_rate": 1.9439579684763575e-05, "loss": 0.6659, "step": 1332 }, { "epoch": 0.03891857172053371, "grad_norm": 0.8630028429131079, "learning_rate": 1.9454173963806187e-05, "loss": 0.7737, "step": 1333 }, { "epoch": 0.03894776794838107, "grad_norm": 0.9014602698998742, "learning_rate": 1.9468768242848803e-05, "loss": 0.793, "step": 1334 }, { "epoch": 0.03897696417622843, "grad_norm": 0.7556839344445424, "learning_rate": 1.948336252189142e-05, "loss": 0.7324, "step": 1335 }, { "epoch": 0.03900616040407579, "grad_norm": 0.9385786147177843, "learning_rate": 1.9497956800934035e-05, "loss": 0.7466, "step": 1336 }, { "epoch": 0.03903535663192315, "grad_norm": 0.8121441477930798, "learning_rate": 1.951255107997665e-05, "loss": 0.7541, "step": 1337 }, { "epoch": 0.03906455285977052, "grad_norm": 0.7459362754919957, "learning_rate": 1.9527145359019266e-05, "loss": 0.7164, "step": 1338 }, { "epoch": 0.03909374908761788, "grad_norm": 0.860531292880434, "learning_rate": 1.9541739638061882e-05, "loss": 0.76, "step": 1339 }, { "epoch": 0.03912294531546524, "grad_norm": 0.8281903562488752, "learning_rate": 1.9556333917104498e-05, "loss": 0.7811, "step": 1340 }, { "epoch": 0.039152141543312605, "grad_norm": 0.8799707020799805, "learning_rate": 1.957092819614711e-05, "loss": 0.7033, "step": 1341 }, { "epoch": 0.039181337771159966, "grad_norm": 0.7527659786027845, "learning_rate": 1.9585522475189726e-05, "loss": 0.6745, "step": 1342 }, { "epoch": 0.03921053399900733, "grad_norm": 1.0365908900675904, "learning_rate": 1.9600116754232342e-05, "loss": 0.669, "step": 1343 }, { "epoch": 0.03923973022685469, "grad_norm": 0.7853130427539873, "learning_rate": 1.9614711033274958e-05, "loss": 0.7047, "step": 1344 }, { "epoch": 0.03926892645470205, "grad_norm": 0.7987082294867169, "learning_rate": 1.9629305312317574e-05, "loss": 0.7743, "step": 1345 }, { "epoch": 0.03929812268254942, "grad_norm": 1.5686561762695863, "learning_rate": 1.9643899591360186e-05, "loss": 0.721, "step": 1346 }, { "epoch": 0.03932731891039678, "grad_norm": 0.9646990749548573, "learning_rate": 1.9658493870402802e-05, "loss": 0.7658, "step": 1347 }, { "epoch": 0.03935651513824414, "grad_norm": 0.880130798177457, "learning_rate": 1.9673088149445418e-05, "loss": 0.8144, "step": 1348 }, { "epoch": 0.0393857113660915, "grad_norm": 0.9112699234810114, "learning_rate": 1.9687682428488034e-05, "loss": 0.7007, "step": 1349 }, { "epoch": 0.03941490759393886, "grad_norm": 0.7954934463839675, "learning_rate": 1.9702276707530646e-05, "loss": 0.797, "step": 1350 }, { "epoch": 0.039444103821786224, "grad_norm": 0.7543224895617298, "learning_rate": 1.9716870986573262e-05, "loss": 0.7408, "step": 1351 }, { "epoch": 0.039473300049633585, "grad_norm": 0.777237294333178, "learning_rate": 1.9731465265615878e-05, "loss": 0.8174, "step": 1352 }, { "epoch": 0.039502496277480946, "grad_norm": 0.7618917618120223, "learning_rate": 1.9746059544658497e-05, "loss": 0.7125, "step": 1353 }, { "epoch": 0.039531692505328314, "grad_norm": 0.7977108349868041, "learning_rate": 1.976065382370111e-05, "loss": 0.8456, "step": 1354 }, { "epoch": 0.039560888733175675, "grad_norm": 0.7583947019244872, "learning_rate": 1.9775248102743726e-05, "loss": 0.6941, "step": 1355 }, { "epoch": 0.03959008496102304, "grad_norm": 0.8142179449251526, "learning_rate": 1.978984238178634e-05, "loss": 0.6392, "step": 1356 }, { "epoch": 0.0396192811888704, "grad_norm": 0.9281891984343751, "learning_rate": 1.9804436660828957e-05, "loss": 0.7993, "step": 1357 }, { "epoch": 0.03964847741671776, "grad_norm": 0.7782652372667519, "learning_rate": 1.9819030939871573e-05, "loss": 0.7651, "step": 1358 }, { "epoch": 0.03967767364456512, "grad_norm": 0.7913172136968397, "learning_rate": 1.9833625218914186e-05, "loss": 0.6915, "step": 1359 }, { "epoch": 0.03970686987241248, "grad_norm": 0.7466007338611559, "learning_rate": 1.98482194979568e-05, "loss": 0.7022, "step": 1360 }, { "epoch": 0.03973606610025985, "grad_norm": 0.7931358186743481, "learning_rate": 1.9862813776999417e-05, "loss": 0.7459, "step": 1361 }, { "epoch": 0.03976526232810721, "grad_norm": 0.7724015708796067, "learning_rate": 1.9877408056042033e-05, "loss": 0.7714, "step": 1362 }, { "epoch": 0.03979445855595457, "grad_norm": 0.7682723729247777, "learning_rate": 1.989200233508465e-05, "loss": 0.7491, "step": 1363 }, { "epoch": 0.03982365478380193, "grad_norm": 0.798590106998195, "learning_rate": 1.990659661412726e-05, "loss": 0.7448, "step": 1364 }, { "epoch": 0.039852851011649294, "grad_norm": 0.7660801376346713, "learning_rate": 1.9921190893169877e-05, "loss": 0.7509, "step": 1365 }, { "epoch": 0.039882047239496655, "grad_norm": 0.8487814163852191, "learning_rate": 1.9935785172212493e-05, "loss": 0.7818, "step": 1366 }, { "epoch": 0.03991124346734402, "grad_norm": 0.7014350689482836, "learning_rate": 1.995037945125511e-05, "loss": 0.6112, "step": 1367 }, { "epoch": 0.03994043969519138, "grad_norm": 0.762477981402473, "learning_rate": 1.996497373029772e-05, "loss": 0.6716, "step": 1368 }, { "epoch": 0.039969635923038746, "grad_norm": 0.7557009280513607, "learning_rate": 1.997956800934034e-05, "loss": 0.7404, "step": 1369 }, { "epoch": 0.03999883215088611, "grad_norm": 0.8247826037632328, "learning_rate": 1.9994162288382957e-05, "loss": 0.8304, "step": 1370 }, { "epoch": 0.04002802837873347, "grad_norm": 0.7705885323041782, "learning_rate": 2.0008756567425573e-05, "loss": 0.7604, "step": 1371 }, { "epoch": 0.04005722460658083, "grad_norm": 0.9582723496741246, "learning_rate": 2.0023350846468185e-05, "loss": 0.8017, "step": 1372 }, { "epoch": 0.04008642083442819, "grad_norm": 0.8070242581295805, "learning_rate": 2.00379451255108e-05, "loss": 0.7631, "step": 1373 }, { "epoch": 0.04011561706227555, "grad_norm": 0.8481140450663041, "learning_rate": 2.0052539404553417e-05, "loss": 0.6623, "step": 1374 }, { "epoch": 0.04014481329012291, "grad_norm": 0.8555465644189685, "learning_rate": 2.0067133683596033e-05, "loss": 0.7799, "step": 1375 }, { "epoch": 0.04017400951797028, "grad_norm": 0.7443248161810938, "learning_rate": 2.008172796263865e-05, "loss": 0.72, "step": 1376 }, { "epoch": 0.04020320574581764, "grad_norm": 0.7254985262523734, "learning_rate": 2.009632224168126e-05, "loss": 0.6642, "step": 1377 }, { "epoch": 0.040232401973665004, "grad_norm": 0.7931129430383232, "learning_rate": 2.0110916520723877e-05, "loss": 0.7889, "step": 1378 }, { "epoch": 0.040261598201512365, "grad_norm": 0.8281034411794154, "learning_rate": 2.0125510799766493e-05, "loss": 0.7583, "step": 1379 }, { "epoch": 0.040290794429359726, "grad_norm": 0.8113864795384316, "learning_rate": 2.014010507880911e-05, "loss": 0.7605, "step": 1380 }, { "epoch": 0.04031999065720709, "grad_norm": 0.7389344114609364, "learning_rate": 2.015469935785172e-05, "loss": 0.6183, "step": 1381 }, { "epoch": 0.04034918688505445, "grad_norm": 0.8840418407510543, "learning_rate": 2.0169293636894337e-05, "loss": 0.8162, "step": 1382 }, { "epoch": 0.04037838311290181, "grad_norm": 0.8966055387377857, "learning_rate": 2.0183887915936953e-05, "loss": 0.6908, "step": 1383 }, { "epoch": 0.04040757934074918, "grad_norm": 0.798879071435045, "learning_rate": 2.019848219497957e-05, "loss": 0.6844, "step": 1384 }, { "epoch": 0.04043677556859654, "grad_norm": 1.0042688882531767, "learning_rate": 2.0213076474022184e-05, "loss": 0.6517, "step": 1385 }, { "epoch": 0.0404659717964439, "grad_norm": 0.9551442826018917, "learning_rate": 2.02276707530648e-05, "loss": 0.8456, "step": 1386 }, { "epoch": 0.04049516802429126, "grad_norm": 0.8096160370520542, "learning_rate": 2.0242265032107416e-05, "loss": 0.7014, "step": 1387 }, { "epoch": 0.04052436425213862, "grad_norm": 0.9717102679609906, "learning_rate": 2.0256859311150032e-05, "loss": 0.7804, "step": 1388 }, { "epoch": 0.040553560479985984, "grad_norm": 0.7735603595692256, "learning_rate": 2.0271453590192648e-05, "loss": 0.7084, "step": 1389 }, { "epoch": 0.040582756707833345, "grad_norm": 0.7620361998646435, "learning_rate": 2.028604786923526e-05, "loss": 0.7533, "step": 1390 }, { "epoch": 0.04061195293568071, "grad_norm": 0.8910446374303373, "learning_rate": 2.0300642148277876e-05, "loss": 0.672, "step": 1391 }, { "epoch": 0.040641149163528074, "grad_norm": 1.3559544110337516, "learning_rate": 2.0315236427320492e-05, "loss": 0.7899, "step": 1392 }, { "epoch": 0.040670345391375436, "grad_norm": 0.7726155367339698, "learning_rate": 2.0329830706363108e-05, "loss": 0.7427, "step": 1393 }, { "epoch": 0.0406995416192228, "grad_norm": 0.862555752869402, "learning_rate": 2.034442498540572e-05, "loss": 0.7478, "step": 1394 }, { "epoch": 0.04072873784707016, "grad_norm": 0.7891843088838095, "learning_rate": 2.0359019264448336e-05, "loss": 0.8617, "step": 1395 }, { "epoch": 0.04075793407491752, "grad_norm": 0.7820898875903677, "learning_rate": 2.0373613543490952e-05, "loss": 0.7604, "step": 1396 }, { "epoch": 0.04078713030276488, "grad_norm": 0.8419453213289079, "learning_rate": 2.0388207822533568e-05, "loss": 0.6431, "step": 1397 }, { "epoch": 0.04081632653061224, "grad_norm": 0.7977827246754493, "learning_rate": 2.0402802101576184e-05, "loss": 0.7396, "step": 1398 }, { "epoch": 0.04084552275845961, "grad_norm": 0.7727089633099509, "learning_rate": 2.0417396380618796e-05, "loss": 0.7379, "step": 1399 }, { "epoch": 0.04087471898630697, "grad_norm": 0.8076045269881573, "learning_rate": 2.0431990659661412e-05, "loss": 0.7164, "step": 1400 }, { "epoch": 0.04090391521415433, "grad_norm": 0.7838473444518611, "learning_rate": 2.044658493870403e-05, "loss": 0.8207, "step": 1401 }, { "epoch": 0.04093311144200169, "grad_norm": 0.7271847336377812, "learning_rate": 2.0461179217746647e-05, "loss": 0.6428, "step": 1402 }, { "epoch": 0.040962307669849055, "grad_norm": 0.8901796706681439, "learning_rate": 2.047577349678926e-05, "loss": 0.7103, "step": 1403 }, { "epoch": 0.040991503897696416, "grad_norm": 0.7832489836333969, "learning_rate": 2.0490367775831875e-05, "loss": 0.7158, "step": 1404 }, { "epoch": 0.04102070012554378, "grad_norm": 0.8025823796621615, "learning_rate": 2.050496205487449e-05, "loss": 0.6915, "step": 1405 }, { "epoch": 0.041049896353391145, "grad_norm": 0.936449495310282, "learning_rate": 2.0519556333917107e-05, "loss": 0.7402, "step": 1406 }, { "epoch": 0.041079092581238506, "grad_norm": 0.949195841204874, "learning_rate": 2.053415061295972e-05, "loss": 0.6414, "step": 1407 }, { "epoch": 0.04110828880908587, "grad_norm": 0.7904393563882693, "learning_rate": 2.0548744892002335e-05, "loss": 0.7457, "step": 1408 }, { "epoch": 0.04113748503693323, "grad_norm": 0.7660629550429177, "learning_rate": 2.056333917104495e-05, "loss": 0.6518, "step": 1409 }, { "epoch": 0.04116668126478059, "grad_norm": 0.8614055428256252, "learning_rate": 2.0577933450087567e-05, "loss": 0.7759, "step": 1410 }, { "epoch": 0.04119587749262795, "grad_norm": 0.7626297527394748, "learning_rate": 2.0592527729130183e-05, "loss": 0.7027, "step": 1411 }, { "epoch": 0.04122507372047531, "grad_norm": 0.7839468138415208, "learning_rate": 2.0607122008172795e-05, "loss": 0.7538, "step": 1412 }, { "epoch": 0.041254269948322674, "grad_norm": 0.7991780519085303, "learning_rate": 2.062171628721541e-05, "loss": 0.6905, "step": 1413 }, { "epoch": 0.04128346617617004, "grad_norm": 0.7694807843794389, "learning_rate": 2.0636310566258027e-05, "loss": 0.5981, "step": 1414 }, { "epoch": 0.0413126624040174, "grad_norm": 0.8758814567215482, "learning_rate": 2.0650904845300643e-05, "loss": 0.7553, "step": 1415 }, { "epoch": 0.041341858631864764, "grad_norm": 0.7631414545164737, "learning_rate": 2.066549912434326e-05, "loss": 0.7471, "step": 1416 }, { "epoch": 0.041371054859712125, "grad_norm": 0.8192445985626986, "learning_rate": 2.0680093403385875e-05, "loss": 0.7207, "step": 1417 }, { "epoch": 0.041400251087559486, "grad_norm": 0.7329595145361639, "learning_rate": 2.069468768242849e-05, "loss": 0.6538, "step": 1418 }, { "epoch": 0.04142944731540685, "grad_norm": 0.8202466841972957, "learning_rate": 2.0709281961471106e-05, "loss": 0.8218, "step": 1419 }, { "epoch": 0.04145864354325421, "grad_norm": 0.8094515611905144, "learning_rate": 2.072387624051372e-05, "loss": 0.6779, "step": 1420 }, { "epoch": 0.04148783977110158, "grad_norm": 0.8101272582832565, "learning_rate": 2.0738470519556335e-05, "loss": 0.7372, "step": 1421 }, { "epoch": 0.04151703599894894, "grad_norm": 0.7596225223184745, "learning_rate": 2.075306479859895e-05, "loss": 0.6974, "step": 1422 }, { "epoch": 0.0415462322267963, "grad_norm": 0.7598047094453406, "learning_rate": 2.0767659077641566e-05, "loss": 0.7276, "step": 1423 }, { "epoch": 0.04157542845464366, "grad_norm": 0.7150725219203485, "learning_rate": 2.0782253356684182e-05, "loss": 0.6809, "step": 1424 }, { "epoch": 0.04160462468249102, "grad_norm": 0.9561350900487879, "learning_rate": 2.0796847635726795e-05, "loss": 0.7594, "step": 1425 }, { "epoch": 0.04163382091033838, "grad_norm": 0.7734410500149301, "learning_rate": 2.081144191476941e-05, "loss": 0.6765, "step": 1426 }, { "epoch": 0.041663017138185744, "grad_norm": 0.7850766723411571, "learning_rate": 2.0826036193812026e-05, "loss": 0.723, "step": 1427 }, { "epoch": 0.041692213366033105, "grad_norm": 0.8438733063940661, "learning_rate": 2.0840630472854642e-05, "loss": 0.8352, "step": 1428 }, { "epoch": 0.041721409593880474, "grad_norm": 0.8287545367273069, "learning_rate": 2.0855224751897258e-05, "loss": 0.7581, "step": 1429 }, { "epoch": 0.041750605821727835, "grad_norm": 0.7072113828563248, "learning_rate": 2.086981903093987e-05, "loss": 0.6846, "step": 1430 }, { "epoch": 0.041779802049575196, "grad_norm": 0.8855294955385906, "learning_rate": 2.0884413309982486e-05, "loss": 0.6701, "step": 1431 }, { "epoch": 0.04180899827742256, "grad_norm": 0.7732222992368146, "learning_rate": 2.0899007589025102e-05, "loss": 0.6694, "step": 1432 }, { "epoch": 0.04183819450526992, "grad_norm": 0.815058341842425, "learning_rate": 2.0913601868067718e-05, "loss": 0.8343, "step": 1433 }, { "epoch": 0.04186739073311728, "grad_norm": 0.8703373205458086, "learning_rate": 2.0928196147110334e-05, "loss": 0.8284, "step": 1434 }, { "epoch": 0.04189658696096464, "grad_norm": 0.7208900623384243, "learning_rate": 2.094279042615295e-05, "loss": 0.6552, "step": 1435 }, { "epoch": 0.04192578318881201, "grad_norm": 0.8166427674129317, "learning_rate": 2.0957384705195566e-05, "loss": 0.7252, "step": 1436 }, { "epoch": 0.04195497941665937, "grad_norm": 1.0671861613012177, "learning_rate": 2.097197898423818e-05, "loss": 0.7401, "step": 1437 }, { "epoch": 0.04198417564450673, "grad_norm": 0.8565538409456098, "learning_rate": 2.0986573263280794e-05, "loss": 0.7707, "step": 1438 }, { "epoch": 0.04201337187235409, "grad_norm": 1.2898333136404512, "learning_rate": 2.100116754232341e-05, "loss": 0.904, "step": 1439 }, { "epoch": 0.042042568100201454, "grad_norm": 0.7985950152840641, "learning_rate": 2.1015761821366026e-05, "loss": 0.6992, "step": 1440 }, { "epoch": 0.042071764328048815, "grad_norm": 0.7498168135970065, "learning_rate": 2.103035610040864e-05, "loss": 0.7086, "step": 1441 }, { "epoch": 0.042100960555896176, "grad_norm": 0.8230828438118372, "learning_rate": 2.1044950379451257e-05, "loss": 0.7728, "step": 1442 }, { "epoch": 0.04213015678374354, "grad_norm": 1.3537660498647075, "learning_rate": 2.105954465849387e-05, "loss": 0.7783, "step": 1443 }, { "epoch": 0.042159353011590905, "grad_norm": 2.2442134809186327, "learning_rate": 2.1074138937536486e-05, "loss": 0.6956, "step": 1444 }, { "epoch": 0.04218854923943827, "grad_norm": 0.7533192013936103, "learning_rate": 2.10887332165791e-05, "loss": 0.6888, "step": 1445 }, { "epoch": 0.04221774546728563, "grad_norm": 0.9867446954964348, "learning_rate": 2.1103327495621717e-05, "loss": 0.8157, "step": 1446 }, { "epoch": 0.04224694169513299, "grad_norm": 0.7147522888144718, "learning_rate": 2.111792177466433e-05, "loss": 0.6658, "step": 1447 }, { "epoch": 0.04227613792298035, "grad_norm": 0.7829553393048406, "learning_rate": 2.1132516053706946e-05, "loss": 0.6825, "step": 1448 }, { "epoch": 0.04230533415082771, "grad_norm": 0.8079096258725925, "learning_rate": 2.1147110332749565e-05, "loss": 0.7425, "step": 1449 }, { "epoch": 0.04233453037867507, "grad_norm": 0.7990232798066582, "learning_rate": 2.116170461179218e-05, "loss": 0.7681, "step": 1450 }, { "epoch": 0.04236372660652244, "grad_norm": 0.7529963828363606, "learning_rate": 2.1176298890834793e-05, "loss": 0.7261, "step": 1451 }, { "epoch": 0.0423929228343698, "grad_norm": 0.7843754632627375, "learning_rate": 2.119089316987741e-05, "loss": 0.6979, "step": 1452 }, { "epoch": 0.04242211906221716, "grad_norm": 0.8968920696430267, "learning_rate": 2.1205487448920025e-05, "loss": 0.8396, "step": 1453 }, { "epoch": 0.042451315290064524, "grad_norm": 0.9036160767814595, "learning_rate": 2.122008172796264e-05, "loss": 0.7156, "step": 1454 }, { "epoch": 0.042480511517911886, "grad_norm": 0.784376096839755, "learning_rate": 2.1234676007005257e-05, "loss": 0.6682, "step": 1455 }, { "epoch": 0.04250970774575925, "grad_norm": 0.9062258693747217, "learning_rate": 2.124927028604787e-05, "loss": 0.8181, "step": 1456 }, { "epoch": 0.04253890397360661, "grad_norm": 0.8404625751367056, "learning_rate": 2.1263864565090485e-05, "loss": 0.7693, "step": 1457 }, { "epoch": 0.04256810020145397, "grad_norm": 0.9135619675982153, "learning_rate": 2.12784588441331e-05, "loss": 0.6618, "step": 1458 }, { "epoch": 0.04259729642930134, "grad_norm": 0.7999958797314315, "learning_rate": 2.1293053123175717e-05, "loss": 0.691, "step": 1459 }, { "epoch": 0.0426264926571487, "grad_norm": 0.8163607214583046, "learning_rate": 2.130764740221833e-05, "loss": 0.6849, "step": 1460 }, { "epoch": 0.04265568888499606, "grad_norm": 0.854467013841314, "learning_rate": 2.1322241681260945e-05, "loss": 0.7602, "step": 1461 }, { "epoch": 0.04268488511284342, "grad_norm": 0.7635683021075724, "learning_rate": 2.133683596030356e-05, "loss": 0.6311, "step": 1462 }, { "epoch": 0.04271408134069078, "grad_norm": 0.7830259477150852, "learning_rate": 2.1351430239346177e-05, "loss": 0.6678, "step": 1463 }, { "epoch": 0.04274327756853814, "grad_norm": 0.826172400582112, "learning_rate": 2.1366024518388793e-05, "loss": 0.7235, "step": 1464 }, { "epoch": 0.042772473796385505, "grad_norm": 0.8958105994339988, "learning_rate": 2.138061879743141e-05, "loss": 0.7023, "step": 1465 }, { "epoch": 0.042801670024232866, "grad_norm": 0.7941610794119173, "learning_rate": 2.1395213076474024e-05, "loss": 0.7499, "step": 1466 }, { "epoch": 0.042830866252080234, "grad_norm": 0.9242517000745109, "learning_rate": 2.140980735551664e-05, "loss": 0.682, "step": 1467 }, { "epoch": 0.042860062479927595, "grad_norm": 0.7011146468662377, "learning_rate": 2.1424401634559256e-05, "loss": 0.6224, "step": 1468 }, { "epoch": 0.042889258707774956, "grad_norm": 0.7884707376982142, "learning_rate": 2.143899591360187e-05, "loss": 0.6641, "step": 1469 }, { "epoch": 0.04291845493562232, "grad_norm": 1.1842135929387083, "learning_rate": 2.1453590192644484e-05, "loss": 0.8472, "step": 1470 }, { "epoch": 0.04294765116346968, "grad_norm": 0.7083050854053157, "learning_rate": 2.14681844716871e-05, "loss": 0.6278, "step": 1471 }, { "epoch": 0.04297684739131704, "grad_norm": 0.7483358931788084, "learning_rate": 2.1482778750729716e-05, "loss": 0.6828, "step": 1472 }, { "epoch": 0.0430060436191644, "grad_norm": 0.7908319724669464, "learning_rate": 2.1497373029772332e-05, "loss": 0.6131, "step": 1473 }, { "epoch": 0.04303523984701177, "grad_norm": 0.872104410458215, "learning_rate": 2.1511967308814944e-05, "loss": 0.7335, "step": 1474 }, { "epoch": 0.04306443607485913, "grad_norm": 0.7796937580806773, "learning_rate": 2.152656158785756e-05, "loss": 0.7201, "step": 1475 }, { "epoch": 0.04309363230270649, "grad_norm": 0.760042341944252, "learning_rate": 2.1541155866900176e-05, "loss": 0.6419, "step": 1476 }, { "epoch": 0.04312282853055385, "grad_norm": 1.0941173530774508, "learning_rate": 2.1555750145942792e-05, "loss": 0.6803, "step": 1477 }, { "epoch": 0.043152024758401214, "grad_norm": 0.8264478794245492, "learning_rate": 2.1570344424985404e-05, "loss": 0.6713, "step": 1478 }, { "epoch": 0.043181220986248575, "grad_norm": 0.7299766051333121, "learning_rate": 2.158493870402802e-05, "loss": 0.6759, "step": 1479 }, { "epoch": 0.043210417214095936, "grad_norm": 0.770243255470921, "learning_rate": 2.1599532983070636e-05, "loss": 0.685, "step": 1480 }, { "epoch": 0.0432396134419433, "grad_norm": 0.8479601424213662, "learning_rate": 2.1614127262113255e-05, "loss": 0.7278, "step": 1481 }, { "epoch": 0.043268809669790666, "grad_norm": 0.7962533682907791, "learning_rate": 2.1628721541155868e-05, "loss": 0.8455, "step": 1482 }, { "epoch": 0.04329800589763803, "grad_norm": 0.7865824481229617, "learning_rate": 2.1643315820198484e-05, "loss": 0.7177, "step": 1483 }, { "epoch": 0.04332720212548539, "grad_norm": 0.7533051238500995, "learning_rate": 2.16579100992411e-05, "loss": 0.7335, "step": 1484 }, { "epoch": 0.04335639835333275, "grad_norm": 0.7489101782734103, "learning_rate": 2.1672504378283715e-05, "loss": 0.7013, "step": 1485 }, { "epoch": 0.04338559458118011, "grad_norm": 0.8433989655692462, "learning_rate": 2.168709865732633e-05, "loss": 0.8004, "step": 1486 }, { "epoch": 0.04341479080902747, "grad_norm": 0.8485089540111687, "learning_rate": 2.1701692936368944e-05, "loss": 0.8017, "step": 1487 }, { "epoch": 0.04344398703687483, "grad_norm": 0.7573657909060098, "learning_rate": 2.171628721541156e-05, "loss": 0.6826, "step": 1488 }, { "epoch": 0.0434731832647222, "grad_norm": 0.7979401985972726, "learning_rate": 2.1730881494454175e-05, "loss": 0.7014, "step": 1489 }, { "epoch": 0.04350237949256956, "grad_norm": 0.72267718562823, "learning_rate": 2.174547577349679e-05, "loss": 0.6422, "step": 1490 }, { "epoch": 0.043531575720416923, "grad_norm": 0.7961925818514576, "learning_rate": 2.1760070052539404e-05, "loss": 0.7368, "step": 1491 }, { "epoch": 0.043560771948264285, "grad_norm": 0.8045667391508156, "learning_rate": 2.177466433158202e-05, "loss": 0.7066, "step": 1492 }, { "epoch": 0.043589968176111646, "grad_norm": 0.8068516752048999, "learning_rate": 2.1789258610624635e-05, "loss": 0.7454, "step": 1493 }, { "epoch": 0.04361916440395901, "grad_norm": 0.8206735438490125, "learning_rate": 2.180385288966725e-05, "loss": 0.7622, "step": 1494 }, { "epoch": 0.04364836063180637, "grad_norm": 0.722620625615518, "learning_rate": 2.1818447168709867e-05, "loss": 0.6194, "step": 1495 }, { "epoch": 0.04367755685965373, "grad_norm": 0.7941501732567134, "learning_rate": 2.183304144775248e-05, "loss": 0.7447, "step": 1496 }, { "epoch": 0.0437067530875011, "grad_norm": 0.7447850487786107, "learning_rate": 2.18476357267951e-05, "loss": 0.6727, "step": 1497 }, { "epoch": 0.04373594931534846, "grad_norm": 1.520710703401789, "learning_rate": 2.1862230005837715e-05, "loss": 0.7452, "step": 1498 }, { "epoch": 0.04376514554319582, "grad_norm": 0.7811306375546877, "learning_rate": 2.187682428488033e-05, "loss": 0.7635, "step": 1499 }, { "epoch": 0.04379434177104318, "grad_norm": 0.7878812081476821, "learning_rate": 2.1891418563922943e-05, "loss": 0.7558, "step": 1500 }, { "epoch": 0.04382353799889054, "grad_norm": 0.7471430694714714, "learning_rate": 2.190601284296556e-05, "loss": 0.6926, "step": 1501 }, { "epoch": 0.043852734226737904, "grad_norm": 0.8494844519710656, "learning_rate": 2.1920607122008175e-05, "loss": 0.8352, "step": 1502 }, { "epoch": 0.043881930454585265, "grad_norm": 0.8242849925833984, "learning_rate": 2.193520140105079e-05, "loss": 0.7102, "step": 1503 }, { "epoch": 0.04391112668243263, "grad_norm": 0.7588641438234608, "learning_rate": 2.1949795680093403e-05, "loss": 0.7115, "step": 1504 }, { "epoch": 0.043940322910279994, "grad_norm": 0.8688318119296962, "learning_rate": 2.196438995913602e-05, "loss": 0.7234, "step": 1505 }, { "epoch": 0.043969519138127355, "grad_norm": 0.764988940169059, "learning_rate": 2.1978984238178635e-05, "loss": 0.6754, "step": 1506 }, { "epoch": 0.04399871536597472, "grad_norm": 0.7876873973824673, "learning_rate": 2.199357851722125e-05, "loss": 0.6752, "step": 1507 }, { "epoch": 0.04402791159382208, "grad_norm": 0.7834036427034179, "learning_rate": 2.2008172796263866e-05, "loss": 0.6439, "step": 1508 }, { "epoch": 0.04405710782166944, "grad_norm": 0.7677778032002698, "learning_rate": 2.202276707530648e-05, "loss": 0.7295, "step": 1509 }, { "epoch": 0.0440863040495168, "grad_norm": 0.7360241210405543, "learning_rate": 2.2037361354349095e-05, "loss": 0.6659, "step": 1510 }, { "epoch": 0.04411550027736416, "grad_norm": 0.8642639019893203, "learning_rate": 2.205195563339171e-05, "loss": 0.7625, "step": 1511 }, { "epoch": 0.04414469650521153, "grad_norm": 0.8829717933152619, "learning_rate": 2.2066549912434326e-05, "loss": 0.8237, "step": 1512 }, { "epoch": 0.04417389273305889, "grad_norm": 0.7862153321351727, "learning_rate": 2.2081144191476942e-05, "loss": 0.7069, "step": 1513 }, { "epoch": 0.04420308896090625, "grad_norm": 0.7251706261081542, "learning_rate": 2.2095738470519558e-05, "loss": 0.669, "step": 1514 }, { "epoch": 0.04423228518875361, "grad_norm": 0.7282171153315184, "learning_rate": 2.2110332749562174e-05, "loss": 0.6945, "step": 1515 }, { "epoch": 0.044261481416600974, "grad_norm": 0.8554765016877671, "learning_rate": 2.212492702860479e-05, "loss": 0.8352, "step": 1516 }, { "epoch": 0.044290677644448335, "grad_norm": 0.7757052678208263, "learning_rate": 2.2139521307647402e-05, "loss": 0.687, "step": 1517 }, { "epoch": 0.0443198738722957, "grad_norm": 0.7346954624994128, "learning_rate": 2.2154115586690018e-05, "loss": 0.6603, "step": 1518 }, { "epoch": 0.044349070100143065, "grad_norm": 0.7113921312292247, "learning_rate": 2.2168709865732634e-05, "loss": 0.6396, "step": 1519 }, { "epoch": 0.044378266327990426, "grad_norm": 0.8085725696309635, "learning_rate": 2.218330414477525e-05, "loss": 0.7199, "step": 1520 }, { "epoch": 0.04440746255583779, "grad_norm": 0.7493709382773076, "learning_rate": 2.2197898423817866e-05, "loss": 0.7225, "step": 1521 }, { "epoch": 0.04443665878368515, "grad_norm": 0.7599998347796469, "learning_rate": 2.2212492702860478e-05, "loss": 0.6693, "step": 1522 }, { "epoch": 0.04446585501153251, "grad_norm": 0.8741877934800956, "learning_rate": 2.2227086981903094e-05, "loss": 0.723, "step": 1523 }, { "epoch": 0.04449505123937987, "grad_norm": 0.727814722460494, "learning_rate": 2.224168126094571e-05, "loss": 0.6568, "step": 1524 }, { "epoch": 0.04452424746722723, "grad_norm": 0.7761667767425938, "learning_rate": 2.2256275539988326e-05, "loss": 0.6942, "step": 1525 }, { "epoch": 0.04455344369507459, "grad_norm": 0.7329285100305473, "learning_rate": 2.227086981903094e-05, "loss": 0.7088, "step": 1526 }, { "epoch": 0.04458263992292196, "grad_norm": 0.8556661904214218, "learning_rate": 2.2285464098073554e-05, "loss": 0.721, "step": 1527 }, { "epoch": 0.04461183615076932, "grad_norm": 0.9252414250356809, "learning_rate": 2.230005837711617e-05, "loss": 0.7268, "step": 1528 }, { "epoch": 0.044641032378616684, "grad_norm": 0.8065697651081242, "learning_rate": 2.231465265615879e-05, "loss": 0.7078, "step": 1529 }, { "epoch": 0.044670228606464045, "grad_norm": 0.8128128986417479, "learning_rate": 2.23292469352014e-05, "loss": 0.7946, "step": 1530 }, { "epoch": 0.044699424834311406, "grad_norm": 0.7826800330528757, "learning_rate": 2.2343841214244018e-05, "loss": 0.8134, "step": 1531 }, { "epoch": 0.04472862106215877, "grad_norm": 0.7719156202632212, "learning_rate": 2.2358435493286633e-05, "loss": 0.8003, "step": 1532 }, { "epoch": 0.04475781729000613, "grad_norm": 0.7610653707062174, "learning_rate": 2.237302977232925e-05, "loss": 0.6569, "step": 1533 }, { "epoch": 0.0447870135178535, "grad_norm": 0.7618209548668117, "learning_rate": 2.2387624051371865e-05, "loss": 0.7285, "step": 1534 }, { "epoch": 0.04481620974570086, "grad_norm": 0.7632092076135308, "learning_rate": 2.2402218330414478e-05, "loss": 0.67, "step": 1535 }, { "epoch": 0.04484540597354822, "grad_norm": 0.7229641228457819, "learning_rate": 2.2416812609457093e-05, "loss": 0.6574, "step": 1536 }, { "epoch": 0.04487460220139558, "grad_norm": 0.7758752220194302, "learning_rate": 2.243140688849971e-05, "loss": 0.7401, "step": 1537 }, { "epoch": 0.04490379842924294, "grad_norm": 0.6987522196690962, "learning_rate": 2.2446001167542325e-05, "loss": 0.6368, "step": 1538 }, { "epoch": 0.0449329946570903, "grad_norm": 0.7377568535206297, "learning_rate": 2.246059544658494e-05, "loss": 0.6696, "step": 1539 }, { "epoch": 0.044962190884937664, "grad_norm": 0.7233079893946736, "learning_rate": 2.2475189725627553e-05, "loss": 0.649, "step": 1540 }, { "epoch": 0.044991387112785025, "grad_norm": 0.7749404484794582, "learning_rate": 2.248978400467017e-05, "loss": 0.681, "step": 1541 }, { "epoch": 0.04502058334063239, "grad_norm": 0.7920434505031171, "learning_rate": 2.2504378283712785e-05, "loss": 0.7936, "step": 1542 }, { "epoch": 0.045049779568479754, "grad_norm": 0.7891587343836004, "learning_rate": 2.25189725627554e-05, "loss": 0.8073, "step": 1543 }, { "epoch": 0.045078975796327116, "grad_norm": 0.7346670708613197, "learning_rate": 2.2533566841798013e-05, "loss": 0.6166, "step": 1544 }, { "epoch": 0.04510817202417448, "grad_norm": 0.8622940097553731, "learning_rate": 2.2548161120840633e-05, "loss": 0.7391, "step": 1545 }, { "epoch": 0.04513736825202184, "grad_norm": 0.7206129089777531, "learning_rate": 2.256275539988325e-05, "loss": 0.7018, "step": 1546 }, { "epoch": 0.0451665644798692, "grad_norm": 0.7642587576487307, "learning_rate": 2.2577349678925864e-05, "loss": 0.6993, "step": 1547 }, { "epoch": 0.04519576070771656, "grad_norm": 0.7267096030029997, "learning_rate": 2.2591943957968477e-05, "loss": 0.6563, "step": 1548 }, { "epoch": 0.04522495693556393, "grad_norm": 0.7745423802219592, "learning_rate": 2.2606538237011093e-05, "loss": 0.77, "step": 1549 }, { "epoch": 0.04525415316341129, "grad_norm": 0.8855286825511892, "learning_rate": 2.262113251605371e-05, "loss": 0.6825, "step": 1550 }, { "epoch": 0.04528334939125865, "grad_norm": 0.722033289375369, "learning_rate": 2.2635726795096324e-05, "loss": 0.6803, "step": 1551 }, { "epoch": 0.04531254561910601, "grad_norm": 0.8630481848028079, "learning_rate": 2.265032107413894e-05, "loss": 0.7145, "step": 1552 }, { "epoch": 0.04534174184695337, "grad_norm": 0.8108334104757862, "learning_rate": 2.2664915353181553e-05, "loss": 0.7945, "step": 1553 }, { "epoch": 0.045370938074800735, "grad_norm": 0.7861197789370894, "learning_rate": 2.267950963222417e-05, "loss": 0.7534, "step": 1554 }, { "epoch": 0.045400134302648096, "grad_norm": 0.7758426468054116, "learning_rate": 2.2694103911266784e-05, "loss": 0.6908, "step": 1555 }, { "epoch": 0.04542933053049546, "grad_norm": 0.9275684757606492, "learning_rate": 2.27086981903094e-05, "loss": 0.6309, "step": 1556 }, { "epoch": 0.045458526758342825, "grad_norm": 0.8188442822975148, "learning_rate": 2.2723292469352013e-05, "loss": 0.7635, "step": 1557 }, { "epoch": 0.045487722986190186, "grad_norm": 0.7984970431028431, "learning_rate": 2.273788674839463e-05, "loss": 0.7995, "step": 1558 }, { "epoch": 0.04551691921403755, "grad_norm": 0.8030783986455232, "learning_rate": 2.2752481027437244e-05, "loss": 0.7414, "step": 1559 }, { "epoch": 0.04554611544188491, "grad_norm": 0.8714089964021567, "learning_rate": 2.276707530647986e-05, "loss": 0.7112, "step": 1560 }, { "epoch": 0.04557531166973227, "grad_norm": 0.7776550376923853, "learning_rate": 2.2781669585522476e-05, "loss": 0.6825, "step": 1561 }, { "epoch": 0.04560450789757963, "grad_norm": 0.9141385461118794, "learning_rate": 2.2796263864565092e-05, "loss": 0.7778, "step": 1562 }, { "epoch": 0.04563370412542699, "grad_norm": 0.7833134278661636, "learning_rate": 2.2810858143607708e-05, "loss": 0.7613, "step": 1563 }, { "epoch": 0.045662900353274354, "grad_norm": 0.7332471139827827, "learning_rate": 2.2825452422650324e-05, "loss": 0.687, "step": 1564 }, { "epoch": 0.04569209658112172, "grad_norm": 0.7461151933456153, "learning_rate": 2.284004670169294e-05, "loss": 0.6758, "step": 1565 }, { "epoch": 0.04572129280896908, "grad_norm": 0.7751710863071527, "learning_rate": 2.2854640980735552e-05, "loss": 0.705, "step": 1566 }, { "epoch": 0.045750489036816444, "grad_norm": 0.7925864435440167, "learning_rate": 2.2869235259778168e-05, "loss": 0.6806, "step": 1567 }, { "epoch": 0.045779685264663805, "grad_norm": 1.2529122307955713, "learning_rate": 2.2883829538820784e-05, "loss": 0.8156, "step": 1568 }, { "epoch": 0.045808881492511166, "grad_norm": 0.7536783493387857, "learning_rate": 2.28984238178634e-05, "loss": 0.6963, "step": 1569 }, { "epoch": 0.04583807772035853, "grad_norm": 0.810991429114842, "learning_rate": 2.2913018096906012e-05, "loss": 0.7874, "step": 1570 }, { "epoch": 0.04586727394820589, "grad_norm": 0.7339618063848355, "learning_rate": 2.2927612375948628e-05, "loss": 0.6688, "step": 1571 }, { "epoch": 0.04589647017605326, "grad_norm": 1.0101645359615843, "learning_rate": 2.2942206654991244e-05, "loss": 0.8449, "step": 1572 }, { "epoch": 0.04592566640390062, "grad_norm": 0.8677914019412543, "learning_rate": 2.295680093403386e-05, "loss": 0.779, "step": 1573 }, { "epoch": 0.04595486263174798, "grad_norm": 0.7995708989347494, "learning_rate": 2.2971395213076476e-05, "loss": 0.7334, "step": 1574 }, { "epoch": 0.04598405885959534, "grad_norm": 0.7311818988589865, "learning_rate": 2.2985989492119088e-05, "loss": 0.6872, "step": 1575 }, { "epoch": 0.0460132550874427, "grad_norm": 0.8114649927328372, "learning_rate": 2.3000583771161704e-05, "loss": 0.7014, "step": 1576 }, { "epoch": 0.04604245131529006, "grad_norm": 0.7831883290836626, "learning_rate": 2.3015178050204323e-05, "loss": 0.595, "step": 1577 }, { "epoch": 0.046071647543137424, "grad_norm": 0.798187515971568, "learning_rate": 2.302977232924694e-05, "loss": 0.7142, "step": 1578 }, { "epoch": 0.046100843770984785, "grad_norm": 0.8144234769671298, "learning_rate": 2.304436660828955e-05, "loss": 0.7236, "step": 1579 }, { "epoch": 0.046130039998832154, "grad_norm": 0.9211468484433064, "learning_rate": 2.3058960887332167e-05, "loss": 0.7281, "step": 1580 }, { "epoch": 0.046159236226679515, "grad_norm": 0.8031811451106993, "learning_rate": 2.3073555166374783e-05, "loss": 0.6579, "step": 1581 }, { "epoch": 0.046188432454526876, "grad_norm": 0.7753366303388672, "learning_rate": 2.30881494454174e-05, "loss": 0.7857, "step": 1582 }, { "epoch": 0.04621762868237424, "grad_norm": 0.7084145239359434, "learning_rate": 2.310274372446001e-05, "loss": 0.6678, "step": 1583 }, { "epoch": 0.0462468249102216, "grad_norm": 0.7274747512184323, "learning_rate": 2.3117338003502627e-05, "loss": 0.6586, "step": 1584 }, { "epoch": 0.04627602113806896, "grad_norm": 0.8192026862044611, "learning_rate": 2.3131932282545243e-05, "loss": 0.7398, "step": 1585 }, { "epoch": 0.04630521736591632, "grad_norm": 0.7721227579674973, "learning_rate": 2.314652656158786e-05, "loss": 0.6588, "step": 1586 }, { "epoch": 0.04633441359376369, "grad_norm": 0.7387959073318767, "learning_rate": 2.3161120840630475e-05, "loss": 0.6256, "step": 1587 }, { "epoch": 0.04636360982161105, "grad_norm": 0.8073346856544307, "learning_rate": 2.3175715119673087e-05, "loss": 0.8141, "step": 1588 }, { "epoch": 0.04639280604945841, "grad_norm": 0.7540512135018264, "learning_rate": 2.3190309398715703e-05, "loss": 0.7433, "step": 1589 }, { "epoch": 0.04642200227730577, "grad_norm": 0.8614759420834961, "learning_rate": 2.320490367775832e-05, "loss": 0.756, "step": 1590 }, { "epoch": 0.046451198505153134, "grad_norm": 1.0470494430466297, "learning_rate": 2.3219497956800935e-05, "loss": 0.7572, "step": 1591 }, { "epoch": 0.046480394733000495, "grad_norm": 0.7621253629671201, "learning_rate": 2.323409223584355e-05, "loss": 0.7217, "step": 1592 }, { "epoch": 0.046509590960847856, "grad_norm": 0.8191311828038005, "learning_rate": 2.3248686514886167e-05, "loss": 0.7753, "step": 1593 }, { "epoch": 0.04653878718869522, "grad_norm": 0.8076902647432411, "learning_rate": 2.3263280793928782e-05, "loss": 0.7416, "step": 1594 }, { "epoch": 0.046567983416542585, "grad_norm": 0.7551769377063295, "learning_rate": 2.3277875072971398e-05, "loss": 0.6566, "step": 1595 }, { "epoch": 0.04659717964438995, "grad_norm": 0.7319996168303536, "learning_rate": 2.3292469352014014e-05, "loss": 0.6596, "step": 1596 }, { "epoch": 0.04662637587223731, "grad_norm": 0.7900782712609762, "learning_rate": 2.3307063631056627e-05, "loss": 0.7582, "step": 1597 }, { "epoch": 0.04665557210008467, "grad_norm": 0.7289320419816693, "learning_rate": 2.3321657910099242e-05, "loss": 0.6344, "step": 1598 }, { "epoch": 0.04668476832793203, "grad_norm": 0.7301733215247364, "learning_rate": 2.3336252189141858e-05, "loss": 0.6256, "step": 1599 }, { "epoch": 0.04671396455577939, "grad_norm": 0.8681870472539599, "learning_rate": 2.3350846468184474e-05, "loss": 0.753, "step": 1600 }, { "epoch": 0.04674316078362675, "grad_norm": 1.0081830395969675, "learning_rate": 2.3365440747227087e-05, "loss": 0.7696, "step": 1601 }, { "epoch": 0.04677235701147412, "grad_norm": 0.7608103038215175, "learning_rate": 2.3380035026269702e-05, "loss": 0.7073, "step": 1602 }, { "epoch": 0.04680155323932148, "grad_norm": 0.9535363346570532, "learning_rate": 2.339462930531232e-05, "loss": 0.9202, "step": 1603 }, { "epoch": 0.04683074946716884, "grad_norm": 0.7727758511597228, "learning_rate": 2.3409223584354934e-05, "loss": 0.8319, "step": 1604 }, { "epoch": 0.046859945695016204, "grad_norm": 0.7824002813603924, "learning_rate": 2.342381786339755e-05, "loss": 0.7765, "step": 1605 }, { "epoch": 0.046889141922863566, "grad_norm": 0.8371298100576575, "learning_rate": 2.3438412142440162e-05, "loss": 0.7379, "step": 1606 }, { "epoch": 0.04691833815071093, "grad_norm": 0.7838347722009598, "learning_rate": 2.345300642148278e-05, "loss": 0.7264, "step": 1607 }, { "epoch": 0.04694753437855829, "grad_norm": 0.7943068110622008, "learning_rate": 2.3467600700525394e-05, "loss": 0.6967, "step": 1608 }, { "epoch": 0.04697673060640565, "grad_norm": 0.7623491526203646, "learning_rate": 2.3482194979568013e-05, "loss": 0.7199, "step": 1609 }, { "epoch": 0.04700592683425302, "grad_norm": 0.7532483458388405, "learning_rate": 2.3496789258610626e-05, "loss": 0.742, "step": 1610 }, { "epoch": 0.04703512306210038, "grad_norm": 0.8620055161182483, "learning_rate": 2.3511383537653242e-05, "loss": 0.7487, "step": 1611 }, { "epoch": 0.04706431928994774, "grad_norm": 0.7744040316829478, "learning_rate": 2.3525977816695858e-05, "loss": 0.7493, "step": 1612 }, { "epoch": 0.0470935155177951, "grad_norm": 0.7490899519001833, "learning_rate": 2.3540572095738473e-05, "loss": 0.6958, "step": 1613 }, { "epoch": 0.04712271174564246, "grad_norm": 0.8274666758860613, "learning_rate": 2.3555166374781086e-05, "loss": 0.6911, "step": 1614 }, { "epoch": 0.04715190797348982, "grad_norm": 0.9907345064914574, "learning_rate": 2.3569760653823702e-05, "loss": 0.6689, "step": 1615 }, { "epoch": 0.047181104201337185, "grad_norm": 0.7499087906173348, "learning_rate": 2.3584354932866318e-05, "loss": 0.702, "step": 1616 }, { "epoch": 0.04721030042918455, "grad_norm": 0.7984645234742629, "learning_rate": 2.3598949211908933e-05, "loss": 0.7774, "step": 1617 }, { "epoch": 0.047239496657031914, "grad_norm": 0.746568554046967, "learning_rate": 2.361354349095155e-05, "loss": 0.6813, "step": 1618 }, { "epoch": 0.047268692884879275, "grad_norm": 0.7216564895245714, "learning_rate": 2.3628137769994162e-05, "loss": 0.6819, "step": 1619 }, { "epoch": 0.047297889112726636, "grad_norm": 0.791991567707324, "learning_rate": 2.3642732049036778e-05, "loss": 0.6993, "step": 1620 }, { "epoch": 0.047327085340574, "grad_norm": 0.7710180920720869, "learning_rate": 2.3657326328079394e-05, "loss": 0.7361, "step": 1621 }, { "epoch": 0.04735628156842136, "grad_norm": 0.7492984535393079, "learning_rate": 2.367192060712201e-05, "loss": 0.7195, "step": 1622 }, { "epoch": 0.04738547779626872, "grad_norm": 1.039509879636696, "learning_rate": 2.3686514886164625e-05, "loss": 0.6893, "step": 1623 }, { "epoch": 0.04741467402411608, "grad_norm": 0.7792484781532814, "learning_rate": 2.3701109165207238e-05, "loss": 0.6969, "step": 1624 }, { "epoch": 0.04744387025196345, "grad_norm": 0.7575685673273503, "learning_rate": 2.3715703444249857e-05, "loss": 0.6922, "step": 1625 }, { "epoch": 0.04747306647981081, "grad_norm": 1.839023595787036, "learning_rate": 2.3730297723292473e-05, "loss": 0.7939, "step": 1626 }, { "epoch": 0.04750226270765817, "grad_norm": 0.97385349756905, "learning_rate": 2.3744892002335085e-05, "loss": 0.8611, "step": 1627 }, { "epoch": 0.04753145893550553, "grad_norm": 0.8569177840867997, "learning_rate": 2.37594862813777e-05, "loss": 0.789, "step": 1628 }, { "epoch": 0.047560655163352894, "grad_norm": 0.7353223407703442, "learning_rate": 2.3774080560420317e-05, "loss": 0.6922, "step": 1629 }, { "epoch": 0.047589851391200255, "grad_norm": 0.7695372059685066, "learning_rate": 2.3788674839462933e-05, "loss": 0.7512, "step": 1630 }, { "epoch": 0.047619047619047616, "grad_norm": 0.7786821041239291, "learning_rate": 2.380326911850555e-05, "loss": 0.6934, "step": 1631 }, { "epoch": 0.047648243846894985, "grad_norm": 0.8483416092616984, "learning_rate": 2.381786339754816e-05, "loss": 0.7867, "step": 1632 }, { "epoch": 0.047677440074742346, "grad_norm": 0.7379472241397474, "learning_rate": 2.3832457676590777e-05, "loss": 0.6893, "step": 1633 }, { "epoch": 0.04770663630258971, "grad_norm": 0.7939836958324451, "learning_rate": 2.3847051955633393e-05, "loss": 0.6782, "step": 1634 }, { "epoch": 0.04773583253043707, "grad_norm": 0.7701241996653002, "learning_rate": 2.386164623467601e-05, "loss": 0.7095, "step": 1635 }, { "epoch": 0.04776502875828443, "grad_norm": 0.7375236321289287, "learning_rate": 2.3876240513718625e-05, "loss": 0.6357, "step": 1636 }, { "epoch": 0.04779422498613179, "grad_norm": 0.9462350671490365, "learning_rate": 2.3890834792761237e-05, "loss": 0.7993, "step": 1637 }, { "epoch": 0.04782342121397915, "grad_norm": 0.7690994472476753, "learning_rate": 2.3905429071803853e-05, "loss": 0.6672, "step": 1638 }, { "epoch": 0.04785261744182651, "grad_norm": 0.7367615717109093, "learning_rate": 2.392002335084647e-05, "loss": 0.6427, "step": 1639 }, { "epoch": 0.04788181366967388, "grad_norm": 0.7662861532416534, "learning_rate": 2.3934617629889085e-05, "loss": 0.7913, "step": 1640 }, { "epoch": 0.04791100989752124, "grad_norm": 0.6877555064389514, "learning_rate": 2.39492119089317e-05, "loss": 0.5922, "step": 1641 }, { "epoch": 0.047940206125368603, "grad_norm": 0.8994493326438638, "learning_rate": 2.3963806187974316e-05, "loss": 0.7563, "step": 1642 }, { "epoch": 0.047969402353215965, "grad_norm": 0.8185567619002118, "learning_rate": 2.3978400467016932e-05, "loss": 0.6995, "step": 1643 }, { "epoch": 0.047998598581063326, "grad_norm": 0.7474228738259295, "learning_rate": 2.3992994746059548e-05, "loss": 0.7211, "step": 1644 }, { "epoch": 0.04802779480891069, "grad_norm": 0.7260974527844457, "learning_rate": 2.400758902510216e-05, "loss": 0.708, "step": 1645 }, { "epoch": 0.04805699103675805, "grad_norm": 0.8315826345447639, "learning_rate": 2.4022183304144776e-05, "loss": 0.7407, "step": 1646 }, { "epoch": 0.048086187264605416, "grad_norm": 0.6973986074707758, "learning_rate": 2.4036777583187392e-05, "loss": 0.6624, "step": 1647 }, { "epoch": 0.04811538349245278, "grad_norm": 0.81241246030593, "learning_rate": 2.4051371862230008e-05, "loss": 0.7365, "step": 1648 }, { "epoch": 0.04814457972030014, "grad_norm": 0.8729726713993455, "learning_rate": 2.4065966141272624e-05, "loss": 0.8171, "step": 1649 }, { "epoch": 0.0481737759481475, "grad_norm": 0.7866026595445398, "learning_rate": 2.4080560420315236e-05, "loss": 0.7414, "step": 1650 }, { "epoch": 0.04820297217599486, "grad_norm": 0.716738899535954, "learning_rate": 2.4095154699357852e-05, "loss": 0.605, "step": 1651 }, { "epoch": 0.04823216840384222, "grad_norm": 0.7952376819852832, "learning_rate": 2.4109748978400468e-05, "loss": 0.7761, "step": 1652 }, { "epoch": 0.048261364631689584, "grad_norm": 0.7175546152050754, "learning_rate": 2.4124343257443084e-05, "loss": 0.6223, "step": 1653 }, { "epoch": 0.048290560859536945, "grad_norm": 0.7109364980469038, "learning_rate": 2.4138937536485696e-05, "loss": 0.6196, "step": 1654 }, { "epoch": 0.04831975708738431, "grad_norm": 0.8190070409556515, "learning_rate": 2.4153531815528312e-05, "loss": 0.7616, "step": 1655 }, { "epoch": 0.048348953315231674, "grad_norm": 0.7986733263901635, "learning_rate": 2.4168126094570928e-05, "loss": 0.7971, "step": 1656 }, { "epoch": 0.048378149543079035, "grad_norm": 0.7866378270852062, "learning_rate": 2.4182720373613547e-05, "loss": 0.7793, "step": 1657 }, { "epoch": 0.0484073457709264, "grad_norm": 0.7371432236451306, "learning_rate": 2.419731465265616e-05, "loss": 0.7129, "step": 1658 }, { "epoch": 0.04843654199877376, "grad_norm": 0.7072249122305372, "learning_rate": 2.4211908931698776e-05, "loss": 0.6229, "step": 1659 }, { "epoch": 0.04846573822662112, "grad_norm": 0.7651011222878445, "learning_rate": 2.422650321074139e-05, "loss": 0.8012, "step": 1660 }, { "epoch": 0.04849493445446848, "grad_norm": 0.6542259387439844, "learning_rate": 2.4241097489784007e-05, "loss": 0.5303, "step": 1661 }, { "epoch": 0.04852413068231584, "grad_norm": 0.8033273708258812, "learning_rate": 2.4255691768826623e-05, "loss": 0.7173, "step": 1662 }, { "epoch": 0.04855332691016321, "grad_norm": 0.7846753651404083, "learning_rate": 2.4270286047869236e-05, "loss": 0.6476, "step": 1663 }, { "epoch": 0.04858252313801057, "grad_norm": 0.9208990311577117, "learning_rate": 2.428488032691185e-05, "loss": 0.5936, "step": 1664 }, { "epoch": 0.04861171936585793, "grad_norm": 0.7787476942664362, "learning_rate": 2.4299474605954467e-05, "loss": 0.799, "step": 1665 }, { "epoch": 0.04864091559370529, "grad_norm": 0.7752163872081247, "learning_rate": 2.4314068884997083e-05, "loss": 0.7931, "step": 1666 }, { "epoch": 0.048670111821552654, "grad_norm": 0.7045972105263132, "learning_rate": 2.4328663164039696e-05, "loss": 0.5932, "step": 1667 }, { "epoch": 0.048699308049400016, "grad_norm": 0.7750813601488671, "learning_rate": 2.434325744308231e-05, "loss": 0.8267, "step": 1668 }, { "epoch": 0.04872850427724738, "grad_norm": 0.8218443138834006, "learning_rate": 2.4357851722124927e-05, "loss": 0.7508, "step": 1669 }, { "epoch": 0.048757700505094745, "grad_norm": 0.7571018921656762, "learning_rate": 2.4372446001167543e-05, "loss": 0.7704, "step": 1670 }, { "epoch": 0.048786896732942106, "grad_norm": 0.7545681777320624, "learning_rate": 2.438704028021016e-05, "loss": 0.6891, "step": 1671 }, { "epoch": 0.04881609296078947, "grad_norm": 0.9685409588449376, "learning_rate": 2.440163455925277e-05, "loss": 0.7154, "step": 1672 }, { "epoch": 0.04884528918863683, "grad_norm": 0.7709363052224316, "learning_rate": 2.441622883829539e-05, "loss": 0.6599, "step": 1673 }, { "epoch": 0.04887448541648419, "grad_norm": 0.7529012616005811, "learning_rate": 2.4430823117338007e-05, "loss": 0.6006, "step": 1674 }, { "epoch": 0.04890368164433155, "grad_norm": 0.678132542291486, "learning_rate": 2.4445417396380622e-05, "loss": 0.5815, "step": 1675 }, { "epoch": 0.04893287787217891, "grad_norm": 0.7789119677010191, "learning_rate": 2.4460011675423235e-05, "loss": 0.7162, "step": 1676 }, { "epoch": 0.04896207410002627, "grad_norm": 0.8483916588244478, "learning_rate": 2.447460595446585e-05, "loss": 0.8231, "step": 1677 }, { "epoch": 0.04899127032787364, "grad_norm": 0.8676613457183823, "learning_rate": 2.4489200233508467e-05, "loss": 0.7994, "step": 1678 }, { "epoch": 0.049020466555721, "grad_norm": 0.7810454522435113, "learning_rate": 2.4503794512551083e-05, "loss": 0.6674, "step": 1679 }, { "epoch": 0.049049662783568364, "grad_norm": 0.8184293075475318, "learning_rate": 2.4518388791593695e-05, "loss": 0.7664, "step": 1680 }, { "epoch": 0.049078859011415725, "grad_norm": 0.7673170063003701, "learning_rate": 2.453298307063631e-05, "loss": 0.7379, "step": 1681 }, { "epoch": 0.049108055239263086, "grad_norm": 0.8966145719444487, "learning_rate": 2.4547577349678927e-05, "loss": 0.7158, "step": 1682 }, { "epoch": 0.04913725146711045, "grad_norm": 0.7327934884419685, "learning_rate": 2.4562171628721543e-05, "loss": 0.7045, "step": 1683 }, { "epoch": 0.04916644769495781, "grad_norm": 0.6876061091742092, "learning_rate": 2.457676590776416e-05, "loss": 0.6751, "step": 1684 }, { "epoch": 0.04919564392280518, "grad_norm": 0.8978938474642665, "learning_rate": 2.459136018680677e-05, "loss": 0.6422, "step": 1685 }, { "epoch": 0.04922484015065254, "grad_norm": 0.784495557510004, "learning_rate": 2.4605954465849387e-05, "loss": 0.6114, "step": 1686 }, { "epoch": 0.0492540363784999, "grad_norm": 0.7776147919885559, "learning_rate": 2.4620548744892003e-05, "loss": 0.7668, "step": 1687 }, { "epoch": 0.04928323260634726, "grad_norm": 0.7203217898482527, "learning_rate": 2.463514302393462e-05, "loss": 0.7335, "step": 1688 }, { "epoch": 0.04931242883419462, "grad_norm": 0.6623066396688603, "learning_rate": 2.4649737302977234e-05, "loss": 0.5564, "step": 1689 }, { "epoch": 0.04934162506204198, "grad_norm": 0.7118158567316236, "learning_rate": 2.466433158201985e-05, "loss": 0.6543, "step": 1690 }, { "epoch": 0.049370821289889344, "grad_norm": 0.7374884862353566, "learning_rate": 2.4678925861062466e-05, "loss": 0.7207, "step": 1691 }, { "epoch": 0.049400017517736705, "grad_norm": 0.8011407247518584, "learning_rate": 2.4693520140105082e-05, "loss": 0.7286, "step": 1692 }, { "epoch": 0.04942921374558407, "grad_norm": 0.8797157771570157, "learning_rate": 2.4708114419147694e-05, "loss": 0.7978, "step": 1693 }, { "epoch": 0.049458409973431434, "grad_norm": 0.8355370532015938, "learning_rate": 2.472270869819031e-05, "loss": 0.7015, "step": 1694 }, { "epoch": 0.049487606201278796, "grad_norm": 0.7516955854582194, "learning_rate": 2.4737302977232926e-05, "loss": 0.6573, "step": 1695 }, { "epoch": 0.04951680242912616, "grad_norm": 0.7370201894675324, "learning_rate": 2.4751897256275542e-05, "loss": 0.6879, "step": 1696 }, { "epoch": 0.04954599865697352, "grad_norm": 0.8208321724827647, "learning_rate": 2.4766491535318158e-05, "loss": 0.7062, "step": 1697 }, { "epoch": 0.04957519488482088, "grad_norm": 0.7565947176938964, "learning_rate": 2.478108581436077e-05, "loss": 0.722, "step": 1698 }, { "epoch": 0.04960439111266824, "grad_norm": 0.7806078242838184, "learning_rate": 2.4795680093403386e-05, "loss": 0.7488, "step": 1699 }, { "epoch": 0.04963358734051561, "grad_norm": 0.7018785934848855, "learning_rate": 2.4810274372446002e-05, "loss": 0.6726, "step": 1700 }, { "epoch": 0.04966278356836297, "grad_norm": 0.9850089707961749, "learning_rate": 2.4824868651488618e-05, "loss": 0.8278, "step": 1701 }, { "epoch": 0.04969197979621033, "grad_norm": 0.7704935342674122, "learning_rate": 2.4839462930531234e-05, "loss": 0.6809, "step": 1702 }, { "epoch": 0.04972117602405769, "grad_norm": 0.7257394165257678, "learning_rate": 2.4854057209573846e-05, "loss": 0.6723, "step": 1703 }, { "epoch": 0.04975037225190505, "grad_norm": 0.7264968837214926, "learning_rate": 2.4868651488616462e-05, "loss": 0.6416, "step": 1704 }, { "epoch": 0.049779568479752415, "grad_norm": 0.9744829818645709, "learning_rate": 2.488324576765908e-05, "loss": 0.6487, "step": 1705 }, { "epoch": 0.049808764707599776, "grad_norm": 0.7651289021334503, "learning_rate": 2.4897840046701697e-05, "loss": 0.7353, "step": 1706 }, { "epoch": 0.04983796093544714, "grad_norm": 1.0220564509694399, "learning_rate": 2.491243432574431e-05, "loss": 0.6739, "step": 1707 }, { "epoch": 0.049867157163294505, "grad_norm": 0.8838829668995859, "learning_rate": 2.4927028604786925e-05, "loss": 0.6062, "step": 1708 }, { "epoch": 0.049896353391141866, "grad_norm": 0.8251925687436024, "learning_rate": 2.494162288382954e-05, "loss": 0.7338, "step": 1709 }, { "epoch": 0.04992554961898923, "grad_norm": 0.7849884596246043, "learning_rate": 2.4956217162872157e-05, "loss": 0.7543, "step": 1710 }, { "epoch": 0.04995474584683659, "grad_norm": 0.8108288213320466, "learning_rate": 2.497081144191477e-05, "loss": 0.8049, "step": 1711 }, { "epoch": 0.04998394207468395, "grad_norm": 0.8404666413692403, "learning_rate": 2.4985405720957385e-05, "loss": 0.8029, "step": 1712 }, { "epoch": 0.05001313830253131, "grad_norm": 0.7518566040819074, "learning_rate": 2.5e-05, "loss": 0.7053, "step": 1713 }, { "epoch": 0.05004233453037867, "grad_norm": 0.7942010771098467, "learning_rate": 2.501459427904262e-05, "loss": 0.7906, "step": 1714 }, { "epoch": 0.05007153075822604, "grad_norm": 0.7397620519359883, "learning_rate": 2.5029188558085233e-05, "loss": 0.6403, "step": 1715 }, { "epoch": 0.0501007269860734, "grad_norm": 0.804882095054776, "learning_rate": 2.504378283712785e-05, "loss": 0.745, "step": 1716 }, { "epoch": 0.05012992321392076, "grad_norm": 0.7963664602547542, "learning_rate": 2.505837711617046e-05, "loss": 0.7506, "step": 1717 }, { "epoch": 0.050159119441768124, "grad_norm": 0.7928651572598299, "learning_rate": 2.507297139521308e-05, "loss": 0.7014, "step": 1718 }, { "epoch": 0.050188315669615485, "grad_norm": 0.701594925167188, "learning_rate": 2.5087565674255693e-05, "loss": 0.6203, "step": 1719 }, { "epoch": 0.050217511897462846, "grad_norm": 0.8241240819870032, "learning_rate": 2.510215995329831e-05, "loss": 0.7924, "step": 1720 }, { "epoch": 0.05024670812531021, "grad_norm": 0.7117412194899709, "learning_rate": 2.511675423234092e-05, "loss": 0.6536, "step": 1721 }, { "epoch": 0.05027590435315757, "grad_norm": 0.693678070152923, "learning_rate": 2.513134851138354e-05, "loss": 0.5893, "step": 1722 }, { "epoch": 0.05030510058100494, "grad_norm": 0.7251452172609334, "learning_rate": 2.5145942790426153e-05, "loss": 0.6181, "step": 1723 }, { "epoch": 0.0503342968088523, "grad_norm": 0.7526142205993556, "learning_rate": 2.516053706946877e-05, "loss": 0.7238, "step": 1724 }, { "epoch": 0.05036349303669966, "grad_norm": 0.8424471550766902, "learning_rate": 2.517513134851138e-05, "loss": 0.7233, "step": 1725 }, { "epoch": 0.05039268926454702, "grad_norm": 0.7824291068287941, "learning_rate": 2.5189725627554e-05, "loss": 0.6864, "step": 1726 }, { "epoch": 0.05042188549239438, "grad_norm": 0.8487047053402982, "learning_rate": 2.5204319906596613e-05, "loss": 0.8031, "step": 1727 }, { "epoch": 0.05045108172024174, "grad_norm": 0.7516462324469217, "learning_rate": 2.5218914185639232e-05, "loss": 0.6746, "step": 1728 }, { "epoch": 0.050480277948089104, "grad_norm": 0.8032682921191049, "learning_rate": 2.5233508464681845e-05, "loss": 0.6962, "step": 1729 }, { "epoch": 0.05050947417593647, "grad_norm": 0.891288962168708, "learning_rate": 2.524810274372446e-05, "loss": 0.6782, "step": 1730 }, { "epoch": 0.050538670403783834, "grad_norm": 0.8090666096645017, "learning_rate": 2.526269702276708e-05, "loss": 0.7495, "step": 1731 }, { "epoch": 0.050567866631631195, "grad_norm": 0.6829764247010166, "learning_rate": 2.5277291301809692e-05, "loss": 0.6142, "step": 1732 }, { "epoch": 0.050597062859478556, "grad_norm": 0.7105043382124135, "learning_rate": 2.5291885580852308e-05, "loss": 0.675, "step": 1733 }, { "epoch": 0.05062625908732592, "grad_norm": 0.8507934417900166, "learning_rate": 2.530647985989492e-05, "loss": 0.739, "step": 1734 }, { "epoch": 0.05065545531517328, "grad_norm": 0.970669379676724, "learning_rate": 2.532107413893754e-05, "loss": 0.667, "step": 1735 }, { "epoch": 0.05068465154302064, "grad_norm": 0.7539990660236454, "learning_rate": 2.5335668417980152e-05, "loss": 0.6586, "step": 1736 }, { "epoch": 0.050713847770868, "grad_norm": 0.7558170180380256, "learning_rate": 2.5350262697022768e-05, "loss": 0.6779, "step": 1737 }, { "epoch": 0.05074304399871537, "grad_norm": 1.1280280639927303, "learning_rate": 2.536485697606538e-05, "loss": 0.7731, "step": 1738 }, { "epoch": 0.05077224022656273, "grad_norm": 0.8782727723911972, "learning_rate": 2.5379451255108e-05, "loss": 0.9038, "step": 1739 }, { "epoch": 0.05080143645441009, "grad_norm": 0.7551168174871161, "learning_rate": 2.5394045534150612e-05, "loss": 0.6797, "step": 1740 }, { "epoch": 0.05083063268225745, "grad_norm": 0.8721845294953521, "learning_rate": 2.540863981319323e-05, "loss": 0.7814, "step": 1741 }, { "epoch": 0.050859828910104814, "grad_norm": 0.7120498790718649, "learning_rate": 2.5423234092235844e-05, "loss": 0.6134, "step": 1742 }, { "epoch": 0.050889025137952175, "grad_norm": 0.7550991603945031, "learning_rate": 2.543782837127846e-05, "loss": 0.6472, "step": 1743 }, { "epoch": 0.050918221365799536, "grad_norm": 0.8823580387022016, "learning_rate": 2.5452422650321072e-05, "loss": 0.7686, "step": 1744 }, { "epoch": 0.050947417593646904, "grad_norm": 0.80890724118604, "learning_rate": 2.546701692936369e-05, "loss": 0.7283, "step": 1745 }, { "epoch": 0.050976613821494265, "grad_norm": 0.8738017282275918, "learning_rate": 2.5481611208406307e-05, "loss": 0.8078, "step": 1746 }, { "epoch": 0.05100581004934163, "grad_norm": 0.8025626393431475, "learning_rate": 2.549620548744892e-05, "loss": 0.7383, "step": 1747 }, { "epoch": 0.05103500627718899, "grad_norm": 0.7074225714423528, "learning_rate": 2.551079976649154e-05, "loss": 0.6637, "step": 1748 }, { "epoch": 0.05106420250503635, "grad_norm": 0.7633535644068823, "learning_rate": 2.552539404553415e-05, "loss": 0.6712, "step": 1749 }, { "epoch": 0.05109339873288371, "grad_norm": 0.7990724219529296, "learning_rate": 2.5539988324576767e-05, "loss": 0.6096, "step": 1750 }, { "epoch": 0.05112259496073107, "grad_norm": 0.7388954995438506, "learning_rate": 2.555458260361938e-05, "loss": 0.7035, "step": 1751 }, { "epoch": 0.05115179118857843, "grad_norm": 0.7860639288856924, "learning_rate": 2.5569176882662e-05, "loss": 0.7412, "step": 1752 }, { "epoch": 0.0511809874164258, "grad_norm": 0.804218562183381, "learning_rate": 2.558377116170461e-05, "loss": 0.7258, "step": 1753 }, { "epoch": 0.05121018364427316, "grad_norm": 0.7599191516310495, "learning_rate": 2.559836544074723e-05, "loss": 0.7369, "step": 1754 }, { "epoch": 0.05123937987212052, "grad_norm": 0.8490794537454329, "learning_rate": 2.5612959719789843e-05, "loss": 0.6652, "step": 1755 }, { "epoch": 0.051268576099967884, "grad_norm": 0.7226465615303291, "learning_rate": 2.562755399883246e-05, "loss": 0.6936, "step": 1756 }, { "epoch": 0.051297772327815246, "grad_norm": 1.0151875061640783, "learning_rate": 2.564214827787507e-05, "loss": 0.7591, "step": 1757 }, { "epoch": 0.05132696855566261, "grad_norm": 0.8490411311211515, "learning_rate": 2.565674255691769e-05, "loss": 0.7788, "step": 1758 }, { "epoch": 0.05135616478350997, "grad_norm": 0.793805413153203, "learning_rate": 2.5671336835960303e-05, "loss": 0.7238, "step": 1759 }, { "epoch": 0.05138536101135733, "grad_norm": 0.7538272688250334, "learning_rate": 2.568593111500292e-05, "loss": 0.6499, "step": 1760 }, { "epoch": 0.0514145572392047, "grad_norm": 0.7325234254128535, "learning_rate": 2.570052539404553e-05, "loss": 0.6718, "step": 1761 }, { "epoch": 0.05144375346705206, "grad_norm": 0.8315908976773005, "learning_rate": 2.571511967308815e-05, "loss": 0.7833, "step": 1762 }, { "epoch": 0.05147294969489942, "grad_norm": 0.782529954370645, "learning_rate": 2.5729713952130767e-05, "loss": 0.7444, "step": 1763 }, { "epoch": 0.05150214592274678, "grad_norm": 0.7832032065949176, "learning_rate": 2.574430823117338e-05, "loss": 0.7297, "step": 1764 }, { "epoch": 0.05153134215059414, "grad_norm": 0.8015129478507096, "learning_rate": 2.5758902510216e-05, "loss": 0.6993, "step": 1765 }, { "epoch": 0.0515605383784415, "grad_norm": 0.8966439525927067, "learning_rate": 2.577349678925861e-05, "loss": 0.7051, "step": 1766 }, { "epoch": 0.051589734606288865, "grad_norm": 0.9165514927561766, "learning_rate": 2.578809106830123e-05, "loss": 0.7603, "step": 1767 }, { "epoch": 0.05161893083413623, "grad_norm": 0.8987153057570527, "learning_rate": 2.5802685347343843e-05, "loss": 0.6852, "step": 1768 }, { "epoch": 0.051648127061983594, "grad_norm": 0.9106955418878165, "learning_rate": 2.581727962638646e-05, "loss": 0.8431, "step": 1769 }, { "epoch": 0.051677323289830955, "grad_norm": 0.8183685933705803, "learning_rate": 2.583187390542907e-05, "loss": 0.7243, "step": 1770 }, { "epoch": 0.051706519517678316, "grad_norm": 0.7748260088351872, "learning_rate": 2.584646818447169e-05, "loss": 0.7776, "step": 1771 }, { "epoch": 0.05173571574552568, "grad_norm": 0.7698080197986391, "learning_rate": 2.5861062463514303e-05, "loss": 0.7122, "step": 1772 }, { "epoch": 0.05176491197337304, "grad_norm": 1.3122112688957406, "learning_rate": 2.587565674255692e-05, "loss": 0.7312, "step": 1773 }, { "epoch": 0.0517941082012204, "grad_norm": 1.3651362999407943, "learning_rate": 2.589025102159953e-05, "loss": 0.7138, "step": 1774 }, { "epoch": 0.05182330442906776, "grad_norm": 0.7404116566388953, "learning_rate": 2.590484530064215e-05, "loss": 0.6432, "step": 1775 }, { "epoch": 0.05185250065691513, "grad_norm": 0.7156088944964425, "learning_rate": 2.5919439579684763e-05, "loss": 0.6544, "step": 1776 }, { "epoch": 0.05188169688476249, "grad_norm": 0.7347081590792462, "learning_rate": 2.593403385872738e-05, "loss": 0.6681, "step": 1777 }, { "epoch": 0.05191089311260985, "grad_norm": 0.8133626699343951, "learning_rate": 2.5948628137769998e-05, "loss": 0.6919, "step": 1778 }, { "epoch": 0.05194008934045721, "grad_norm": 0.728520684031072, "learning_rate": 2.596322241681261e-05, "loss": 0.7573, "step": 1779 }, { "epoch": 0.051969285568304574, "grad_norm": 0.8106185857461241, "learning_rate": 2.597781669585523e-05, "loss": 0.7435, "step": 1780 }, { "epoch": 0.051998481796151935, "grad_norm": 0.7891346225350205, "learning_rate": 2.5992410974897842e-05, "loss": 0.7348, "step": 1781 }, { "epoch": 0.052027678023999296, "grad_norm": 0.7731865596560481, "learning_rate": 2.6007005253940458e-05, "loss": 0.7497, "step": 1782 }, { "epoch": 0.052056874251846665, "grad_norm": 0.756091112821642, "learning_rate": 2.602159953298307e-05, "loss": 0.7447, "step": 1783 }, { "epoch": 0.052086070479694026, "grad_norm": 0.8054512333859641, "learning_rate": 2.603619381202569e-05, "loss": 0.7673, "step": 1784 }, { "epoch": 0.05211526670754139, "grad_norm": 0.7459867212569374, "learning_rate": 2.6050788091068302e-05, "loss": 0.7418, "step": 1785 }, { "epoch": 0.05214446293538875, "grad_norm": 1.0170797980744237, "learning_rate": 2.6065382370110918e-05, "loss": 0.7863, "step": 1786 }, { "epoch": 0.05217365916323611, "grad_norm": 0.7654318785922012, "learning_rate": 2.607997664915353e-05, "loss": 0.7264, "step": 1787 }, { "epoch": 0.05220285539108347, "grad_norm": 0.8212332750583072, "learning_rate": 2.609457092819615e-05, "loss": 0.7458, "step": 1788 }, { "epoch": 0.05223205161893083, "grad_norm": 0.7335628179519343, "learning_rate": 2.6109165207238762e-05, "loss": 0.6922, "step": 1789 }, { "epoch": 0.05226124784677819, "grad_norm": 0.8407793925365287, "learning_rate": 2.6123759486281378e-05, "loss": 0.7643, "step": 1790 }, { "epoch": 0.05229044407462556, "grad_norm": 0.8960123600707761, "learning_rate": 2.613835376532399e-05, "loss": 0.7699, "step": 1791 }, { "epoch": 0.05231964030247292, "grad_norm": 0.7595670610143177, "learning_rate": 2.615294804436661e-05, "loss": 0.7283, "step": 1792 }, { "epoch": 0.052348836530320283, "grad_norm": 0.7261651056223113, "learning_rate": 2.6167542323409222e-05, "loss": 0.6319, "step": 1793 }, { "epoch": 0.052378032758167645, "grad_norm": 0.7648761227180091, "learning_rate": 2.618213660245184e-05, "loss": 0.7381, "step": 1794 }, { "epoch": 0.052407228986015006, "grad_norm": 0.8387424819907475, "learning_rate": 2.6196730881494457e-05, "loss": 0.6668, "step": 1795 }, { "epoch": 0.05243642521386237, "grad_norm": 0.7418175566665847, "learning_rate": 2.621132516053707e-05, "loss": 0.6392, "step": 1796 }, { "epoch": 0.05246562144170973, "grad_norm": 0.7985399888571335, "learning_rate": 2.622591943957969e-05, "loss": 0.7411, "step": 1797 }, { "epoch": 0.052494817669557096, "grad_norm": 0.8348130068379453, "learning_rate": 2.62405137186223e-05, "loss": 0.7317, "step": 1798 }, { "epoch": 0.05252401389740446, "grad_norm": 0.7139088239181827, "learning_rate": 2.6255107997664917e-05, "loss": 0.6884, "step": 1799 }, { "epoch": 0.05255321012525182, "grad_norm": 0.7136020323160215, "learning_rate": 2.626970227670753e-05, "loss": 0.6896, "step": 1800 }, { "epoch": 0.05258240635309918, "grad_norm": 0.7955757330542688, "learning_rate": 2.628429655575015e-05, "loss": 0.7495, "step": 1801 }, { "epoch": 0.05261160258094654, "grad_norm": 1.99066006139599, "learning_rate": 2.629889083479276e-05, "loss": 0.7089, "step": 1802 }, { "epoch": 0.0526407988087939, "grad_norm": 0.755267633697472, "learning_rate": 2.6313485113835377e-05, "loss": 0.7271, "step": 1803 }, { "epoch": 0.052669995036641264, "grad_norm": 0.7152119152818853, "learning_rate": 2.632807939287799e-05, "loss": 0.6984, "step": 1804 }, { "epoch": 0.052699191264488625, "grad_norm": 0.6968073969032005, "learning_rate": 2.634267367192061e-05, "loss": 0.6464, "step": 1805 }, { "epoch": 0.05272838749233599, "grad_norm": 0.7634824537003267, "learning_rate": 2.635726795096322e-05, "loss": 0.7039, "step": 1806 }, { "epoch": 0.052757583720183354, "grad_norm": 0.7728725213802075, "learning_rate": 2.637186223000584e-05, "loss": 0.7299, "step": 1807 }, { "epoch": 0.052786779948030715, "grad_norm": 0.6964952382614318, "learning_rate": 2.6386456509048453e-05, "loss": 0.648, "step": 1808 }, { "epoch": 0.05281597617587808, "grad_norm": 0.766183705444211, "learning_rate": 2.640105078809107e-05, "loss": 0.7588, "step": 1809 }, { "epoch": 0.05284517240372544, "grad_norm": 1.771776911014181, "learning_rate": 2.6415645067133688e-05, "loss": 0.6404, "step": 1810 }, { "epoch": 0.0528743686315728, "grad_norm": 0.7802399197112299, "learning_rate": 2.64302393461763e-05, "loss": 0.7509, "step": 1811 }, { "epoch": 0.05290356485942016, "grad_norm": 0.8373986994859132, "learning_rate": 2.6444833625218916e-05, "loss": 0.8175, "step": 1812 }, { "epoch": 0.05293276108726753, "grad_norm": 0.873748022931118, "learning_rate": 2.645942790426153e-05, "loss": 0.7743, "step": 1813 }, { "epoch": 0.05296195731511489, "grad_norm": 0.729845272571415, "learning_rate": 2.6474022183304148e-05, "loss": 0.6425, "step": 1814 }, { "epoch": 0.05299115354296225, "grad_norm": 0.7656604470788858, "learning_rate": 2.648861646234676e-05, "loss": 0.7446, "step": 1815 }, { "epoch": 0.05302034977080961, "grad_norm": 0.7702614121351046, "learning_rate": 2.650321074138938e-05, "loss": 0.7859, "step": 1816 }, { "epoch": 0.05304954599865697, "grad_norm": 0.7053766134640164, "learning_rate": 2.651780502043199e-05, "loss": 0.6263, "step": 1817 }, { "epoch": 0.053078742226504334, "grad_norm": 0.704156058828942, "learning_rate": 2.6532399299474608e-05, "loss": 0.6447, "step": 1818 }, { "epoch": 0.053107938454351696, "grad_norm": 0.9604879697931715, "learning_rate": 2.654699357851722e-05, "loss": 0.7527, "step": 1819 }, { "epoch": 0.05313713468219906, "grad_norm": 0.7485651527863578, "learning_rate": 2.656158785755984e-05, "loss": 0.7621, "step": 1820 }, { "epoch": 0.053166330910046425, "grad_norm": 0.6396185766255882, "learning_rate": 2.6576182136602452e-05, "loss": 0.5775, "step": 1821 }, { "epoch": 0.053195527137893786, "grad_norm": 0.7411128354855943, "learning_rate": 2.6590776415645068e-05, "loss": 0.6611, "step": 1822 }, { "epoch": 0.05322472336574115, "grad_norm": 0.7803411651399573, "learning_rate": 2.660537069468768e-05, "loss": 0.7738, "step": 1823 }, { "epoch": 0.05325391959358851, "grad_norm": 0.7566024600429612, "learning_rate": 2.66199649737303e-05, "loss": 0.7639, "step": 1824 }, { "epoch": 0.05328311582143587, "grad_norm": 0.7257971244203006, "learning_rate": 2.6634559252772912e-05, "loss": 0.6927, "step": 1825 }, { "epoch": 0.05331231204928323, "grad_norm": 0.7493589677301807, "learning_rate": 2.6649153531815528e-05, "loss": 0.7434, "step": 1826 }, { "epoch": 0.05334150827713059, "grad_norm": 0.70628797818255, "learning_rate": 2.6663747810858147e-05, "loss": 0.6656, "step": 1827 }, { "epoch": 0.05337070450497796, "grad_norm": 0.8947906482191726, "learning_rate": 2.667834208990076e-05, "loss": 0.7735, "step": 1828 }, { "epoch": 0.05339990073282532, "grad_norm": 0.6988164732804203, "learning_rate": 2.669293636894338e-05, "loss": 0.6338, "step": 1829 }, { "epoch": 0.05342909696067268, "grad_norm": 0.7813702180471842, "learning_rate": 2.6707530647985988e-05, "loss": 0.7314, "step": 1830 }, { "epoch": 0.053458293188520044, "grad_norm": 0.7063636476406036, "learning_rate": 2.6722124927028608e-05, "loss": 0.6653, "step": 1831 }, { "epoch": 0.053487489416367405, "grad_norm": 0.7588603497748079, "learning_rate": 2.673671920607122e-05, "loss": 0.5975, "step": 1832 }, { "epoch": 0.053516685644214766, "grad_norm": 0.8725761605568159, "learning_rate": 2.675131348511384e-05, "loss": 0.6899, "step": 1833 }, { "epoch": 0.05354588187206213, "grad_norm": 0.7571079281942519, "learning_rate": 2.676590776415645e-05, "loss": 0.7215, "step": 1834 }, { "epoch": 0.05357507809990949, "grad_norm": 0.7558115634543157, "learning_rate": 2.6780502043199068e-05, "loss": 0.6744, "step": 1835 }, { "epoch": 0.05360427432775686, "grad_norm": 0.8116082597401044, "learning_rate": 2.679509632224168e-05, "loss": 0.8601, "step": 1836 }, { "epoch": 0.05363347055560422, "grad_norm": 0.8065491873345662, "learning_rate": 2.68096906012843e-05, "loss": 0.6475, "step": 1837 }, { "epoch": 0.05366266678345158, "grad_norm": 0.7654748292899181, "learning_rate": 2.6824284880326912e-05, "loss": 0.6631, "step": 1838 }, { "epoch": 0.05369186301129894, "grad_norm": 0.7813927840739707, "learning_rate": 2.6838879159369528e-05, "loss": 0.7025, "step": 1839 }, { "epoch": 0.0537210592391463, "grad_norm": 0.8060094136135951, "learning_rate": 2.685347343841214e-05, "loss": 0.7219, "step": 1840 }, { "epoch": 0.05375025546699366, "grad_norm": 0.7013013678958799, "learning_rate": 2.686806771745476e-05, "loss": 0.6679, "step": 1841 }, { "epoch": 0.053779451694841024, "grad_norm": 0.7227775520324192, "learning_rate": 2.688266199649738e-05, "loss": 0.6379, "step": 1842 }, { "epoch": 0.05380864792268839, "grad_norm": 0.8422530924483087, "learning_rate": 2.689725627553999e-05, "loss": 0.7853, "step": 1843 }, { "epoch": 0.05383784415053575, "grad_norm": 0.7394839589173713, "learning_rate": 2.6911850554582607e-05, "loss": 0.693, "step": 1844 }, { "epoch": 0.053867040378383114, "grad_norm": 0.8605647045234947, "learning_rate": 2.692644483362522e-05, "loss": 0.7521, "step": 1845 }, { "epoch": 0.053896236606230476, "grad_norm": 0.6885488414842639, "learning_rate": 2.694103911266784e-05, "loss": 0.6658, "step": 1846 }, { "epoch": 0.05392543283407784, "grad_norm": 0.8604484978281558, "learning_rate": 2.695563339171045e-05, "loss": 0.804, "step": 1847 }, { "epoch": 0.0539546290619252, "grad_norm": 0.7752355656494109, "learning_rate": 2.6970227670753067e-05, "loss": 0.6646, "step": 1848 }, { "epoch": 0.05398382528977256, "grad_norm": 0.9096811138278169, "learning_rate": 2.698482194979568e-05, "loss": 0.7464, "step": 1849 }, { "epoch": 0.05401302151761992, "grad_norm": 0.7459233772888204, "learning_rate": 2.69994162288383e-05, "loss": 0.7392, "step": 1850 }, { "epoch": 0.05404221774546729, "grad_norm": 0.743315070816246, "learning_rate": 2.701401050788091e-05, "loss": 0.7496, "step": 1851 }, { "epoch": 0.05407141397331465, "grad_norm": 0.7870250932771127, "learning_rate": 2.7028604786923527e-05, "loss": 0.689, "step": 1852 }, { "epoch": 0.05410061020116201, "grad_norm": 0.7720886792692762, "learning_rate": 2.704319906596614e-05, "loss": 0.7444, "step": 1853 }, { "epoch": 0.05412980642900937, "grad_norm": 0.7119147029921538, "learning_rate": 2.705779334500876e-05, "loss": 0.6706, "step": 1854 }, { "epoch": 0.05415900265685673, "grad_norm": 0.7279609100064899, "learning_rate": 2.707238762405137e-05, "loss": 0.6456, "step": 1855 }, { "epoch": 0.054188198884704095, "grad_norm": 0.6935457551766754, "learning_rate": 2.708698190309399e-05, "loss": 0.6225, "step": 1856 }, { "epoch": 0.054217395112551456, "grad_norm": 0.7934839346056517, "learning_rate": 2.71015761821366e-05, "loss": 0.7807, "step": 1857 }, { "epoch": 0.05424659134039882, "grad_norm": 0.751176827379782, "learning_rate": 2.711617046117922e-05, "loss": 0.6792, "step": 1858 }, { "epoch": 0.054275787568246185, "grad_norm": 0.7862583988514615, "learning_rate": 2.7130764740221838e-05, "loss": 0.7691, "step": 1859 }, { "epoch": 0.054304983796093546, "grad_norm": 0.8285476065690418, "learning_rate": 2.714535901926445e-05, "loss": 0.7623, "step": 1860 }, { "epoch": 0.05433418002394091, "grad_norm": 0.6907060246643495, "learning_rate": 2.7159953298307066e-05, "loss": 0.6116, "step": 1861 }, { "epoch": 0.05436337625178827, "grad_norm": 0.72757770015939, "learning_rate": 2.717454757734968e-05, "loss": 0.706, "step": 1862 }, { "epoch": 0.05439257247963563, "grad_norm": 1.412686233917281, "learning_rate": 2.7189141856392298e-05, "loss": 0.7303, "step": 1863 }, { "epoch": 0.05442176870748299, "grad_norm": 0.7333070546258905, "learning_rate": 2.720373613543491e-05, "loss": 0.6845, "step": 1864 }, { "epoch": 0.05445096493533035, "grad_norm": 0.8734106096742907, "learning_rate": 2.7218330414477526e-05, "loss": 0.7311, "step": 1865 }, { "epoch": 0.05448016116317772, "grad_norm": 2.0851815049279905, "learning_rate": 2.723292469352014e-05, "loss": 0.7345, "step": 1866 }, { "epoch": 0.05450935739102508, "grad_norm": 0.8454920623442774, "learning_rate": 2.7247518972562758e-05, "loss": 0.7975, "step": 1867 }, { "epoch": 0.05453855361887244, "grad_norm": 0.7559319989797078, "learning_rate": 2.726211325160537e-05, "loss": 0.7411, "step": 1868 }, { "epoch": 0.054567749846719804, "grad_norm": 0.7795864193762686, "learning_rate": 2.727670753064799e-05, "loss": 0.7684, "step": 1869 }, { "epoch": 0.054596946074567165, "grad_norm": 0.6899644834240815, "learning_rate": 2.72913018096906e-05, "loss": 0.63, "step": 1870 }, { "epoch": 0.054626142302414527, "grad_norm": 0.8179943596747299, "learning_rate": 2.7305896088733218e-05, "loss": 0.7267, "step": 1871 }, { "epoch": 0.05465533853026189, "grad_norm": 0.7837865400597009, "learning_rate": 2.732049036777583e-05, "loss": 0.758, "step": 1872 }, { "epoch": 0.05468453475810925, "grad_norm": 0.8288063702014334, "learning_rate": 2.733508464681845e-05, "loss": 0.7659, "step": 1873 }, { "epoch": 0.05471373098595662, "grad_norm": 0.7132947521464938, "learning_rate": 2.7349678925861065e-05, "loss": 0.7059, "step": 1874 }, { "epoch": 0.05474292721380398, "grad_norm": 0.6587339903362783, "learning_rate": 2.7364273204903678e-05, "loss": 0.6015, "step": 1875 }, { "epoch": 0.05477212344165134, "grad_norm": 0.799324642233701, "learning_rate": 2.7378867483946297e-05, "loss": 0.7061, "step": 1876 }, { "epoch": 0.0548013196694987, "grad_norm": 0.765377777544823, "learning_rate": 2.739346176298891e-05, "loss": 0.7237, "step": 1877 }, { "epoch": 0.05483051589734606, "grad_norm": 0.7419865970535152, "learning_rate": 2.7408056042031526e-05, "loss": 0.7409, "step": 1878 }, { "epoch": 0.05485971212519342, "grad_norm": 0.8031415130267082, "learning_rate": 2.7422650321074138e-05, "loss": 0.7268, "step": 1879 }, { "epoch": 0.054888908353040784, "grad_norm": 0.7894601064817182, "learning_rate": 2.7437244600116757e-05, "loss": 0.7622, "step": 1880 }, { "epoch": 0.05491810458088815, "grad_norm": 0.7781787445031108, "learning_rate": 2.745183887915937e-05, "loss": 0.6889, "step": 1881 }, { "epoch": 0.054947300808735514, "grad_norm": 0.7623423748441147, "learning_rate": 2.746643315820199e-05, "loss": 0.7653, "step": 1882 }, { "epoch": 0.054976497036582875, "grad_norm": 0.7586821006657745, "learning_rate": 2.74810274372446e-05, "loss": 0.7793, "step": 1883 }, { "epoch": 0.055005693264430236, "grad_norm": 0.8841286853181606, "learning_rate": 2.7495621716287217e-05, "loss": 0.7053, "step": 1884 }, { "epoch": 0.0550348894922776, "grad_norm": 0.6900905546127452, "learning_rate": 2.751021599532983e-05, "loss": 0.6538, "step": 1885 }, { "epoch": 0.05506408572012496, "grad_norm": 1.290607751926574, "learning_rate": 2.752481027437245e-05, "loss": 0.7014, "step": 1886 }, { "epoch": 0.05509328194797232, "grad_norm": 0.8217801980714736, "learning_rate": 2.753940455341506e-05, "loss": 0.7379, "step": 1887 }, { "epoch": 0.05512247817581968, "grad_norm": 0.7765813142771542, "learning_rate": 2.7553998832457677e-05, "loss": 0.7459, "step": 1888 }, { "epoch": 0.05515167440366705, "grad_norm": 0.8650320484871414, "learning_rate": 2.756859311150029e-05, "loss": 0.7646, "step": 1889 }, { "epoch": 0.05518087063151441, "grad_norm": 0.7134610122623136, "learning_rate": 2.758318739054291e-05, "loss": 0.6587, "step": 1890 }, { "epoch": 0.05521006685936177, "grad_norm": 0.7546129228780909, "learning_rate": 2.7597781669585525e-05, "loss": 0.6607, "step": 1891 }, { "epoch": 0.05523926308720913, "grad_norm": 0.7631538851373664, "learning_rate": 2.7612375948628137e-05, "loss": 0.6633, "step": 1892 }, { "epoch": 0.055268459315056494, "grad_norm": 0.7974312478919802, "learning_rate": 2.7626970227670757e-05, "loss": 0.6941, "step": 1893 }, { "epoch": 0.055297655542903855, "grad_norm": 0.8374130931005153, "learning_rate": 2.764156450671337e-05, "loss": 0.7137, "step": 1894 }, { "epoch": 0.055326851770751216, "grad_norm": 0.6775221364683336, "learning_rate": 2.7656158785755988e-05, "loss": 0.5826, "step": 1895 }, { "epoch": 0.055356047998598584, "grad_norm": 0.7167706549881195, "learning_rate": 2.76707530647986e-05, "loss": 0.6715, "step": 1896 }, { "epoch": 0.055385244226445945, "grad_norm": 0.6826645306132688, "learning_rate": 2.7685347343841217e-05, "loss": 0.6118, "step": 1897 }, { "epoch": 0.05541444045429331, "grad_norm": 0.6951962077037099, "learning_rate": 2.769994162288383e-05, "loss": 0.667, "step": 1898 }, { "epoch": 0.05544363668214067, "grad_norm": 0.7822956436027969, "learning_rate": 2.7714535901926448e-05, "loss": 0.6768, "step": 1899 }, { "epoch": 0.05547283290998803, "grad_norm": 0.7206994472851659, "learning_rate": 2.772913018096906e-05, "loss": 0.7035, "step": 1900 }, { "epoch": 0.05550202913783539, "grad_norm": 0.7734476182517556, "learning_rate": 2.7743724460011677e-05, "loss": 0.7549, "step": 1901 }, { "epoch": 0.05553122536568275, "grad_norm": 0.777499197834971, "learning_rate": 2.775831873905429e-05, "loss": 0.6801, "step": 1902 }, { "epoch": 0.05556042159353011, "grad_norm": 0.8300423066007219, "learning_rate": 2.7772913018096908e-05, "loss": 0.7773, "step": 1903 }, { "epoch": 0.05558961782137748, "grad_norm": 0.7818571168855057, "learning_rate": 2.778750729713952e-05, "loss": 0.644, "step": 1904 }, { "epoch": 0.05561881404922484, "grad_norm": 0.7460219294306307, "learning_rate": 2.7802101576182137e-05, "loss": 0.7068, "step": 1905 }, { "epoch": 0.0556480102770722, "grad_norm": 0.8249550679368017, "learning_rate": 2.7816695855224756e-05, "loss": 0.7474, "step": 1906 }, { "epoch": 0.055677206504919564, "grad_norm": 0.7658078577386455, "learning_rate": 2.783129013426737e-05, "loss": 0.7157, "step": 1907 }, { "epoch": 0.055706402732766926, "grad_norm": 0.7570273469472032, "learning_rate": 2.7845884413309988e-05, "loss": 0.6673, "step": 1908 }, { "epoch": 0.05573559896061429, "grad_norm": 0.7854006520993765, "learning_rate": 2.78604786923526e-05, "loss": 0.7412, "step": 1909 }, { "epoch": 0.05576479518846165, "grad_norm": 0.7640032828908364, "learning_rate": 2.7875072971395216e-05, "loss": 0.6744, "step": 1910 }, { "epoch": 0.055793991416309016, "grad_norm": 0.7088991312717514, "learning_rate": 2.788966725043783e-05, "loss": 0.6378, "step": 1911 }, { "epoch": 0.05582318764415638, "grad_norm": 0.8666213416028352, "learning_rate": 2.7904261529480448e-05, "loss": 0.7937, "step": 1912 }, { "epoch": 0.05585238387200374, "grad_norm": 0.7126677097131868, "learning_rate": 2.791885580852306e-05, "loss": 0.6732, "step": 1913 }, { "epoch": 0.0558815800998511, "grad_norm": 0.77586699212342, "learning_rate": 2.7933450087565676e-05, "loss": 0.624, "step": 1914 }, { "epoch": 0.05591077632769846, "grad_norm": 0.8051519927640121, "learning_rate": 2.794804436660829e-05, "loss": 0.7449, "step": 1915 }, { "epoch": 0.05593997255554582, "grad_norm": 0.776231928019395, "learning_rate": 2.7962638645650908e-05, "loss": 0.7352, "step": 1916 }, { "epoch": 0.05596916878339318, "grad_norm": 0.7520168667103575, "learning_rate": 2.797723292469352e-05, "loss": 0.734, "step": 1917 }, { "epoch": 0.055998365011240545, "grad_norm": 1.058358754231635, "learning_rate": 2.7991827203736136e-05, "loss": 0.7252, "step": 1918 }, { "epoch": 0.05602756123908791, "grad_norm": 0.7354817452978049, "learning_rate": 2.800642148277875e-05, "loss": 0.6847, "step": 1919 }, { "epoch": 0.056056757466935274, "grad_norm": 0.7209387955625045, "learning_rate": 2.8021015761821368e-05, "loss": 0.659, "step": 1920 }, { "epoch": 0.056085953694782635, "grad_norm": 0.8651641613758695, "learning_rate": 2.803561004086398e-05, "loss": 0.6931, "step": 1921 }, { "epoch": 0.056115149922629996, "grad_norm": 0.8391139289884507, "learning_rate": 2.80502043199066e-05, "loss": 0.8077, "step": 1922 }, { "epoch": 0.05614434615047736, "grad_norm": 0.7603094197042523, "learning_rate": 2.8064798598949215e-05, "loss": 0.7098, "step": 1923 }, { "epoch": 0.05617354237832472, "grad_norm": 0.7027500648866891, "learning_rate": 2.8079392877991828e-05, "loss": 0.6319, "step": 1924 }, { "epoch": 0.05620273860617208, "grad_norm": 0.7992053035789057, "learning_rate": 2.8093987157034447e-05, "loss": 0.6844, "step": 1925 }, { "epoch": 0.05623193483401945, "grad_norm": 0.7712628887480605, "learning_rate": 2.810858143607706e-05, "loss": 0.6962, "step": 1926 }, { "epoch": 0.05626113106186681, "grad_norm": 0.6855290174127623, "learning_rate": 2.8123175715119675e-05, "loss": 0.686, "step": 1927 }, { "epoch": 0.05629032728971417, "grad_norm": 0.750422156356603, "learning_rate": 2.8137769994162288e-05, "loss": 0.7575, "step": 1928 }, { "epoch": 0.05631952351756153, "grad_norm": 0.7698344899901517, "learning_rate": 2.8152364273204907e-05, "loss": 0.6723, "step": 1929 }, { "epoch": 0.05634871974540889, "grad_norm": 0.817407810493688, "learning_rate": 2.816695855224752e-05, "loss": 0.7482, "step": 1930 }, { "epoch": 0.056377915973256254, "grad_norm": 0.775448411409861, "learning_rate": 2.8181552831290135e-05, "loss": 0.7348, "step": 1931 }, { "epoch": 0.056407112201103615, "grad_norm": 0.6946403066095214, "learning_rate": 2.8196147110332748e-05, "loss": 0.6541, "step": 1932 }, { "epoch": 0.056436308428950976, "grad_norm": 0.7345948842153835, "learning_rate": 2.8210741389375367e-05, "loss": 0.66, "step": 1933 }, { "epoch": 0.056465504656798345, "grad_norm": 0.7301959077670072, "learning_rate": 2.822533566841798e-05, "loss": 0.68, "step": 1934 }, { "epoch": 0.056494700884645706, "grad_norm": 0.767477511739845, "learning_rate": 2.82399299474606e-05, "loss": 0.7533, "step": 1935 }, { "epoch": 0.05652389711249307, "grad_norm": 1.8983895029255269, "learning_rate": 2.825452422650321e-05, "loss": 0.8053, "step": 1936 }, { "epoch": 0.05655309334034043, "grad_norm": 1.2677211437843492, "learning_rate": 2.8269118505545827e-05, "loss": 0.806, "step": 1937 }, { "epoch": 0.05658228956818779, "grad_norm": 0.8191682555391289, "learning_rate": 2.8283712784588446e-05, "loss": 0.7928, "step": 1938 }, { "epoch": 0.05661148579603515, "grad_norm": 0.6847615916662546, "learning_rate": 2.829830706363106e-05, "loss": 0.6074, "step": 1939 }, { "epoch": 0.05664068202388251, "grad_norm": 0.7019127829772073, "learning_rate": 2.8312901342673675e-05, "loss": 0.641, "step": 1940 }, { "epoch": 0.05666987825172988, "grad_norm": 0.7997850833309983, "learning_rate": 2.8327495621716287e-05, "loss": 0.7293, "step": 1941 }, { "epoch": 0.05669907447957724, "grad_norm": 0.7337762951855334, "learning_rate": 2.8342089900758906e-05, "loss": 0.7144, "step": 1942 }, { "epoch": 0.0567282707074246, "grad_norm": 0.7931749136084795, "learning_rate": 2.835668417980152e-05, "loss": 0.6964, "step": 1943 }, { "epoch": 0.056757466935271964, "grad_norm": 0.7393264530809563, "learning_rate": 2.8371278458844135e-05, "loss": 0.7151, "step": 1944 }, { "epoch": 0.056786663163119325, "grad_norm": 0.7002699013492986, "learning_rate": 2.8385872737886747e-05, "loss": 0.6544, "step": 1945 }, { "epoch": 0.056815859390966686, "grad_norm": 1.3614661559831698, "learning_rate": 2.8400467016929366e-05, "loss": 0.7149, "step": 1946 }, { "epoch": 0.05684505561881405, "grad_norm": 0.6734209555414888, "learning_rate": 2.841506129597198e-05, "loss": 0.6075, "step": 1947 }, { "epoch": 0.05687425184666141, "grad_norm": 0.7390693965393167, "learning_rate": 2.8429655575014598e-05, "loss": 0.6732, "step": 1948 }, { "epoch": 0.056903448074508776, "grad_norm": 0.6590392462498675, "learning_rate": 2.844424985405721e-05, "loss": 0.5721, "step": 1949 }, { "epoch": 0.05693264430235614, "grad_norm": 0.643915618620154, "learning_rate": 2.8458844133099826e-05, "loss": 0.5673, "step": 1950 }, { "epoch": 0.0569618405302035, "grad_norm": 0.6921292595153324, "learning_rate": 2.847343841214244e-05, "loss": 0.6649, "step": 1951 }, { "epoch": 0.05699103675805086, "grad_norm": 0.9115585078569772, "learning_rate": 2.8488032691185058e-05, "loss": 0.7457, "step": 1952 }, { "epoch": 0.05702023298589822, "grad_norm": 0.7766394556153873, "learning_rate": 2.850262697022767e-05, "loss": 0.7622, "step": 1953 }, { "epoch": 0.05704942921374558, "grad_norm": 0.7303364372237896, "learning_rate": 2.8517221249270286e-05, "loss": 0.7073, "step": 1954 }, { "epoch": 0.057078625441592944, "grad_norm": 0.7233561419026103, "learning_rate": 2.8531815528312906e-05, "loss": 0.6988, "step": 1955 }, { "epoch": 0.057107821669440305, "grad_norm": 0.6907872224009045, "learning_rate": 2.8546409807355518e-05, "loss": 0.6088, "step": 1956 }, { "epoch": 0.05713701789728767, "grad_norm": 0.8026943709232978, "learning_rate": 2.8561004086398134e-05, "loss": 0.7282, "step": 1957 }, { "epoch": 0.057166214125135034, "grad_norm": 0.7521666964752283, "learning_rate": 2.8575598365440746e-05, "loss": 0.692, "step": 1958 }, { "epoch": 0.057195410352982395, "grad_norm": 0.7228911359127481, "learning_rate": 2.8590192644483366e-05, "loss": 0.7112, "step": 1959 }, { "epoch": 0.05722460658082976, "grad_norm": 0.6826114670614862, "learning_rate": 2.8604786923525978e-05, "loss": 0.7033, "step": 1960 }, { "epoch": 0.05725380280867712, "grad_norm": 0.6915640565232971, "learning_rate": 2.8619381202568597e-05, "loss": 0.6111, "step": 1961 }, { "epoch": 0.05728299903652448, "grad_norm": 0.7446660685181534, "learning_rate": 2.863397548161121e-05, "loss": 0.651, "step": 1962 }, { "epoch": 0.05731219526437184, "grad_norm": 0.7358632424592183, "learning_rate": 2.8648569760653826e-05, "loss": 0.7147, "step": 1963 }, { "epoch": 0.05734139149221921, "grad_norm": 0.7585155839415567, "learning_rate": 2.8663164039696438e-05, "loss": 0.773, "step": 1964 }, { "epoch": 0.05737058772006657, "grad_norm": 0.7468690318581026, "learning_rate": 2.8677758318739057e-05, "loss": 0.7414, "step": 1965 }, { "epoch": 0.05739978394791393, "grad_norm": 0.7635385151188576, "learning_rate": 2.869235259778167e-05, "loss": 0.7587, "step": 1966 }, { "epoch": 0.05742898017576129, "grad_norm": 0.7081854129558616, "learning_rate": 2.8706946876824286e-05, "loss": 0.6392, "step": 1967 }, { "epoch": 0.05745817640360865, "grad_norm": 0.8336477162659257, "learning_rate": 2.8721541155866898e-05, "loss": 0.7332, "step": 1968 }, { "epoch": 0.057487372631456014, "grad_norm": 0.7825233285929507, "learning_rate": 2.8736135434909517e-05, "loss": 0.7005, "step": 1969 }, { "epoch": 0.057516568859303376, "grad_norm": 0.7363608503155906, "learning_rate": 2.8750729713952133e-05, "loss": 0.6317, "step": 1970 }, { "epoch": 0.05754576508715074, "grad_norm": 0.8468713520883201, "learning_rate": 2.8765323992994746e-05, "loss": 0.7688, "step": 1971 }, { "epoch": 0.057574961314998105, "grad_norm": 0.7641154949798411, "learning_rate": 2.8779918272037365e-05, "loss": 0.6106, "step": 1972 }, { "epoch": 0.057604157542845466, "grad_norm": 0.7792861139715576, "learning_rate": 2.8794512551079977e-05, "loss": 0.7002, "step": 1973 }, { "epoch": 0.05763335377069283, "grad_norm": 0.7841458609744488, "learning_rate": 2.8809106830122597e-05, "loss": 0.7607, "step": 1974 }, { "epoch": 0.05766254999854019, "grad_norm": 0.8180002921683586, "learning_rate": 2.882370110916521e-05, "loss": 0.7251, "step": 1975 }, { "epoch": 0.05769174622638755, "grad_norm": 0.7382313910417808, "learning_rate": 2.8838295388207825e-05, "loss": 0.675, "step": 1976 }, { "epoch": 0.05772094245423491, "grad_norm": 0.7305788968785295, "learning_rate": 2.8852889667250437e-05, "loss": 0.6649, "step": 1977 }, { "epoch": 0.05775013868208227, "grad_norm": 0.7635715542860619, "learning_rate": 2.8867483946293057e-05, "loss": 0.6997, "step": 1978 }, { "epoch": 0.05777933490992964, "grad_norm": 0.7383079732159924, "learning_rate": 2.888207822533567e-05, "loss": 0.7498, "step": 1979 }, { "epoch": 0.057808531137777, "grad_norm": 0.8069310538403076, "learning_rate": 2.8896672504378285e-05, "loss": 0.7859, "step": 1980 }, { "epoch": 0.05783772736562436, "grad_norm": 0.6777773095544266, "learning_rate": 2.8911266783420897e-05, "loss": 0.6155, "step": 1981 }, { "epoch": 0.057866923593471724, "grad_norm": 0.7591682171370907, "learning_rate": 2.8925861062463517e-05, "loss": 0.6977, "step": 1982 }, { "epoch": 0.057896119821319085, "grad_norm": 0.8345507029285516, "learning_rate": 2.894045534150613e-05, "loss": 0.7856, "step": 1983 }, { "epoch": 0.057925316049166446, "grad_norm": 0.7205433100753799, "learning_rate": 2.8955049620548745e-05, "loss": 0.6809, "step": 1984 }, { "epoch": 0.05795451227701381, "grad_norm": 0.7410619691740737, "learning_rate": 2.8969643899591357e-05, "loss": 0.7101, "step": 1985 }, { "epoch": 0.05798370850486117, "grad_norm": 0.7534509581766418, "learning_rate": 2.8984238178633977e-05, "loss": 0.7521, "step": 1986 }, { "epoch": 0.05801290473270854, "grad_norm": 0.656282713580173, "learning_rate": 2.8998832457676596e-05, "loss": 0.5934, "step": 1987 }, { "epoch": 0.0580421009605559, "grad_norm": 0.7192878909488579, "learning_rate": 2.901342673671921e-05, "loss": 0.6289, "step": 1988 }, { "epoch": 0.05807129718840326, "grad_norm": 0.7387307059731915, "learning_rate": 2.9028021015761824e-05, "loss": 0.7, "step": 1989 }, { "epoch": 0.05810049341625062, "grad_norm": 0.7285598070418586, "learning_rate": 2.9042615294804437e-05, "loss": 0.7022, "step": 1990 }, { "epoch": 0.05812968964409798, "grad_norm": 0.893940100295026, "learning_rate": 2.9057209573847056e-05, "loss": 0.7181, "step": 1991 }, { "epoch": 0.05815888587194534, "grad_norm": 0.653105418823501, "learning_rate": 2.907180385288967e-05, "loss": 0.6018, "step": 1992 }, { "epoch": 0.058188082099792704, "grad_norm": 0.6944276471293273, "learning_rate": 2.9086398131932284e-05, "loss": 0.6321, "step": 1993 }, { "epoch": 0.05821727832764007, "grad_norm": 0.7525533828014082, "learning_rate": 2.9100992410974897e-05, "loss": 0.6692, "step": 1994 }, { "epoch": 0.05824647455548743, "grad_norm": 0.7160766890059993, "learning_rate": 2.9115586690017516e-05, "loss": 0.5933, "step": 1995 }, { "epoch": 0.058275670783334795, "grad_norm": 0.7894783115432259, "learning_rate": 2.913018096906013e-05, "loss": 0.7345, "step": 1996 }, { "epoch": 0.058304867011182156, "grad_norm": 0.7337948071562731, "learning_rate": 2.9144775248102744e-05, "loss": 0.7006, "step": 1997 }, { "epoch": 0.05833406323902952, "grad_norm": 0.9874344642421289, "learning_rate": 2.9159369527145357e-05, "loss": 0.6842, "step": 1998 }, { "epoch": 0.05836325946687688, "grad_norm": 0.7049837228492503, "learning_rate": 2.9173963806187976e-05, "loss": 0.6587, "step": 1999 }, { "epoch": 0.05839245569472424, "grad_norm": 0.8606928427809123, "learning_rate": 2.918855808523059e-05, "loss": 0.8428, "step": 2000 }, { "epoch": 0.0584216519225716, "grad_norm": 0.820131994634751, "learning_rate": 2.9203152364273208e-05, "loss": 0.7205, "step": 2001 }, { "epoch": 0.05845084815041897, "grad_norm": 1.7011199305163753, "learning_rate": 2.9217746643315824e-05, "loss": 0.7287, "step": 2002 }, { "epoch": 0.05848004437826633, "grad_norm": 0.7070422823496741, "learning_rate": 2.9232340922358436e-05, "loss": 0.6213, "step": 2003 }, { "epoch": 0.05850924060611369, "grad_norm": 0.8043131752521888, "learning_rate": 2.9246935201401055e-05, "loss": 0.7393, "step": 2004 }, { "epoch": 0.05853843683396105, "grad_norm": 0.8133538240817396, "learning_rate": 2.9261529480443668e-05, "loss": 0.795, "step": 2005 }, { "epoch": 0.05856763306180841, "grad_norm": 0.8471896542811449, "learning_rate": 2.9276123759486284e-05, "loss": 0.7419, "step": 2006 }, { "epoch": 0.058596829289655775, "grad_norm": 0.809251483096793, "learning_rate": 2.9290718038528896e-05, "loss": 0.6021, "step": 2007 }, { "epoch": 0.058626025517503136, "grad_norm": 0.7643840158789611, "learning_rate": 2.9305312317571515e-05, "loss": 0.7226, "step": 2008 }, { "epoch": 0.058655221745350504, "grad_norm": 0.8202260820869037, "learning_rate": 2.9319906596614128e-05, "loss": 0.7802, "step": 2009 }, { "epoch": 0.058684417973197865, "grad_norm": 0.8210427093192392, "learning_rate": 2.9334500875656744e-05, "loss": 0.6647, "step": 2010 }, { "epoch": 0.058713614201045226, "grad_norm": 0.7935343093216869, "learning_rate": 2.9349095154699356e-05, "loss": 0.7574, "step": 2011 }, { "epoch": 0.05874281042889259, "grad_norm": 0.7331481771975031, "learning_rate": 2.9363689433741975e-05, "loss": 0.7002, "step": 2012 }, { "epoch": 0.05877200665673995, "grad_norm": 0.8844439607780794, "learning_rate": 2.9378283712784588e-05, "loss": 0.8114, "step": 2013 }, { "epoch": 0.05880120288458731, "grad_norm": 0.7401179954226292, "learning_rate": 2.9392877991827207e-05, "loss": 0.6499, "step": 2014 }, { "epoch": 0.05883039911243467, "grad_norm": 1.296667311127443, "learning_rate": 2.940747227086982e-05, "loss": 0.7276, "step": 2015 }, { "epoch": 0.05885959534028203, "grad_norm": 0.7870261919140564, "learning_rate": 2.9422066549912435e-05, "loss": 0.7562, "step": 2016 }, { "epoch": 0.0588887915681294, "grad_norm": 0.7645167233831265, "learning_rate": 2.9436660828955048e-05, "loss": 0.7396, "step": 2017 }, { "epoch": 0.05891798779597676, "grad_norm": 0.7242379114656724, "learning_rate": 2.9451255107997667e-05, "loss": 0.6512, "step": 2018 }, { "epoch": 0.05894718402382412, "grad_norm": 0.9317503797823377, "learning_rate": 2.9465849387040283e-05, "loss": 0.8047, "step": 2019 }, { "epoch": 0.058976380251671484, "grad_norm": 0.7274419166880258, "learning_rate": 2.9480443666082895e-05, "loss": 0.7304, "step": 2020 }, { "epoch": 0.059005576479518845, "grad_norm": 0.7803655672506669, "learning_rate": 2.9495037945125515e-05, "loss": 0.6988, "step": 2021 }, { "epoch": 0.059034772707366207, "grad_norm": 0.8140506712416822, "learning_rate": 2.9509632224168127e-05, "loss": 0.7272, "step": 2022 }, { "epoch": 0.05906396893521357, "grad_norm": 0.6915173145461765, "learning_rate": 2.9524226503210743e-05, "loss": 0.6703, "step": 2023 }, { "epoch": 0.059093165163060936, "grad_norm": 0.7456570522957113, "learning_rate": 2.9538820782253355e-05, "loss": 0.7409, "step": 2024 }, { "epoch": 0.0591223613909083, "grad_norm": 0.8391230730041163, "learning_rate": 2.9553415061295975e-05, "loss": 0.6438, "step": 2025 }, { "epoch": 0.05915155761875566, "grad_norm": 0.7293473208254139, "learning_rate": 2.9568009340338587e-05, "loss": 0.6852, "step": 2026 }, { "epoch": 0.05918075384660302, "grad_norm": 0.6959897209920792, "learning_rate": 2.9582603619381206e-05, "loss": 0.6678, "step": 2027 }, { "epoch": 0.05920995007445038, "grad_norm": 0.7528240773506067, "learning_rate": 2.959719789842382e-05, "loss": 0.7158, "step": 2028 }, { "epoch": 0.05923914630229774, "grad_norm": 2.4765329340200903, "learning_rate": 2.9611792177466435e-05, "loss": 0.8131, "step": 2029 }, { "epoch": 0.0592683425301451, "grad_norm": 0.7901320242008051, "learning_rate": 2.9626386456509047e-05, "loss": 0.6821, "step": 2030 }, { "epoch": 0.059297538757992464, "grad_norm": 0.7634424186488482, "learning_rate": 2.9640980735551666e-05, "loss": 0.7091, "step": 2031 }, { "epoch": 0.05932673498583983, "grad_norm": 0.7300849924303064, "learning_rate": 2.965557501459428e-05, "loss": 0.7258, "step": 2032 }, { "epoch": 0.059355931213687194, "grad_norm": 0.8511740755872486, "learning_rate": 2.9670169293636895e-05, "loss": 0.6595, "step": 2033 }, { "epoch": 0.059385127441534555, "grad_norm": 0.7187738607497182, "learning_rate": 2.9684763572679514e-05, "loss": 0.7014, "step": 2034 }, { "epoch": 0.059414323669381916, "grad_norm": 0.7569899423486716, "learning_rate": 2.9699357851722126e-05, "loss": 0.7658, "step": 2035 }, { "epoch": 0.05944351989722928, "grad_norm": 0.7312096716619826, "learning_rate": 2.9713952130764746e-05, "loss": 0.7489, "step": 2036 }, { "epoch": 0.05947271612507664, "grad_norm": 0.6493757695354101, "learning_rate": 2.9728546409807355e-05, "loss": 0.5386, "step": 2037 }, { "epoch": 0.059501912352924, "grad_norm": 0.7705635856710746, "learning_rate": 2.9743140688849974e-05, "loss": 0.739, "step": 2038 }, { "epoch": 0.05953110858077137, "grad_norm": 0.8010843174346364, "learning_rate": 2.9757734967892586e-05, "loss": 0.8037, "step": 2039 }, { "epoch": 0.05956030480861873, "grad_norm": 0.7727753766942715, "learning_rate": 2.9772329246935206e-05, "loss": 0.7771, "step": 2040 }, { "epoch": 0.05958950103646609, "grad_norm": 0.7180917957506885, "learning_rate": 2.9786923525977818e-05, "loss": 0.7181, "step": 2041 }, { "epoch": 0.05961869726431345, "grad_norm": 0.8988072495587718, "learning_rate": 2.9801517805020434e-05, "loss": 0.7347, "step": 2042 }, { "epoch": 0.05964789349216081, "grad_norm": 0.7706468850372374, "learning_rate": 2.9816112084063046e-05, "loss": 0.6547, "step": 2043 }, { "epoch": 0.059677089720008174, "grad_norm": 0.7634759979321059, "learning_rate": 2.9830706363105666e-05, "loss": 0.6778, "step": 2044 }, { "epoch": 0.059706285947855535, "grad_norm": 0.8136123764305528, "learning_rate": 2.9845300642148278e-05, "loss": 0.7089, "step": 2045 }, { "epoch": 0.059735482175702896, "grad_norm": 1.0242972870055247, "learning_rate": 2.9859894921190894e-05, "loss": 0.7155, "step": 2046 }, { "epoch": 0.059764678403550264, "grad_norm": 0.6895217527254708, "learning_rate": 2.9874489200233506e-05, "loss": 0.6088, "step": 2047 }, { "epoch": 0.059793874631397625, "grad_norm": 0.7706635522849721, "learning_rate": 2.9889083479276126e-05, "loss": 0.7129, "step": 2048 }, { "epoch": 0.05982307085924499, "grad_norm": 0.7963890862828674, "learning_rate": 2.9903677758318738e-05, "loss": 0.7799, "step": 2049 }, { "epoch": 0.05985226708709235, "grad_norm": 0.7645665136057412, "learning_rate": 2.9918272037361354e-05, "loss": 0.7111, "step": 2050 }, { "epoch": 0.05988146331493971, "grad_norm": 0.6922609643569038, "learning_rate": 2.9932866316403973e-05, "loss": 0.6345, "step": 2051 }, { "epoch": 0.05991065954278707, "grad_norm": 0.694592017690637, "learning_rate": 2.9947460595446586e-05, "loss": 0.6696, "step": 2052 }, { "epoch": 0.05993985577063443, "grad_norm": 0.7773632457818788, "learning_rate": 2.9962054874489205e-05, "loss": 0.6868, "step": 2053 }, { "epoch": 0.05996905199848179, "grad_norm": 0.8901466164231043, "learning_rate": 2.9976649153531817e-05, "loss": 0.7396, "step": 2054 }, { "epoch": 0.05999824822632916, "grad_norm": 0.7873183454045198, "learning_rate": 2.9991243432574433e-05, "loss": 0.7904, "step": 2055 }, { "epoch": 0.06002744445417652, "grad_norm": 0.7520071872952431, "learning_rate": 3.0005837711617046e-05, "loss": 0.7209, "step": 2056 }, { "epoch": 0.06005664068202388, "grad_norm": 0.731472386309082, "learning_rate": 3.0020431990659665e-05, "loss": 0.7028, "step": 2057 }, { "epoch": 0.060085836909871244, "grad_norm": 0.6746410072783219, "learning_rate": 3.0035026269702277e-05, "loss": 0.6476, "step": 2058 }, { "epoch": 0.060115033137718606, "grad_norm": 0.8687205504397628, "learning_rate": 3.0049620548744893e-05, "loss": 0.7392, "step": 2059 }, { "epoch": 0.06014422936556597, "grad_norm": 0.8069609005524289, "learning_rate": 3.0064214827787506e-05, "loss": 0.6702, "step": 2060 }, { "epoch": 0.06017342559341333, "grad_norm": 0.7503165852282895, "learning_rate": 3.0078809106830125e-05, "loss": 0.6751, "step": 2061 }, { "epoch": 0.060202621821260696, "grad_norm": 0.7240216533229058, "learning_rate": 3.0093403385872737e-05, "loss": 0.6883, "step": 2062 }, { "epoch": 0.06023181804910806, "grad_norm": 0.9930161475084675, "learning_rate": 3.0107997664915353e-05, "loss": 0.756, "step": 2063 }, { "epoch": 0.06026101427695542, "grad_norm": 0.7395361852085821, "learning_rate": 3.0122591943957966e-05, "loss": 0.7273, "step": 2064 }, { "epoch": 0.06029021050480278, "grad_norm": 0.775090734584093, "learning_rate": 3.0137186223000585e-05, "loss": 0.7872, "step": 2065 }, { "epoch": 0.06031940673265014, "grad_norm": 0.7454129049652858, "learning_rate": 3.0151780502043204e-05, "loss": 0.6447, "step": 2066 }, { "epoch": 0.0603486029604975, "grad_norm": 0.7320045713269446, "learning_rate": 3.0166374781085817e-05, "loss": 0.6901, "step": 2067 }, { "epoch": 0.06037779918834486, "grad_norm": 0.7142106670081038, "learning_rate": 3.0180969060128433e-05, "loss": 0.6585, "step": 2068 }, { "epoch": 0.060406995416192225, "grad_norm": 0.8209335160453012, "learning_rate": 3.0195563339171045e-05, "loss": 0.6321, "step": 2069 }, { "epoch": 0.06043619164403959, "grad_norm": 0.7461225969140262, "learning_rate": 3.0210157618213664e-05, "loss": 0.7511, "step": 2070 }, { "epoch": 0.060465387871886954, "grad_norm": 0.778258976660021, "learning_rate": 3.0224751897256277e-05, "loss": 0.7239, "step": 2071 }, { "epoch": 0.060494584099734315, "grad_norm": 0.8527872872538875, "learning_rate": 3.0239346176298893e-05, "loss": 0.7201, "step": 2072 }, { "epoch": 0.060523780327581676, "grad_norm": 0.8497792486852225, "learning_rate": 3.0253940455341505e-05, "loss": 0.7285, "step": 2073 }, { "epoch": 0.06055297655542904, "grad_norm": 0.7407480332323825, "learning_rate": 3.0268534734384124e-05, "loss": 0.7194, "step": 2074 }, { "epoch": 0.0605821727832764, "grad_norm": 0.7264307365544601, "learning_rate": 3.0283129013426737e-05, "loss": 0.7077, "step": 2075 }, { "epoch": 0.06061136901112376, "grad_norm": 0.7278321162888844, "learning_rate": 3.0297723292469356e-05, "loss": 0.6507, "step": 2076 }, { "epoch": 0.06064056523897113, "grad_norm": 0.8639039440470689, "learning_rate": 3.0312317571511965e-05, "loss": 0.7297, "step": 2077 }, { "epoch": 0.06066976146681849, "grad_norm": 0.7343923427355029, "learning_rate": 3.0326911850554584e-05, "loss": 0.6882, "step": 2078 }, { "epoch": 0.06069895769466585, "grad_norm": 0.9579961266373432, "learning_rate": 3.0341506129597197e-05, "loss": 0.7106, "step": 2079 }, { "epoch": 0.06072815392251321, "grad_norm": 0.7806957307355733, "learning_rate": 3.0356100408639816e-05, "loss": 0.686, "step": 2080 }, { "epoch": 0.06075735015036057, "grad_norm": 0.7034297552348027, "learning_rate": 3.037069468768243e-05, "loss": 0.6046, "step": 2081 }, { "epoch": 0.060786546378207934, "grad_norm": 0.6997442629684538, "learning_rate": 3.0385288966725044e-05, "loss": 0.6768, "step": 2082 }, { "epoch": 0.060815742606055295, "grad_norm": 0.722352017722439, "learning_rate": 3.0399883245767664e-05, "loss": 0.701, "step": 2083 }, { "epoch": 0.060844938833902656, "grad_norm": 0.7692281669402976, "learning_rate": 3.0414477524810276e-05, "loss": 0.7044, "step": 2084 }, { "epoch": 0.060874135061750025, "grad_norm": 0.7155817311222504, "learning_rate": 3.0429071803852892e-05, "loss": 0.7306, "step": 2085 }, { "epoch": 0.060903331289597386, "grad_norm": 0.7861109070832832, "learning_rate": 3.0443666082895504e-05, "loss": 0.8086, "step": 2086 }, { "epoch": 0.06093252751744475, "grad_norm": 0.7632714150339758, "learning_rate": 3.0458260361938124e-05, "loss": 0.6911, "step": 2087 }, { "epoch": 0.06096172374529211, "grad_norm": 0.6822398119377743, "learning_rate": 3.0472854640980736e-05, "loss": 0.6365, "step": 2088 }, { "epoch": 0.06099091997313947, "grad_norm": 0.775985547494895, "learning_rate": 3.0487448920023355e-05, "loss": 0.7611, "step": 2089 }, { "epoch": 0.06102011620098683, "grad_norm": 0.6941023634215424, "learning_rate": 3.0502043199065964e-05, "loss": 0.6356, "step": 2090 }, { "epoch": 0.06104931242883419, "grad_norm": 0.8336996955250623, "learning_rate": 3.0516637478108584e-05, "loss": 0.7403, "step": 2091 }, { "epoch": 0.06107850865668156, "grad_norm": 0.7219131456713733, "learning_rate": 3.05312317571512e-05, "loss": 0.6596, "step": 2092 }, { "epoch": 0.06110770488452892, "grad_norm": 0.6727483787951934, "learning_rate": 3.054582603619381e-05, "loss": 0.6221, "step": 2093 }, { "epoch": 0.06113690111237628, "grad_norm": 0.8610997396355908, "learning_rate": 3.0560420315236424e-05, "loss": 0.7463, "step": 2094 }, { "epoch": 0.061166097340223644, "grad_norm": 0.7499981354636985, "learning_rate": 3.0575014594279044e-05, "loss": 0.7181, "step": 2095 }, { "epoch": 0.061195293568071005, "grad_norm": 0.8924465060589355, "learning_rate": 3.0589608873321656e-05, "loss": 0.7443, "step": 2096 }, { "epoch": 0.061224489795918366, "grad_norm": 0.8874151328514684, "learning_rate": 3.0604203152364275e-05, "loss": 0.7417, "step": 2097 }, { "epoch": 0.06125368602376573, "grad_norm": 0.753487931897459, "learning_rate": 3.0618797431406895e-05, "loss": 0.7729, "step": 2098 }, { "epoch": 0.06128288225161309, "grad_norm": 0.7643345110376707, "learning_rate": 3.063339171044951e-05, "loss": 0.7585, "step": 2099 }, { "epoch": 0.061312078479460456, "grad_norm": 0.7400256044429192, "learning_rate": 3.064798598949212e-05, "loss": 0.6609, "step": 2100 }, { "epoch": 0.06134127470730782, "grad_norm": 0.7344354796188449, "learning_rate": 3.066258026853473e-05, "loss": 0.7372, "step": 2101 }, { "epoch": 0.06137047093515518, "grad_norm": 0.7536374342112037, "learning_rate": 3.067717454757735e-05, "loss": 0.7355, "step": 2102 }, { "epoch": 0.06139966716300254, "grad_norm": 0.7023244696421925, "learning_rate": 3.0691768826619964e-05, "loss": 0.6787, "step": 2103 }, { "epoch": 0.0614288633908499, "grad_norm": 0.7846440542665143, "learning_rate": 3.070636310566258e-05, "loss": 0.7712, "step": 2104 }, { "epoch": 0.06145805961869726, "grad_norm": 1.3901702403788956, "learning_rate": 3.0720957384705195e-05, "loss": 0.6993, "step": 2105 }, { "epoch": 0.061487255846544624, "grad_norm": 0.7252662232212637, "learning_rate": 3.0735551663747815e-05, "loss": 0.6316, "step": 2106 }, { "epoch": 0.06151645207439199, "grad_norm": 0.750825471257857, "learning_rate": 3.075014594279043e-05, "loss": 0.7284, "step": 2107 }, { "epoch": 0.06154564830223935, "grad_norm": 0.7926450835649395, "learning_rate": 3.0764740221833046e-05, "loss": 0.7695, "step": 2108 }, { "epoch": 0.061574844530086714, "grad_norm": 0.6845201093280677, "learning_rate": 3.077933450087566e-05, "loss": 0.6115, "step": 2109 }, { "epoch": 0.061604040757934075, "grad_norm": 0.7822344643489233, "learning_rate": 3.079392877991827e-05, "loss": 0.8111, "step": 2110 }, { "epoch": 0.06163323698578144, "grad_norm": 0.7270867168762585, "learning_rate": 3.0808523058960884e-05, "loss": 0.6826, "step": 2111 }, { "epoch": 0.0616624332136288, "grad_norm": 1.5268494048123882, "learning_rate": 3.08231173380035e-05, "loss": 0.8389, "step": 2112 }, { "epoch": 0.06169162944147616, "grad_norm": 0.8083186213747882, "learning_rate": 3.0837711617046116e-05, "loss": 0.8289, "step": 2113 }, { "epoch": 0.06172082566932352, "grad_norm": 0.7499683291162741, "learning_rate": 3.0852305896088735e-05, "loss": 0.7502, "step": 2114 }, { "epoch": 0.06175002189717089, "grad_norm": 0.728707553655982, "learning_rate": 3.0866900175131354e-05, "loss": 0.6701, "step": 2115 }, { "epoch": 0.06177921812501825, "grad_norm": 0.7159176834703356, "learning_rate": 3.0881494454173966e-05, "loss": 0.6669, "step": 2116 }, { "epoch": 0.06180841435286561, "grad_norm": 0.7571765821415625, "learning_rate": 3.0896088733216586e-05, "loss": 0.6906, "step": 2117 }, { "epoch": 0.06183761058071297, "grad_norm": 0.7725364492455834, "learning_rate": 3.09106830122592e-05, "loss": 0.5958, "step": 2118 }, { "epoch": 0.06186680680856033, "grad_norm": 0.7703217466764205, "learning_rate": 3.092527729130181e-05, "loss": 0.7525, "step": 2119 }, { "epoch": 0.061896003036407694, "grad_norm": 0.759724218353662, "learning_rate": 3.093987157034442e-05, "loss": 0.7769, "step": 2120 }, { "epoch": 0.061925199264255056, "grad_norm": 0.6840328091537279, "learning_rate": 3.095446584938704e-05, "loss": 0.6845, "step": 2121 }, { "epoch": 0.061954395492102424, "grad_norm": 0.7629992057479197, "learning_rate": 3.0969060128429655e-05, "loss": 0.7789, "step": 2122 }, { "epoch": 0.061983591719949785, "grad_norm": 1.083076889357988, "learning_rate": 3.0983654407472274e-05, "loss": 0.736, "step": 2123 }, { "epoch": 0.062012787947797146, "grad_norm": 0.732649539374123, "learning_rate": 3.0998248686514886e-05, "loss": 0.6944, "step": 2124 }, { "epoch": 0.06204198417564451, "grad_norm": 1.5996106969567883, "learning_rate": 3.1012842965557506e-05, "loss": 0.6991, "step": 2125 }, { "epoch": 0.06207118040349187, "grad_norm": 0.8195423883358242, "learning_rate": 3.102743724460012e-05, "loss": 0.7732, "step": 2126 }, { "epoch": 0.06210037663133923, "grad_norm": 0.7337397614586669, "learning_rate": 3.104203152364273e-05, "loss": 0.7513, "step": 2127 }, { "epoch": 0.06212957285918659, "grad_norm": 0.7551158927165335, "learning_rate": 3.105662580268534e-05, "loss": 0.7212, "step": 2128 }, { "epoch": 0.06215876908703395, "grad_norm": 0.8378457896434054, "learning_rate": 3.107122008172796e-05, "loss": 0.6668, "step": 2129 }, { "epoch": 0.06218796531488132, "grad_norm": 0.8168010785128936, "learning_rate": 3.108581436077058e-05, "loss": 0.7159, "step": 2130 }, { "epoch": 0.06221716154272868, "grad_norm": 0.6830980057083025, "learning_rate": 3.1100408639813194e-05, "loss": 0.6127, "step": 2131 }, { "epoch": 0.06224635777057604, "grad_norm": 0.7416021296859183, "learning_rate": 3.111500291885581e-05, "loss": 0.6794, "step": 2132 }, { "epoch": 0.062275553998423404, "grad_norm": 0.8266117931750788, "learning_rate": 3.1129597197898426e-05, "loss": 0.6398, "step": 2133 }, { "epoch": 0.062304750226270765, "grad_norm": 0.7521719218992531, "learning_rate": 3.1144191476941045e-05, "loss": 0.651, "step": 2134 }, { "epoch": 0.062333946454118126, "grad_norm": 0.7663659690551207, "learning_rate": 3.115878575598366e-05, "loss": 0.7305, "step": 2135 }, { "epoch": 0.06236314268196549, "grad_norm": 0.7666549496169437, "learning_rate": 3.117338003502627e-05, "loss": 0.7844, "step": 2136 }, { "epoch": 0.062392338909812856, "grad_norm": 0.7151857031231192, "learning_rate": 3.118797431406888e-05, "loss": 0.675, "step": 2137 }, { "epoch": 0.06242153513766022, "grad_norm": 0.6620293988544347, "learning_rate": 3.12025685931115e-05, "loss": 0.6365, "step": 2138 }, { "epoch": 0.06245073136550758, "grad_norm": 0.7634520851175445, "learning_rate": 3.1217162872154114e-05, "loss": 0.7652, "step": 2139 }, { "epoch": 0.06247992759335494, "grad_norm": 0.7625696948785727, "learning_rate": 3.123175715119673e-05, "loss": 0.7049, "step": 2140 }, { "epoch": 0.0625091238212023, "grad_norm": 0.7509664885924696, "learning_rate": 3.1246351430239346e-05, "loss": 0.6076, "step": 2141 }, { "epoch": 0.06253832004904966, "grad_norm": 0.8020105875746713, "learning_rate": 3.1260945709281965e-05, "loss": 0.7519, "step": 2142 }, { "epoch": 0.06256751627689702, "grad_norm": 11.97015287958235, "learning_rate": 3.127553998832458e-05, "loss": 1.2674, "step": 2143 }, { "epoch": 0.06259671250474438, "grad_norm": 0.7713416987269319, "learning_rate": 3.12901342673672e-05, "loss": 0.7789, "step": 2144 }, { "epoch": 0.06262590873259175, "grad_norm": 0.9322686185011747, "learning_rate": 3.130472854640981e-05, "loss": 0.7281, "step": 2145 }, { "epoch": 0.0626551049604391, "grad_norm": 0.7546766331874366, "learning_rate": 3.131932282545242e-05, "loss": 0.7438, "step": 2146 }, { "epoch": 0.06268430118828647, "grad_norm": 0.7232971714927127, "learning_rate": 3.133391710449504e-05, "loss": 0.7397, "step": 2147 }, { "epoch": 0.06271349741613383, "grad_norm": 0.707038256912639, "learning_rate": 3.1348511383537653e-05, "loss": 0.6241, "step": 2148 }, { "epoch": 0.0627426936439812, "grad_norm": 0.834242588821123, "learning_rate": 3.136310566258027e-05, "loss": 0.7723, "step": 2149 }, { "epoch": 0.06277188987182857, "grad_norm": 0.7934066196110999, "learning_rate": 3.1377699941622885e-05, "loss": 0.7988, "step": 2150 }, { "epoch": 0.06280108609967593, "grad_norm": 0.7423677467565757, "learning_rate": 3.1392294220665504e-05, "loss": 0.7273, "step": 2151 }, { "epoch": 0.06283028232752329, "grad_norm": 0.7158283037888267, "learning_rate": 3.140688849970812e-05, "loss": 0.6403, "step": 2152 }, { "epoch": 0.06285947855537065, "grad_norm": 0.7288829373857261, "learning_rate": 3.142148277875073e-05, "loss": 0.6817, "step": 2153 }, { "epoch": 0.06288867478321801, "grad_norm": 0.8203875213147279, "learning_rate": 3.143607705779334e-05, "loss": 0.8126, "step": 2154 }, { "epoch": 0.06291787101106537, "grad_norm": 0.6787888534255128, "learning_rate": 3.145067133683596e-05, "loss": 0.6862, "step": 2155 }, { "epoch": 0.06294706723891273, "grad_norm": 0.7568006692019974, "learning_rate": 3.1465265615878573e-05, "loss": 0.7876, "step": 2156 }, { "epoch": 0.0629762634667601, "grad_norm": 0.7135550009477591, "learning_rate": 3.147985989492119e-05, "loss": 0.7026, "step": 2157 }, { "epoch": 0.06300545969460745, "grad_norm": 0.7044913180909073, "learning_rate": 3.1494454173963805e-05, "loss": 0.6232, "step": 2158 }, { "epoch": 0.06303465592245482, "grad_norm": 0.7119534170531542, "learning_rate": 3.1509048453006424e-05, "loss": 0.664, "step": 2159 }, { "epoch": 0.06306385215030218, "grad_norm": 0.7578765304121924, "learning_rate": 3.152364273204904e-05, "loss": 0.7622, "step": 2160 }, { "epoch": 0.06309304837814954, "grad_norm": 0.8117875924236317, "learning_rate": 3.1538237011091656e-05, "loss": 0.6833, "step": 2161 }, { "epoch": 0.0631222446059969, "grad_norm": 0.8490206269471903, "learning_rate": 3.155283129013427e-05, "loss": 0.7909, "step": 2162 }, { "epoch": 0.06315144083384426, "grad_norm": 0.6916291837425771, "learning_rate": 3.156742556917688e-05, "loss": 0.5973, "step": 2163 }, { "epoch": 0.06318063706169164, "grad_norm": 0.6684795028758825, "learning_rate": 3.15820198482195e-05, "loss": 0.6427, "step": 2164 }, { "epoch": 0.063209833289539, "grad_norm": 0.708771609592653, "learning_rate": 3.159661412726211e-05, "loss": 0.7091, "step": 2165 }, { "epoch": 0.06323902951738636, "grad_norm": 1.0407811061981505, "learning_rate": 3.161120840630473e-05, "loss": 0.8755, "step": 2166 }, { "epoch": 0.06326822574523372, "grad_norm": 0.8821933032055232, "learning_rate": 3.1625802685347344e-05, "loss": 0.7802, "step": 2167 }, { "epoch": 0.06329742197308108, "grad_norm": 1.0919501711751347, "learning_rate": 3.1640396964389964e-05, "loss": 0.6722, "step": 2168 }, { "epoch": 0.06332661820092844, "grad_norm": 0.7907728769162233, "learning_rate": 3.1654991243432576e-05, "loss": 0.7837, "step": 2169 }, { "epoch": 0.0633558144287758, "grad_norm": 0.6842740718453733, "learning_rate": 3.1669585522475195e-05, "loss": 0.6191, "step": 2170 }, { "epoch": 0.06338501065662316, "grad_norm": 0.6965042229987203, "learning_rate": 3.168417980151781e-05, "loss": 0.6636, "step": 2171 }, { "epoch": 0.06341420688447053, "grad_norm": 0.8001418429045591, "learning_rate": 3.169877408056042e-05, "loss": 0.8193, "step": 2172 }, { "epoch": 0.06344340311231789, "grad_norm": 0.7858018701610716, "learning_rate": 3.171336835960303e-05, "loss": 0.7158, "step": 2173 }, { "epoch": 0.06347259934016525, "grad_norm": 0.7694126218182739, "learning_rate": 3.172796263864565e-05, "loss": 0.7186, "step": 2174 }, { "epoch": 0.06350179556801261, "grad_norm": 0.7956091004296617, "learning_rate": 3.1742556917688265e-05, "loss": 0.746, "step": 2175 }, { "epoch": 0.06353099179585997, "grad_norm": 0.787392705724804, "learning_rate": 3.1757151196730884e-05, "loss": 0.7334, "step": 2176 }, { "epoch": 0.06356018802370733, "grad_norm": 0.7972763655786258, "learning_rate": 3.1771745475773496e-05, "loss": 0.7126, "step": 2177 }, { "epoch": 0.06358938425155469, "grad_norm": 0.8270455349707253, "learning_rate": 3.1786339754816115e-05, "loss": 0.6909, "step": 2178 }, { "epoch": 0.06361858047940207, "grad_norm": 0.7727218332054328, "learning_rate": 3.1800934033858735e-05, "loss": 0.7454, "step": 2179 }, { "epoch": 0.06364777670724943, "grad_norm": 0.8023024724581729, "learning_rate": 3.181552831290134e-05, "loss": 0.8016, "step": 2180 }, { "epoch": 0.06367697293509679, "grad_norm": 0.6965883601537713, "learning_rate": 3.183012259194396e-05, "loss": 0.5991, "step": 2181 }, { "epoch": 0.06370616916294415, "grad_norm": 0.7970152723909383, "learning_rate": 3.184471687098657e-05, "loss": 0.7657, "step": 2182 }, { "epoch": 0.06373536539079151, "grad_norm": 0.81871031912872, "learning_rate": 3.185931115002919e-05, "loss": 0.775, "step": 2183 }, { "epoch": 0.06376456161863887, "grad_norm": 0.6742987759506563, "learning_rate": 3.1873905429071804e-05, "loss": 0.6645, "step": 2184 }, { "epoch": 0.06379375784648623, "grad_norm": 0.8014383401177414, "learning_rate": 3.188849970811442e-05, "loss": 0.755, "step": 2185 }, { "epoch": 0.0638229540743336, "grad_norm": 1.0151691602795816, "learning_rate": 3.1903093987157036e-05, "loss": 0.7608, "step": 2186 }, { "epoch": 0.06385215030218096, "grad_norm": 0.8084164008938375, "learning_rate": 3.1917688266199655e-05, "loss": 0.8458, "step": 2187 }, { "epoch": 0.06388134653002832, "grad_norm": 0.8116220037930674, "learning_rate": 3.193228254524227e-05, "loss": 0.7896, "step": 2188 }, { "epoch": 0.06391054275787568, "grad_norm": 0.8648797291688374, "learning_rate": 3.194687682428488e-05, "loss": 0.7772, "step": 2189 }, { "epoch": 0.06393973898572304, "grad_norm": 0.6804991536275495, "learning_rate": 3.196147110332749e-05, "loss": 0.6705, "step": 2190 }, { "epoch": 0.0639689352135704, "grad_norm": 0.8221279915584825, "learning_rate": 3.197606538237011e-05, "loss": 0.8229, "step": 2191 }, { "epoch": 0.06399813144141776, "grad_norm": 0.7238680900206487, "learning_rate": 3.1990659661412724e-05, "loss": 0.6964, "step": 2192 }, { "epoch": 0.06402732766926512, "grad_norm": 0.7491344358771698, "learning_rate": 3.200525394045534e-05, "loss": 0.7566, "step": 2193 }, { "epoch": 0.0640565238971125, "grad_norm": 0.7233330705233616, "learning_rate": 3.201984821949796e-05, "loss": 0.6998, "step": 2194 }, { "epoch": 0.06408572012495986, "grad_norm": 0.7547643071185489, "learning_rate": 3.2034442498540575e-05, "loss": 0.7544, "step": 2195 }, { "epoch": 0.06411491635280722, "grad_norm": 0.8364595642358702, "learning_rate": 3.2049036777583194e-05, "loss": 0.786, "step": 2196 }, { "epoch": 0.06414411258065458, "grad_norm": 0.7079782752397871, "learning_rate": 3.2063631056625807e-05, "loss": 0.6917, "step": 2197 }, { "epoch": 0.06417330880850194, "grad_norm": 0.6994735642081105, "learning_rate": 3.207822533566842e-05, "loss": 0.5989, "step": 2198 }, { "epoch": 0.0642025050363493, "grad_norm": 0.752039286138087, "learning_rate": 3.209281961471103e-05, "loss": 0.7913, "step": 2199 }, { "epoch": 0.06423170126419667, "grad_norm": 0.7080860366586379, "learning_rate": 3.210741389375365e-05, "loss": 0.662, "step": 2200 }, { "epoch": 0.06426089749204403, "grad_norm": 0.7695235752898736, "learning_rate": 3.212200817279626e-05, "loss": 0.7624, "step": 2201 }, { "epoch": 0.06429009371989139, "grad_norm": 0.724307675513624, "learning_rate": 3.213660245183888e-05, "loss": 0.6812, "step": 2202 }, { "epoch": 0.06431928994773875, "grad_norm": 0.7950497477602805, "learning_rate": 3.2151196730881495e-05, "loss": 0.7314, "step": 2203 }, { "epoch": 0.06434848617558611, "grad_norm": 0.7367897705874416, "learning_rate": 3.2165791009924114e-05, "loss": 0.7328, "step": 2204 }, { "epoch": 0.06437768240343347, "grad_norm": 0.7021164811017293, "learning_rate": 3.2180385288966727e-05, "loss": 0.6617, "step": 2205 }, { "epoch": 0.06440687863128083, "grad_norm": 0.7534238263646929, "learning_rate": 3.2194979568009346e-05, "loss": 0.6957, "step": 2206 }, { "epoch": 0.0644360748591282, "grad_norm": 0.6628497575274479, "learning_rate": 3.220957384705195e-05, "loss": 0.6242, "step": 2207 }, { "epoch": 0.06446527108697556, "grad_norm": 0.7655491310157221, "learning_rate": 3.222416812609457e-05, "loss": 0.7701, "step": 2208 }, { "epoch": 0.06449446731482293, "grad_norm": 0.7051788193514958, "learning_rate": 3.223876240513718e-05, "loss": 0.703, "step": 2209 }, { "epoch": 0.06452366354267029, "grad_norm": 0.8095797402203171, "learning_rate": 3.22533566841798e-05, "loss": 0.7072, "step": 2210 }, { "epoch": 0.06455285977051765, "grad_norm": 0.7536983461951586, "learning_rate": 3.226795096322242e-05, "loss": 0.7813, "step": 2211 }, { "epoch": 0.06458205599836501, "grad_norm": 0.735752182911469, "learning_rate": 3.2282545242265034e-05, "loss": 0.6626, "step": 2212 }, { "epoch": 0.06461125222621238, "grad_norm": 0.7571366986905462, "learning_rate": 3.2297139521307653e-05, "loss": 0.6692, "step": 2213 }, { "epoch": 0.06464044845405974, "grad_norm": 0.8318410349517839, "learning_rate": 3.2311733800350266e-05, "loss": 0.7512, "step": 2214 }, { "epoch": 0.0646696446819071, "grad_norm": 0.8531457872259959, "learning_rate": 3.232632807939288e-05, "loss": 0.7603, "step": 2215 }, { "epoch": 0.06469884090975446, "grad_norm": 0.8641865568307276, "learning_rate": 3.234092235843549e-05, "loss": 0.615, "step": 2216 }, { "epoch": 0.06472803713760182, "grad_norm": 0.7608863521094025, "learning_rate": 3.235551663747811e-05, "loss": 0.6698, "step": 2217 }, { "epoch": 0.06475723336544918, "grad_norm": 0.7623735294183884, "learning_rate": 3.237011091652072e-05, "loss": 0.706, "step": 2218 }, { "epoch": 0.06478642959329654, "grad_norm": 0.621999573872806, "learning_rate": 3.238470519556334e-05, "loss": 0.575, "step": 2219 }, { "epoch": 0.0648156258211439, "grad_norm": 0.9733773211216624, "learning_rate": 3.2399299474605954e-05, "loss": 0.7111, "step": 2220 }, { "epoch": 0.06484482204899127, "grad_norm": 0.7688301316115632, "learning_rate": 3.2413893753648573e-05, "loss": 0.6959, "step": 2221 }, { "epoch": 0.06487401827683863, "grad_norm": 0.7442925454062049, "learning_rate": 3.2428488032691186e-05, "loss": 0.7183, "step": 2222 }, { "epoch": 0.06490321450468599, "grad_norm": 0.7227786128955478, "learning_rate": 3.2443082311733805e-05, "loss": 0.7084, "step": 2223 }, { "epoch": 0.06493241073253336, "grad_norm": 0.9422256511221171, "learning_rate": 3.245767659077642e-05, "loss": 0.7251, "step": 2224 }, { "epoch": 0.06496160696038072, "grad_norm": 0.7762678471615452, "learning_rate": 3.247227086981903e-05, "loss": 0.6929, "step": 2225 }, { "epoch": 0.06499080318822809, "grad_norm": 0.8903597504634393, "learning_rate": 3.248686514886165e-05, "loss": 0.6881, "step": 2226 }, { "epoch": 0.06501999941607545, "grad_norm": 0.8042060981934607, "learning_rate": 3.250145942790426e-05, "loss": 0.7039, "step": 2227 }, { "epoch": 0.06504919564392281, "grad_norm": 1.050796908269176, "learning_rate": 3.251605370694688e-05, "loss": 0.785, "step": 2228 }, { "epoch": 0.06507839187177017, "grad_norm": 0.6721270101887975, "learning_rate": 3.2530647985989493e-05, "loss": 0.6476, "step": 2229 }, { "epoch": 0.06510758809961753, "grad_norm": 0.7523650072042116, "learning_rate": 3.254524226503211e-05, "loss": 0.773, "step": 2230 }, { "epoch": 0.06513678432746489, "grad_norm": 0.689482094362461, "learning_rate": 3.2559836544074725e-05, "loss": 0.6548, "step": 2231 }, { "epoch": 0.06516598055531225, "grad_norm": 0.7925638614981539, "learning_rate": 3.2574430823117344e-05, "loss": 0.7889, "step": 2232 }, { "epoch": 0.06519517678315961, "grad_norm": 0.6826387744174212, "learning_rate": 3.258902510215996e-05, "loss": 0.626, "step": 2233 }, { "epoch": 0.06522437301100698, "grad_norm": 0.6677815291198844, "learning_rate": 3.260361938120257e-05, "loss": 0.6366, "step": 2234 }, { "epoch": 0.06525356923885434, "grad_norm": 0.6900558872536697, "learning_rate": 3.261821366024518e-05, "loss": 0.6667, "step": 2235 }, { "epoch": 0.0652827654667017, "grad_norm": 0.6972144912850787, "learning_rate": 3.26328079392878e-05, "loss": 0.6609, "step": 2236 }, { "epoch": 0.06531196169454906, "grad_norm": 0.6214076176402149, "learning_rate": 3.2647402218330414e-05, "loss": 0.6057, "step": 2237 }, { "epoch": 0.06534115792239642, "grad_norm": 0.7960982390998345, "learning_rate": 3.266199649737303e-05, "loss": 0.8094, "step": 2238 }, { "epoch": 0.0653703541502438, "grad_norm": 0.8240251404804927, "learning_rate": 3.2676590776415645e-05, "loss": 0.8345, "step": 2239 }, { "epoch": 0.06539955037809116, "grad_norm": 0.7181651947229405, "learning_rate": 3.2691185055458264e-05, "loss": 0.6925, "step": 2240 }, { "epoch": 0.06542874660593852, "grad_norm": 0.7559387879536532, "learning_rate": 3.270577933450088e-05, "loss": 0.736, "step": 2241 }, { "epoch": 0.06545794283378588, "grad_norm": 0.696027519281209, "learning_rate": 3.272037361354349e-05, "loss": 0.6745, "step": 2242 }, { "epoch": 0.06548713906163324, "grad_norm": 0.6563823904207204, "learning_rate": 3.273496789258611e-05, "loss": 0.6199, "step": 2243 }, { "epoch": 0.0655163352894806, "grad_norm": 0.817327114236448, "learning_rate": 3.274956217162872e-05, "loss": 0.8137, "step": 2244 }, { "epoch": 0.06554553151732796, "grad_norm": 0.6478898513981246, "learning_rate": 3.276415645067134e-05, "loss": 0.5877, "step": 2245 }, { "epoch": 0.06557472774517532, "grad_norm": 0.7383893749229551, "learning_rate": 3.277875072971395e-05, "loss": 0.6907, "step": 2246 }, { "epoch": 0.06560392397302268, "grad_norm": 0.872265925039356, "learning_rate": 3.279334500875657e-05, "loss": 0.6833, "step": 2247 }, { "epoch": 0.06563312020087005, "grad_norm": 1.13842369861354, "learning_rate": 3.2807939287799185e-05, "loss": 0.6994, "step": 2248 }, { "epoch": 0.06566231642871741, "grad_norm": 0.7222292198528695, "learning_rate": 3.2822533566841804e-05, "loss": 0.7307, "step": 2249 }, { "epoch": 0.06569151265656477, "grad_norm": 0.7177004647609797, "learning_rate": 3.2837127845884416e-05, "loss": 0.6647, "step": 2250 }, { "epoch": 0.06572070888441213, "grad_norm": 0.7250829201217737, "learning_rate": 3.285172212492703e-05, "loss": 0.664, "step": 2251 }, { "epoch": 0.06574990511225949, "grad_norm": 0.8080415056693211, "learning_rate": 3.286631640396964e-05, "loss": 0.7036, "step": 2252 }, { "epoch": 0.06577910134010685, "grad_norm": 0.7800626368047854, "learning_rate": 3.288091068301226e-05, "loss": 0.6987, "step": 2253 }, { "epoch": 0.06580829756795423, "grad_norm": 0.7582049725433452, "learning_rate": 3.289550496205487e-05, "loss": 0.7441, "step": 2254 }, { "epoch": 0.06583749379580159, "grad_norm": 0.873307518826082, "learning_rate": 3.291009924109749e-05, "loss": 0.6442, "step": 2255 }, { "epoch": 0.06586669002364895, "grad_norm": 0.7170500334733213, "learning_rate": 3.2924693520140105e-05, "loss": 0.6647, "step": 2256 }, { "epoch": 0.06589588625149631, "grad_norm": 0.7323046524132489, "learning_rate": 3.2939287799182724e-05, "loss": 0.7093, "step": 2257 }, { "epoch": 0.06592508247934367, "grad_norm": 0.7766573972444634, "learning_rate": 3.295388207822534e-05, "loss": 0.6676, "step": 2258 }, { "epoch": 0.06595427870719103, "grad_norm": 0.7246572054859932, "learning_rate": 3.2968476357267956e-05, "loss": 0.6863, "step": 2259 }, { "epoch": 0.0659834749350384, "grad_norm": 0.7522650609975773, "learning_rate": 3.298307063631057e-05, "loss": 0.7456, "step": 2260 }, { "epoch": 0.06601267116288576, "grad_norm": 0.6974194092445696, "learning_rate": 3.299766491535318e-05, "loss": 0.6824, "step": 2261 }, { "epoch": 0.06604186739073312, "grad_norm": 0.6454669388167174, "learning_rate": 3.30122591943958e-05, "loss": 0.6544, "step": 2262 }, { "epoch": 0.06607106361858048, "grad_norm": 0.6831016307680231, "learning_rate": 3.302685347343841e-05, "loss": 0.5999, "step": 2263 }, { "epoch": 0.06610025984642784, "grad_norm": 0.7971083201954579, "learning_rate": 3.304144775248103e-05, "loss": 0.7625, "step": 2264 }, { "epoch": 0.0661294560742752, "grad_norm": 0.7138534001237993, "learning_rate": 3.3056042031523644e-05, "loss": 0.6576, "step": 2265 }, { "epoch": 0.06615865230212256, "grad_norm": 0.6755778619709838, "learning_rate": 3.307063631056626e-05, "loss": 0.5794, "step": 2266 }, { "epoch": 0.06618784852996992, "grad_norm": 0.6947964257777832, "learning_rate": 3.3085230589608876e-05, "loss": 0.6537, "step": 2267 }, { "epoch": 0.06621704475781728, "grad_norm": 0.7304880439593324, "learning_rate": 3.309982486865149e-05, "loss": 0.6551, "step": 2268 }, { "epoch": 0.06624624098566466, "grad_norm": 0.7162531576411725, "learning_rate": 3.31144191476941e-05, "loss": 0.7041, "step": 2269 }, { "epoch": 0.06627543721351202, "grad_norm": 0.7491565281893544, "learning_rate": 3.312901342673672e-05, "loss": 0.7536, "step": 2270 }, { "epoch": 0.06630463344135938, "grad_norm": 0.7037000415156637, "learning_rate": 3.314360770577933e-05, "loss": 0.7097, "step": 2271 }, { "epoch": 0.06633382966920674, "grad_norm": 0.876236705304497, "learning_rate": 3.315820198482195e-05, "loss": 0.7642, "step": 2272 }, { "epoch": 0.0663630258970541, "grad_norm": 0.8006851280746902, "learning_rate": 3.3172796263864564e-05, "loss": 0.7202, "step": 2273 }, { "epoch": 0.06639222212490146, "grad_norm": 0.8312820253580956, "learning_rate": 3.318739054290718e-05, "loss": 0.8395, "step": 2274 }, { "epoch": 0.06642141835274883, "grad_norm": 0.8012287712393078, "learning_rate": 3.32019848219498e-05, "loss": 0.6568, "step": 2275 }, { "epoch": 0.06645061458059619, "grad_norm": 0.7478143456731708, "learning_rate": 3.3216579100992415e-05, "loss": 0.7266, "step": 2276 }, { "epoch": 0.06647981080844355, "grad_norm": 0.8725541124725389, "learning_rate": 3.323117338003503e-05, "loss": 0.8356, "step": 2277 }, { "epoch": 0.06650900703629091, "grad_norm": 0.6718954400241065, "learning_rate": 3.324576765907764e-05, "loss": 0.6756, "step": 2278 }, { "epoch": 0.06653820326413827, "grad_norm": 0.8033979785734207, "learning_rate": 3.326036193812026e-05, "loss": 0.7579, "step": 2279 }, { "epoch": 0.06656739949198563, "grad_norm": 0.7653007918936494, "learning_rate": 3.327495621716287e-05, "loss": 0.7239, "step": 2280 }, { "epoch": 0.066596595719833, "grad_norm": 0.772377196398108, "learning_rate": 3.328955049620549e-05, "loss": 0.7028, "step": 2281 }, { "epoch": 0.06662579194768035, "grad_norm": 0.6801825310600387, "learning_rate": 3.33041447752481e-05, "loss": 0.6539, "step": 2282 }, { "epoch": 0.06665498817552772, "grad_norm": 0.7604001522881256, "learning_rate": 3.331873905429072e-05, "loss": 0.7403, "step": 2283 }, { "epoch": 0.06668418440337509, "grad_norm": 0.7515045222461417, "learning_rate": 3.3333333333333335e-05, "loss": 0.7298, "step": 2284 }, { "epoch": 0.06671338063122245, "grad_norm": 0.8677326773177171, "learning_rate": 3.3347927612375954e-05, "loss": 0.6879, "step": 2285 }, { "epoch": 0.06674257685906981, "grad_norm": 0.6391771683445873, "learning_rate": 3.336252189141857e-05, "loss": 0.6351, "step": 2286 }, { "epoch": 0.06677177308691717, "grad_norm": 0.6834177164787654, "learning_rate": 3.337711617046118e-05, "loss": 0.6582, "step": 2287 }, { "epoch": 0.06680096931476454, "grad_norm": 0.6712692848162338, "learning_rate": 3.339171044950379e-05, "loss": 0.6442, "step": 2288 }, { "epoch": 0.0668301655426119, "grad_norm": 0.7021733825927473, "learning_rate": 3.340630472854641e-05, "loss": 0.6949, "step": 2289 }, { "epoch": 0.06685936177045926, "grad_norm": 0.7324346426198555, "learning_rate": 3.342089900758903e-05, "loss": 0.6876, "step": 2290 }, { "epoch": 0.06688855799830662, "grad_norm": 0.7915803963265177, "learning_rate": 3.343549328663164e-05, "loss": 0.7861, "step": 2291 }, { "epoch": 0.06691775422615398, "grad_norm": 0.8485628140728282, "learning_rate": 3.345008756567426e-05, "loss": 0.5989, "step": 2292 }, { "epoch": 0.06694695045400134, "grad_norm": 0.8046228990426139, "learning_rate": 3.3464681844716874e-05, "loss": 0.7485, "step": 2293 }, { "epoch": 0.0669761466818487, "grad_norm": 0.7487755546609911, "learning_rate": 3.347927612375949e-05, "loss": 0.6713, "step": 2294 }, { "epoch": 0.06700534290969606, "grad_norm": 0.9190400795098749, "learning_rate": 3.34938704028021e-05, "loss": 0.7307, "step": 2295 }, { "epoch": 0.06703453913754343, "grad_norm": 0.6931291221876795, "learning_rate": 3.350846468184472e-05, "loss": 0.64, "step": 2296 }, { "epoch": 0.06706373536539079, "grad_norm": 0.7111055178316223, "learning_rate": 3.352305896088733e-05, "loss": 0.6852, "step": 2297 }, { "epoch": 0.06709293159323815, "grad_norm": 0.7790636134017412, "learning_rate": 3.353765323992995e-05, "loss": 0.7745, "step": 2298 }, { "epoch": 0.06712212782108552, "grad_norm": 0.68108574669559, "learning_rate": 3.355224751897256e-05, "loss": 0.6741, "step": 2299 }, { "epoch": 0.06715132404893288, "grad_norm": 0.7558149501998421, "learning_rate": 3.356684179801518e-05, "loss": 0.7232, "step": 2300 }, { "epoch": 0.06718052027678025, "grad_norm": 1.0687854739776295, "learning_rate": 3.3581436077057794e-05, "loss": 0.8148, "step": 2301 }, { "epoch": 0.0672097165046276, "grad_norm": 0.7378946628040478, "learning_rate": 3.3596030356100414e-05, "loss": 0.6935, "step": 2302 }, { "epoch": 0.06723891273247497, "grad_norm": 0.734499916152112, "learning_rate": 3.3610624635143026e-05, "loss": 0.688, "step": 2303 }, { "epoch": 0.06726810896032233, "grad_norm": 0.7110941042009755, "learning_rate": 3.362521891418564e-05, "loss": 0.712, "step": 2304 }, { "epoch": 0.06729730518816969, "grad_norm": 0.7311817288519561, "learning_rate": 3.363981319322825e-05, "loss": 0.6389, "step": 2305 }, { "epoch": 0.06732650141601705, "grad_norm": 0.798700839738964, "learning_rate": 3.365440747227087e-05, "loss": 0.7727, "step": 2306 }, { "epoch": 0.06735569764386441, "grad_norm": 0.7033166051129558, "learning_rate": 3.366900175131349e-05, "loss": 0.6921, "step": 2307 }, { "epoch": 0.06738489387171177, "grad_norm": 0.7369931442417635, "learning_rate": 3.36835960303561e-05, "loss": 0.7193, "step": 2308 }, { "epoch": 0.06741409009955913, "grad_norm": 0.7849155862202463, "learning_rate": 3.369819030939872e-05, "loss": 0.7678, "step": 2309 }, { "epoch": 0.0674432863274065, "grad_norm": 0.870155910847465, "learning_rate": 3.3712784588441334e-05, "loss": 0.6878, "step": 2310 }, { "epoch": 0.06747248255525386, "grad_norm": 0.6687789839637285, "learning_rate": 3.372737886748395e-05, "loss": 0.6344, "step": 2311 }, { "epoch": 0.06750167878310122, "grad_norm": 0.7295774866532074, "learning_rate": 3.3741973146526565e-05, "loss": 0.7476, "step": 2312 }, { "epoch": 0.06753087501094858, "grad_norm": 0.815565782695129, "learning_rate": 3.375656742556918e-05, "loss": 0.7331, "step": 2313 }, { "epoch": 0.06756007123879594, "grad_norm": 1.0201523503274226, "learning_rate": 3.377116170461179e-05, "loss": 0.8613, "step": 2314 }, { "epoch": 0.06758926746664332, "grad_norm": 0.6901735581192255, "learning_rate": 3.378575598365441e-05, "loss": 0.6091, "step": 2315 }, { "epoch": 0.06761846369449068, "grad_norm": 0.8971278247132902, "learning_rate": 3.380035026269702e-05, "loss": 0.7093, "step": 2316 }, { "epoch": 0.06764765992233804, "grad_norm": 0.7103989784299505, "learning_rate": 3.381494454173964e-05, "loss": 0.6938, "step": 2317 }, { "epoch": 0.0676768561501854, "grad_norm": 2.9269904574714674, "learning_rate": 3.3829538820782254e-05, "loss": 0.8182, "step": 2318 }, { "epoch": 0.06770605237803276, "grad_norm": 0.8060006258446334, "learning_rate": 3.384413309982487e-05, "loss": 0.8483, "step": 2319 }, { "epoch": 0.06773524860588012, "grad_norm": 0.7447656759062202, "learning_rate": 3.3858727378867485e-05, "loss": 0.7082, "step": 2320 }, { "epoch": 0.06776444483372748, "grad_norm": 0.7344156838295194, "learning_rate": 3.38733216579101e-05, "loss": 0.6075, "step": 2321 }, { "epoch": 0.06779364106157484, "grad_norm": 0.8571098573505704, "learning_rate": 3.388791593695272e-05, "loss": 0.7833, "step": 2322 }, { "epoch": 0.0678228372894222, "grad_norm": 0.7606728253372375, "learning_rate": 3.390251021599533e-05, "loss": 0.7403, "step": 2323 }, { "epoch": 0.06785203351726957, "grad_norm": 0.6999017492460222, "learning_rate": 3.391710449503795e-05, "loss": 0.6898, "step": 2324 }, { "epoch": 0.06788122974511693, "grad_norm": 0.8421136133354492, "learning_rate": 3.393169877408056e-05, "loss": 0.8181, "step": 2325 }, { "epoch": 0.06791042597296429, "grad_norm": 0.706771094836013, "learning_rate": 3.394629305312318e-05, "loss": 0.6694, "step": 2326 }, { "epoch": 0.06793962220081165, "grad_norm": 0.735327545109543, "learning_rate": 3.396088733216579e-05, "loss": 0.7172, "step": 2327 }, { "epoch": 0.06796881842865901, "grad_norm": 0.7316404116148107, "learning_rate": 3.397548161120841e-05, "loss": 0.6493, "step": 2328 }, { "epoch": 0.06799801465650637, "grad_norm": 0.8165546371619177, "learning_rate": 3.3990075890251025e-05, "loss": 0.6758, "step": 2329 }, { "epoch": 0.06802721088435375, "grad_norm": 0.6806535284849261, "learning_rate": 3.400467016929364e-05, "loss": 0.6254, "step": 2330 }, { "epoch": 0.06805640711220111, "grad_norm": 0.7515164134137542, "learning_rate": 3.401926444833625e-05, "loss": 0.6952, "step": 2331 }, { "epoch": 0.06808560334004847, "grad_norm": 0.7025778951748654, "learning_rate": 3.403385872737887e-05, "loss": 0.6354, "step": 2332 }, { "epoch": 0.06811479956789583, "grad_norm": 0.7717138140918478, "learning_rate": 3.404845300642148e-05, "loss": 0.7682, "step": 2333 }, { "epoch": 0.06814399579574319, "grad_norm": 0.6177095233172556, "learning_rate": 3.40630472854641e-05, "loss": 0.5318, "step": 2334 }, { "epoch": 0.06817319202359055, "grad_norm": 0.6784821066265346, "learning_rate": 3.407764156450671e-05, "loss": 0.6141, "step": 2335 }, { "epoch": 0.06820238825143791, "grad_norm": 0.7083887307065327, "learning_rate": 3.409223584354933e-05, "loss": 0.6655, "step": 2336 }, { "epoch": 0.06823158447928528, "grad_norm": 0.7160002210339611, "learning_rate": 3.4106830122591945e-05, "loss": 0.6556, "step": 2337 }, { "epoch": 0.06826078070713264, "grad_norm": 0.7258368747046045, "learning_rate": 3.4121424401634564e-05, "loss": 0.7482, "step": 2338 }, { "epoch": 0.06828997693498, "grad_norm": 0.7187323193727249, "learning_rate": 3.4136018680677176e-05, "loss": 0.6393, "step": 2339 }, { "epoch": 0.06831917316282736, "grad_norm": 0.7809927091440283, "learning_rate": 3.415061295971979e-05, "loss": 0.7698, "step": 2340 }, { "epoch": 0.06834836939067472, "grad_norm": 0.7758860529609162, "learning_rate": 3.416520723876241e-05, "loss": 0.731, "step": 2341 }, { "epoch": 0.06837756561852208, "grad_norm": 0.7049885925588651, "learning_rate": 3.417980151780502e-05, "loss": 0.6682, "step": 2342 }, { "epoch": 0.06840676184636944, "grad_norm": 0.7668321763673687, "learning_rate": 3.419439579684764e-05, "loss": 0.6701, "step": 2343 }, { "epoch": 0.0684359580742168, "grad_norm": 0.7292566451606158, "learning_rate": 3.420899007589025e-05, "loss": 0.7195, "step": 2344 }, { "epoch": 0.06846515430206418, "grad_norm": 0.7750973671841836, "learning_rate": 3.422358435493287e-05, "loss": 0.7468, "step": 2345 }, { "epoch": 0.06849435052991154, "grad_norm": 0.6955771427291817, "learning_rate": 3.4238178633975484e-05, "loss": 0.6592, "step": 2346 }, { "epoch": 0.0685235467577589, "grad_norm": 0.6961652443847979, "learning_rate": 3.4252772913018096e-05, "loss": 0.6447, "step": 2347 }, { "epoch": 0.06855274298560626, "grad_norm": 0.6983125437938188, "learning_rate": 3.426736719206071e-05, "loss": 0.6378, "step": 2348 }, { "epoch": 0.06858193921345362, "grad_norm": 0.7667012064697599, "learning_rate": 3.428196147110333e-05, "loss": 0.7659, "step": 2349 }, { "epoch": 0.06861113544130099, "grad_norm": 0.7051004207903763, "learning_rate": 3.429655575014594e-05, "loss": 0.6683, "step": 2350 }, { "epoch": 0.06864033166914835, "grad_norm": 0.7094679043383136, "learning_rate": 3.431115002918856e-05, "loss": 0.5846, "step": 2351 }, { "epoch": 0.06866952789699571, "grad_norm": 0.7543723090154048, "learning_rate": 3.432574430823117e-05, "loss": 0.7168, "step": 2352 }, { "epoch": 0.06869872412484307, "grad_norm": 0.8284062132215733, "learning_rate": 3.434033858727379e-05, "loss": 0.7873, "step": 2353 }, { "epoch": 0.06872792035269043, "grad_norm": 0.6833522999382388, "learning_rate": 3.435493286631641e-05, "loss": 0.6193, "step": 2354 }, { "epoch": 0.06875711658053779, "grad_norm": 0.686307932887237, "learning_rate": 3.436952714535902e-05, "loss": 0.645, "step": 2355 }, { "epoch": 0.06878631280838515, "grad_norm": 0.7762855175124777, "learning_rate": 3.4384121424401636e-05, "loss": 0.7491, "step": 2356 }, { "epoch": 0.06881550903623251, "grad_norm": 0.7787324818915715, "learning_rate": 3.439871570344425e-05, "loss": 0.7964, "step": 2357 }, { "epoch": 0.06884470526407988, "grad_norm": 0.726267020003285, "learning_rate": 3.441330998248687e-05, "loss": 0.7363, "step": 2358 }, { "epoch": 0.06887390149192724, "grad_norm": 0.6708158524357544, "learning_rate": 3.442790426152948e-05, "loss": 0.7026, "step": 2359 }, { "epoch": 0.06890309771977461, "grad_norm": 0.7465037755667853, "learning_rate": 3.44424985405721e-05, "loss": 0.7191, "step": 2360 }, { "epoch": 0.06893229394762197, "grad_norm": 0.7357365906197821, "learning_rate": 3.445709281961471e-05, "loss": 0.6601, "step": 2361 }, { "epoch": 0.06896149017546933, "grad_norm": 0.6869504356087326, "learning_rate": 3.447168709865733e-05, "loss": 0.5988, "step": 2362 }, { "epoch": 0.0689906864033167, "grad_norm": 0.7352326966589238, "learning_rate": 3.448628137769994e-05, "loss": 0.6452, "step": 2363 }, { "epoch": 0.06901988263116406, "grad_norm": 0.7522678676664795, "learning_rate": 3.450087565674256e-05, "loss": 0.7587, "step": 2364 }, { "epoch": 0.06904907885901142, "grad_norm": 0.7716386729665498, "learning_rate": 3.4515469935785175e-05, "loss": 0.7856, "step": 2365 }, { "epoch": 0.06907827508685878, "grad_norm": 0.7358076224298907, "learning_rate": 3.453006421482779e-05, "loss": 0.7616, "step": 2366 }, { "epoch": 0.06910747131470614, "grad_norm": 0.6812971143619514, "learning_rate": 3.45446584938704e-05, "loss": 0.6458, "step": 2367 }, { "epoch": 0.0691366675425535, "grad_norm": 0.6805464449064813, "learning_rate": 3.455925277291302e-05, "loss": 0.6425, "step": 2368 }, { "epoch": 0.06916586377040086, "grad_norm": 0.6788551391609845, "learning_rate": 3.457384705195563e-05, "loss": 0.673, "step": 2369 }, { "epoch": 0.06919505999824822, "grad_norm": 0.8056346340434887, "learning_rate": 3.458844133099825e-05, "loss": 0.7326, "step": 2370 }, { "epoch": 0.06922425622609558, "grad_norm": 0.719452777435608, "learning_rate": 3.460303561004087e-05, "loss": 0.7332, "step": 2371 }, { "epoch": 0.06925345245394295, "grad_norm": 0.6968876409722624, "learning_rate": 3.461762988908348e-05, "loss": 0.6616, "step": 2372 }, { "epoch": 0.06928264868179031, "grad_norm": 0.6949833924520284, "learning_rate": 3.4632224168126095e-05, "loss": 0.6542, "step": 2373 }, { "epoch": 0.06931184490963767, "grad_norm": 0.7564689839809563, "learning_rate": 3.464681844716871e-05, "loss": 0.6888, "step": 2374 }, { "epoch": 0.06934104113748504, "grad_norm": 0.7175980497960572, "learning_rate": 3.466141272621133e-05, "loss": 0.754, "step": 2375 }, { "epoch": 0.0693702373653324, "grad_norm": 0.6634244194444763, "learning_rate": 3.467600700525394e-05, "loss": 0.5859, "step": 2376 }, { "epoch": 0.06939943359317977, "grad_norm": 0.7759412192359006, "learning_rate": 3.469060128429656e-05, "loss": 0.7128, "step": 2377 }, { "epoch": 0.06942862982102713, "grad_norm": 0.7268443718067356, "learning_rate": 3.470519556333917e-05, "loss": 0.6366, "step": 2378 }, { "epoch": 0.06945782604887449, "grad_norm": 0.6421470084893085, "learning_rate": 3.471978984238179e-05, "loss": 0.5723, "step": 2379 }, { "epoch": 0.06948702227672185, "grad_norm": 0.7631105437616322, "learning_rate": 3.47343841214244e-05, "loss": 0.7428, "step": 2380 }, { "epoch": 0.06951621850456921, "grad_norm": 0.7949703932263059, "learning_rate": 3.474897840046702e-05, "loss": 0.7386, "step": 2381 }, { "epoch": 0.06954541473241657, "grad_norm": 1.363236775980861, "learning_rate": 3.4763572679509634e-05, "loss": 0.7283, "step": 2382 }, { "epoch": 0.06957461096026393, "grad_norm": 0.7770756198315075, "learning_rate": 3.477816695855225e-05, "loss": 0.7062, "step": 2383 }, { "epoch": 0.0696038071881113, "grad_norm": 0.9543612756837097, "learning_rate": 3.479276123759486e-05, "loss": 0.8006, "step": 2384 }, { "epoch": 0.06963300341595866, "grad_norm": 0.7516950065154715, "learning_rate": 3.480735551663748e-05, "loss": 0.7093, "step": 2385 }, { "epoch": 0.06966219964380602, "grad_norm": 0.7388290681717822, "learning_rate": 3.48219497956801e-05, "loss": 0.7692, "step": 2386 }, { "epoch": 0.06969139587165338, "grad_norm": 0.6815266598286451, "learning_rate": 3.483654407472271e-05, "loss": 0.7055, "step": 2387 }, { "epoch": 0.06972059209950074, "grad_norm": 0.723897896761063, "learning_rate": 3.485113835376533e-05, "loss": 0.6388, "step": 2388 }, { "epoch": 0.0697497883273481, "grad_norm": 0.7255307010871038, "learning_rate": 3.486573263280794e-05, "loss": 0.7124, "step": 2389 }, { "epoch": 0.06977898455519548, "grad_norm": 0.7511313059725191, "learning_rate": 3.488032691185056e-05, "loss": 0.6878, "step": 2390 }, { "epoch": 0.06980818078304284, "grad_norm": 0.715811131247369, "learning_rate": 3.4894921190893174e-05, "loss": 0.6862, "step": 2391 }, { "epoch": 0.0698373770108902, "grad_norm": 0.7552761393506647, "learning_rate": 3.4909515469935786e-05, "loss": 0.6871, "step": 2392 }, { "epoch": 0.06986657323873756, "grad_norm": 0.7000279310375079, "learning_rate": 3.49241097489784e-05, "loss": 0.6849, "step": 2393 }, { "epoch": 0.06989576946658492, "grad_norm": 0.7520584022530078, "learning_rate": 3.493870402802102e-05, "loss": 0.7019, "step": 2394 }, { "epoch": 0.06992496569443228, "grad_norm": 0.7232320988606273, "learning_rate": 3.495329830706363e-05, "loss": 0.6314, "step": 2395 }, { "epoch": 0.06995416192227964, "grad_norm": 0.7195813727522783, "learning_rate": 3.496789258610625e-05, "loss": 0.6492, "step": 2396 }, { "epoch": 0.069983358150127, "grad_norm": 0.7047309320542212, "learning_rate": 3.498248686514886e-05, "loss": 0.6409, "step": 2397 }, { "epoch": 0.07001255437797436, "grad_norm": 0.9434939013029261, "learning_rate": 3.499708114419148e-05, "loss": 0.7565, "step": 2398 }, { "epoch": 0.07004175060582173, "grad_norm": 0.8110933107223928, "learning_rate": 3.5011675423234094e-05, "loss": 0.6923, "step": 2399 }, { "epoch": 0.07007094683366909, "grad_norm": 0.750305780338925, "learning_rate": 3.5026269702276706e-05, "loss": 0.675, "step": 2400 }, { "epoch": 0.07010014306151645, "grad_norm": 0.7122924366477804, "learning_rate": 3.504086398131932e-05, "loss": 0.7109, "step": 2401 }, { "epoch": 0.07012933928936381, "grad_norm": 0.7036356255583971, "learning_rate": 3.505545826036194e-05, "loss": 0.6627, "step": 2402 }, { "epoch": 0.07015853551721117, "grad_norm": 0.7680744727543365, "learning_rate": 3.507005253940456e-05, "loss": 0.7166, "step": 2403 }, { "epoch": 0.07018773174505853, "grad_norm": 0.901645371485971, "learning_rate": 3.508464681844717e-05, "loss": 0.7155, "step": 2404 }, { "epoch": 0.07021692797290591, "grad_norm": 0.7541958985516589, "learning_rate": 3.509924109748979e-05, "loss": 0.6493, "step": 2405 }, { "epoch": 0.07024612420075327, "grad_norm": 0.669499591175821, "learning_rate": 3.51138353765324e-05, "loss": 0.6508, "step": 2406 }, { "epoch": 0.07027532042860063, "grad_norm": 0.7360292340210765, "learning_rate": 3.512842965557502e-05, "loss": 0.7103, "step": 2407 }, { "epoch": 0.07030451665644799, "grad_norm": 0.7978278855823279, "learning_rate": 3.514302393461763e-05, "loss": 0.7696, "step": 2408 }, { "epoch": 0.07033371288429535, "grad_norm": 0.7902005189689222, "learning_rate": 3.5157618213660245e-05, "loss": 0.7519, "step": 2409 }, { "epoch": 0.07036290911214271, "grad_norm": 0.7421294432742267, "learning_rate": 3.517221249270286e-05, "loss": 0.6976, "step": 2410 }, { "epoch": 0.07039210533999007, "grad_norm": 0.6852985100789182, "learning_rate": 3.518680677174548e-05, "loss": 0.7052, "step": 2411 }, { "epoch": 0.07042130156783744, "grad_norm": 0.6966076817890717, "learning_rate": 3.520140105078809e-05, "loss": 0.6632, "step": 2412 }, { "epoch": 0.0704504977956848, "grad_norm": 0.7313686839091058, "learning_rate": 3.521599532983071e-05, "loss": 0.7461, "step": 2413 }, { "epoch": 0.07047969402353216, "grad_norm": 0.6933674827989038, "learning_rate": 3.523058960887332e-05, "loss": 0.6829, "step": 2414 }, { "epoch": 0.07050889025137952, "grad_norm": 0.7832826906047036, "learning_rate": 3.524518388791594e-05, "loss": 0.7994, "step": 2415 }, { "epoch": 0.07053808647922688, "grad_norm": 0.8218556933834603, "learning_rate": 3.525977816695855e-05, "loss": 0.729, "step": 2416 }, { "epoch": 0.07056728270707424, "grad_norm": 0.690526887008567, "learning_rate": 3.527437244600117e-05, "loss": 0.6418, "step": 2417 }, { "epoch": 0.0705964789349216, "grad_norm": 0.7727963116039342, "learning_rate": 3.5288966725043785e-05, "loss": 0.6672, "step": 2418 }, { "epoch": 0.07062567516276896, "grad_norm": 0.7509291560299646, "learning_rate": 3.53035610040864e-05, "loss": 0.709, "step": 2419 }, { "epoch": 0.07065487139061634, "grad_norm": 0.6628848788807281, "learning_rate": 3.5318155283129016e-05, "loss": 0.6311, "step": 2420 }, { "epoch": 0.0706840676184637, "grad_norm": 0.8029905244123521, "learning_rate": 3.533274956217163e-05, "loss": 0.726, "step": 2421 }, { "epoch": 0.07071326384631106, "grad_norm": 0.7360005530664878, "learning_rate": 3.534734384121425e-05, "loss": 0.7274, "step": 2422 }, { "epoch": 0.07074246007415842, "grad_norm": 0.737310141531055, "learning_rate": 3.536193812025686e-05, "loss": 0.7634, "step": 2423 }, { "epoch": 0.07077165630200578, "grad_norm": 0.7507191101363182, "learning_rate": 3.537653239929948e-05, "loss": 0.7707, "step": 2424 }, { "epoch": 0.07080085252985314, "grad_norm": 0.8154210562351205, "learning_rate": 3.539112667834209e-05, "loss": 0.7637, "step": 2425 }, { "epoch": 0.0708300487577005, "grad_norm": 0.7133784465983369, "learning_rate": 3.540572095738471e-05, "loss": 0.6347, "step": 2426 }, { "epoch": 0.07085924498554787, "grad_norm": 0.7605561907562918, "learning_rate": 3.542031523642732e-05, "loss": 0.708, "step": 2427 }, { "epoch": 0.07088844121339523, "grad_norm": 0.7345893616886001, "learning_rate": 3.5434909515469936e-05, "loss": 0.7125, "step": 2428 }, { "epoch": 0.07091763744124259, "grad_norm": 0.709395309723574, "learning_rate": 3.544950379451255e-05, "loss": 0.5849, "step": 2429 }, { "epoch": 0.07094683366908995, "grad_norm": 0.7509673108562123, "learning_rate": 3.546409807355517e-05, "loss": 0.7042, "step": 2430 }, { "epoch": 0.07097602989693731, "grad_norm": 0.8024570479289648, "learning_rate": 3.547869235259778e-05, "loss": 0.7246, "step": 2431 }, { "epoch": 0.07100522612478467, "grad_norm": 0.7689390288326542, "learning_rate": 3.54932866316404e-05, "loss": 0.7248, "step": 2432 }, { "epoch": 0.07103442235263203, "grad_norm": 0.7151027438352568, "learning_rate": 3.550788091068301e-05, "loss": 0.6617, "step": 2433 }, { "epoch": 0.0710636185804794, "grad_norm": 0.7219427761286475, "learning_rate": 3.552247518972563e-05, "loss": 0.6155, "step": 2434 }, { "epoch": 0.07109281480832677, "grad_norm": 0.6898990575425964, "learning_rate": 3.5537069468768244e-05, "loss": 0.7202, "step": 2435 }, { "epoch": 0.07112201103617413, "grad_norm": 0.726956410902517, "learning_rate": 3.5551663747810857e-05, "loss": 0.7388, "step": 2436 }, { "epoch": 0.0711512072640215, "grad_norm": 0.7167595927757541, "learning_rate": 3.5566258026853476e-05, "loss": 0.7163, "step": 2437 }, { "epoch": 0.07118040349186885, "grad_norm": 0.7113589924386241, "learning_rate": 3.558085230589609e-05, "loss": 0.6427, "step": 2438 }, { "epoch": 0.07120959971971622, "grad_norm": 0.7196875533022122, "learning_rate": 3.559544658493871e-05, "loss": 0.6857, "step": 2439 }, { "epoch": 0.07123879594756358, "grad_norm": 0.7278171114836085, "learning_rate": 3.561004086398132e-05, "loss": 0.6641, "step": 2440 }, { "epoch": 0.07126799217541094, "grad_norm": 0.990880872360085, "learning_rate": 3.562463514302394e-05, "loss": 0.7546, "step": 2441 }, { "epoch": 0.0712971884032583, "grad_norm": 0.8513206712486745, "learning_rate": 3.563922942206655e-05, "loss": 0.7749, "step": 2442 }, { "epoch": 0.07132638463110566, "grad_norm": 0.7964220252751858, "learning_rate": 3.565382370110917e-05, "loss": 0.6794, "step": 2443 }, { "epoch": 0.07135558085895302, "grad_norm": 0.9897169824088377, "learning_rate": 3.566841798015178e-05, "loss": 0.807, "step": 2444 }, { "epoch": 0.07138477708680038, "grad_norm": 0.8002101290153734, "learning_rate": 3.5683012259194396e-05, "loss": 0.6926, "step": 2445 }, { "epoch": 0.07141397331464774, "grad_norm": 0.6906589555972941, "learning_rate": 3.569760653823701e-05, "loss": 0.5905, "step": 2446 }, { "epoch": 0.0714431695424951, "grad_norm": 0.6916860391956565, "learning_rate": 3.571220081727963e-05, "loss": 0.6418, "step": 2447 }, { "epoch": 0.07147236577034247, "grad_norm": 0.8271632718874454, "learning_rate": 3.572679509632224e-05, "loss": 0.7392, "step": 2448 }, { "epoch": 0.07150156199818983, "grad_norm": 0.8757431789450403, "learning_rate": 3.574138937536486e-05, "loss": 0.7268, "step": 2449 }, { "epoch": 0.0715307582260372, "grad_norm": 0.7034188378596902, "learning_rate": 3.575598365440748e-05, "loss": 0.6839, "step": 2450 }, { "epoch": 0.07155995445388456, "grad_norm": 0.7036807620863024, "learning_rate": 3.577057793345009e-05, "loss": 0.6528, "step": 2451 }, { "epoch": 0.07158915068173193, "grad_norm": 0.7049837146218259, "learning_rate": 3.578517221249271e-05, "loss": 0.6564, "step": 2452 }, { "epoch": 0.07161834690957929, "grad_norm": 0.7275288381690351, "learning_rate": 3.5799766491535316e-05, "loss": 0.7741, "step": 2453 }, { "epoch": 0.07164754313742665, "grad_norm": 0.6669379944600157, "learning_rate": 3.5814360770577935e-05, "loss": 0.6512, "step": 2454 }, { "epoch": 0.07167673936527401, "grad_norm": 0.7169227649853576, "learning_rate": 3.582895504962055e-05, "loss": 0.6819, "step": 2455 }, { "epoch": 0.07170593559312137, "grad_norm": 0.7641633241415186, "learning_rate": 3.584354932866317e-05, "loss": 0.6632, "step": 2456 }, { "epoch": 0.07173513182096873, "grad_norm": 0.8027133666522249, "learning_rate": 3.585814360770578e-05, "loss": 0.7557, "step": 2457 }, { "epoch": 0.07176432804881609, "grad_norm": 0.7522126571876181, "learning_rate": 3.58727378867484e-05, "loss": 0.758, "step": 2458 }, { "epoch": 0.07179352427666345, "grad_norm": 0.7069927837273662, "learning_rate": 3.588733216579101e-05, "loss": 0.6821, "step": 2459 }, { "epoch": 0.07182272050451081, "grad_norm": 0.6983179826694901, "learning_rate": 3.590192644483363e-05, "loss": 0.7389, "step": 2460 }, { "epoch": 0.07185191673235818, "grad_norm": 0.662755361063353, "learning_rate": 3.591652072387624e-05, "loss": 0.6391, "step": 2461 }, { "epoch": 0.07188111296020554, "grad_norm": 0.7031539126870898, "learning_rate": 3.5931115002918855e-05, "loss": 0.7035, "step": 2462 }, { "epoch": 0.0719103091880529, "grad_norm": 0.640022520811532, "learning_rate": 3.594570928196147e-05, "loss": 0.6043, "step": 2463 }, { "epoch": 0.07193950541590026, "grad_norm": 0.7761373918546948, "learning_rate": 3.596030356100409e-05, "loss": 0.7474, "step": 2464 }, { "epoch": 0.07196870164374763, "grad_norm": 0.7085205792287487, "learning_rate": 3.59748978400467e-05, "loss": 0.6603, "step": 2465 }, { "epoch": 0.071997897871595, "grad_norm": 0.706162475898177, "learning_rate": 3.598949211908932e-05, "loss": 0.6687, "step": 2466 }, { "epoch": 0.07202709409944236, "grad_norm": 0.7686602193400559, "learning_rate": 3.600408639813194e-05, "loss": 0.7607, "step": 2467 }, { "epoch": 0.07205629032728972, "grad_norm": 0.9495197831998932, "learning_rate": 3.601868067717455e-05, "loss": 0.7895, "step": 2468 }, { "epoch": 0.07208548655513708, "grad_norm": 0.7646290030109515, "learning_rate": 3.603327495621717e-05, "loss": 0.7172, "step": 2469 }, { "epoch": 0.07211468278298444, "grad_norm": 0.6515625184279527, "learning_rate": 3.604786923525978e-05, "loss": 0.6975, "step": 2470 }, { "epoch": 0.0721438790108318, "grad_norm": 0.713510957685935, "learning_rate": 3.6062463514302394e-05, "loss": 0.6793, "step": 2471 }, { "epoch": 0.07217307523867916, "grad_norm": 0.7297497275060748, "learning_rate": 3.607705779334501e-05, "loss": 0.7033, "step": 2472 }, { "epoch": 0.07220227146652652, "grad_norm": 0.6994620438142858, "learning_rate": 3.6091652072387626e-05, "loss": 0.7161, "step": 2473 }, { "epoch": 0.07223146769437389, "grad_norm": 0.7625432017704616, "learning_rate": 3.610624635143024e-05, "loss": 0.7227, "step": 2474 }, { "epoch": 0.07226066392222125, "grad_norm": 0.7005225037636746, "learning_rate": 3.612084063047286e-05, "loss": 0.6612, "step": 2475 }, { "epoch": 0.07228986015006861, "grad_norm": 0.6854678851875442, "learning_rate": 3.613543490951547e-05, "loss": 0.6618, "step": 2476 }, { "epoch": 0.07231905637791597, "grad_norm": 0.7213724802102216, "learning_rate": 3.615002918855809e-05, "loss": 0.7055, "step": 2477 }, { "epoch": 0.07234825260576333, "grad_norm": 0.9738016035500723, "learning_rate": 3.61646234676007e-05, "loss": 0.8075, "step": 2478 }, { "epoch": 0.07237744883361069, "grad_norm": 0.7260153886481178, "learning_rate": 3.617921774664332e-05, "loss": 0.6128, "step": 2479 }, { "epoch": 0.07240664506145807, "grad_norm": 0.6926366505488875, "learning_rate": 3.619381202568593e-05, "loss": 0.64, "step": 2480 }, { "epoch": 0.07243584128930543, "grad_norm": 0.7478047070557247, "learning_rate": 3.6208406304728546e-05, "loss": 0.697, "step": 2481 }, { "epoch": 0.07246503751715279, "grad_norm": 0.8092080828873202, "learning_rate": 3.6223000583771165e-05, "loss": 0.779, "step": 2482 }, { "epoch": 0.07249423374500015, "grad_norm": 0.7079404635535297, "learning_rate": 3.623759486281378e-05, "loss": 0.6933, "step": 2483 }, { "epoch": 0.07252342997284751, "grad_norm": 0.893397011189462, "learning_rate": 3.62521891418564e-05, "loss": 0.7883, "step": 2484 }, { "epoch": 0.07255262620069487, "grad_norm": 0.7616870647961413, "learning_rate": 3.626678342089901e-05, "loss": 0.7768, "step": 2485 }, { "epoch": 0.07258182242854223, "grad_norm": 0.7694092076461605, "learning_rate": 3.628137769994163e-05, "loss": 0.6821, "step": 2486 }, { "epoch": 0.0726110186563896, "grad_norm": 0.7302007721282999, "learning_rate": 3.629597197898424e-05, "loss": 0.7431, "step": 2487 }, { "epoch": 0.07264021488423696, "grad_norm": 0.6683670918613054, "learning_rate": 3.6310566258026854e-05, "loss": 0.6732, "step": 2488 }, { "epoch": 0.07266941111208432, "grad_norm": 0.7496800773611814, "learning_rate": 3.6325160537069466e-05, "loss": 0.7552, "step": 2489 }, { "epoch": 0.07269860733993168, "grad_norm": 0.996183219561031, "learning_rate": 3.6339754816112086e-05, "loss": 0.6958, "step": 2490 }, { "epoch": 0.07272780356777904, "grad_norm": 0.664881182973869, "learning_rate": 3.63543490951547e-05, "loss": 0.608, "step": 2491 }, { "epoch": 0.0727569997956264, "grad_norm": 0.7192594940340128, "learning_rate": 3.636894337419732e-05, "loss": 0.6966, "step": 2492 }, { "epoch": 0.07278619602347376, "grad_norm": 0.6865967571241519, "learning_rate": 3.638353765323993e-05, "loss": 0.6881, "step": 2493 }, { "epoch": 0.07281539225132112, "grad_norm": 0.7069962165408511, "learning_rate": 3.639813193228255e-05, "loss": 0.7278, "step": 2494 }, { "epoch": 0.0728445884791685, "grad_norm": 0.7254982530488797, "learning_rate": 3.641272621132516e-05, "loss": 0.6467, "step": 2495 }, { "epoch": 0.07287378470701586, "grad_norm": 0.6721398723237956, "learning_rate": 3.642732049036778e-05, "loss": 0.638, "step": 2496 }, { "epoch": 0.07290298093486322, "grad_norm": 0.7369322914956107, "learning_rate": 3.644191476941039e-05, "loss": 0.604, "step": 2497 }, { "epoch": 0.07293217716271058, "grad_norm": 0.7312942988525686, "learning_rate": 3.6456509048453006e-05, "loss": 0.6627, "step": 2498 }, { "epoch": 0.07296137339055794, "grad_norm": 0.6955239998556548, "learning_rate": 3.6471103327495625e-05, "loss": 0.6331, "step": 2499 }, { "epoch": 0.0729905696184053, "grad_norm": 0.9180796960006089, "learning_rate": 3.648569760653824e-05, "loss": 0.6499, "step": 2500 }, { "epoch": 0.07301976584625267, "grad_norm": 0.6688988319796257, "learning_rate": 3.6500291885580857e-05, "loss": 0.6172, "step": 2501 }, { "epoch": 0.07304896207410003, "grad_norm": 0.7593773529487191, "learning_rate": 3.651488616462347e-05, "loss": 0.8132, "step": 2502 }, { "epoch": 0.07307815830194739, "grad_norm": 0.8118211266481138, "learning_rate": 3.652948044366609e-05, "loss": 0.7487, "step": 2503 }, { "epoch": 0.07310735452979475, "grad_norm": 0.7773025938892703, "learning_rate": 3.65440747227087e-05, "loss": 0.6276, "step": 2504 }, { "epoch": 0.07313655075764211, "grad_norm": 0.8137984220965448, "learning_rate": 3.655866900175132e-05, "loss": 0.7789, "step": 2505 }, { "epoch": 0.07316574698548947, "grad_norm": 0.7652036371640716, "learning_rate": 3.657326328079393e-05, "loss": 0.7741, "step": 2506 }, { "epoch": 0.07319494321333683, "grad_norm": 0.7275325401059941, "learning_rate": 3.6587857559836545e-05, "loss": 0.7289, "step": 2507 }, { "epoch": 0.0732241394411842, "grad_norm": 0.9435248852269017, "learning_rate": 3.660245183887916e-05, "loss": 0.6616, "step": 2508 }, { "epoch": 0.07325333566903156, "grad_norm": 0.7111230672225964, "learning_rate": 3.6617046117921777e-05, "loss": 0.7328, "step": 2509 }, { "epoch": 0.07328253189687892, "grad_norm": 0.7125032494691552, "learning_rate": 3.663164039696439e-05, "loss": 0.77, "step": 2510 }, { "epoch": 0.07331172812472629, "grad_norm": 0.8116884553579266, "learning_rate": 3.664623467600701e-05, "loss": 0.7812, "step": 2511 }, { "epoch": 0.07334092435257365, "grad_norm": 0.6640593988413711, "learning_rate": 3.666082895504962e-05, "loss": 0.6274, "step": 2512 }, { "epoch": 0.07337012058042101, "grad_norm": 0.7203904779918281, "learning_rate": 3.667542323409224e-05, "loss": 0.6622, "step": 2513 }, { "epoch": 0.07339931680826838, "grad_norm": 0.6792951698412392, "learning_rate": 3.669001751313485e-05, "loss": 0.6691, "step": 2514 }, { "epoch": 0.07342851303611574, "grad_norm": 0.8328422091253674, "learning_rate": 3.6704611792177465e-05, "loss": 0.7108, "step": 2515 }, { "epoch": 0.0734577092639631, "grad_norm": 0.7813119202097196, "learning_rate": 3.6719206071220084e-05, "loss": 0.7679, "step": 2516 }, { "epoch": 0.07348690549181046, "grad_norm": 0.687991629905847, "learning_rate": 3.6733800350262697e-05, "loss": 0.6895, "step": 2517 }, { "epoch": 0.07351610171965782, "grad_norm": 0.6461892202667635, "learning_rate": 3.6748394629305316e-05, "loss": 0.6354, "step": 2518 }, { "epoch": 0.07354529794750518, "grad_norm": 0.6996325100177874, "learning_rate": 3.676298890834793e-05, "loss": 0.7394, "step": 2519 }, { "epoch": 0.07357449417535254, "grad_norm": 0.7006434762315448, "learning_rate": 3.677758318739055e-05, "loss": 0.6836, "step": 2520 }, { "epoch": 0.0736036904031999, "grad_norm": 0.7622880275763726, "learning_rate": 3.679217746643316e-05, "loss": 0.7295, "step": 2521 }, { "epoch": 0.07363288663104726, "grad_norm": 0.6834221201067672, "learning_rate": 3.680677174547578e-05, "loss": 0.6806, "step": 2522 }, { "epoch": 0.07366208285889463, "grad_norm": 0.786483630638586, "learning_rate": 3.682136602451839e-05, "loss": 0.7433, "step": 2523 }, { "epoch": 0.07369127908674199, "grad_norm": 0.7338190813065004, "learning_rate": 3.6835960303561004e-05, "loss": 0.7261, "step": 2524 }, { "epoch": 0.07372047531458935, "grad_norm": 0.7475824195946261, "learning_rate": 3.685055458260362e-05, "loss": 0.7885, "step": 2525 }, { "epoch": 0.07374967154243672, "grad_norm": 0.7295947156692486, "learning_rate": 3.6865148861646236e-05, "loss": 0.7624, "step": 2526 }, { "epoch": 0.07377886777028408, "grad_norm": 0.7144450408363627, "learning_rate": 3.687974314068885e-05, "loss": 0.7114, "step": 2527 }, { "epoch": 0.07380806399813145, "grad_norm": 0.7358069165714477, "learning_rate": 3.689433741973147e-05, "loss": 0.7665, "step": 2528 }, { "epoch": 0.0738372602259788, "grad_norm": 0.781528048651016, "learning_rate": 3.690893169877408e-05, "loss": 0.7215, "step": 2529 }, { "epoch": 0.07386645645382617, "grad_norm": 0.7008116799221411, "learning_rate": 3.69235259778167e-05, "loss": 0.6793, "step": 2530 }, { "epoch": 0.07389565268167353, "grad_norm": 0.7276814657246138, "learning_rate": 3.693812025685932e-05, "loss": 0.7341, "step": 2531 }, { "epoch": 0.07392484890952089, "grad_norm": 0.6483673704225807, "learning_rate": 3.695271453590193e-05, "loss": 0.6038, "step": 2532 }, { "epoch": 0.07395404513736825, "grad_norm": 0.7477710099976084, "learning_rate": 3.6967308814944543e-05, "loss": 0.7114, "step": 2533 }, { "epoch": 0.07398324136521561, "grad_norm": 0.6725945671145713, "learning_rate": 3.6981903093987156e-05, "loss": 0.6508, "step": 2534 }, { "epoch": 0.07401243759306297, "grad_norm": 0.6586546880926698, "learning_rate": 3.6996497373029775e-05, "loss": 0.619, "step": 2535 }, { "epoch": 0.07404163382091034, "grad_norm": 0.6778539148659095, "learning_rate": 3.701109165207239e-05, "loss": 0.6321, "step": 2536 }, { "epoch": 0.0740708300487577, "grad_norm": 0.7598453464743722, "learning_rate": 3.702568593111501e-05, "loss": 0.7491, "step": 2537 }, { "epoch": 0.07410002627660506, "grad_norm": 0.7276089452914074, "learning_rate": 3.704028021015762e-05, "loss": 0.7395, "step": 2538 }, { "epoch": 0.07412922250445242, "grad_norm": 0.8990043268348704, "learning_rate": 3.705487448920024e-05, "loss": 0.7178, "step": 2539 }, { "epoch": 0.07415841873229978, "grad_norm": 0.7544308710424862, "learning_rate": 3.706946876824285e-05, "loss": 0.7468, "step": 2540 }, { "epoch": 0.07418761496014716, "grad_norm": 0.7711323203240833, "learning_rate": 3.7084063047285464e-05, "loss": 0.7055, "step": 2541 }, { "epoch": 0.07421681118799452, "grad_norm": 0.6896594334083193, "learning_rate": 3.7098657326328076e-05, "loss": 0.6731, "step": 2542 }, { "epoch": 0.07424600741584188, "grad_norm": 0.7685549805677787, "learning_rate": 3.7113251605370695e-05, "loss": 0.7485, "step": 2543 }, { "epoch": 0.07427520364368924, "grad_norm": 0.6942770232323517, "learning_rate": 3.712784588441331e-05, "loss": 0.718, "step": 2544 }, { "epoch": 0.0743043998715366, "grad_norm": 0.7767761664167768, "learning_rate": 3.714244016345593e-05, "loss": 0.7451, "step": 2545 }, { "epoch": 0.07433359609938396, "grad_norm": 0.966480953664359, "learning_rate": 3.7157034442498546e-05, "loss": 0.6908, "step": 2546 }, { "epoch": 0.07436279232723132, "grad_norm": 0.7007384381949628, "learning_rate": 3.717162872154116e-05, "loss": 0.7754, "step": 2547 }, { "epoch": 0.07439198855507868, "grad_norm": 0.7464178613477294, "learning_rate": 3.718622300058378e-05, "loss": 0.8073, "step": 2548 }, { "epoch": 0.07442118478292604, "grad_norm": 0.717889979708108, "learning_rate": 3.720081727962639e-05, "loss": 0.6758, "step": 2549 }, { "epoch": 0.0744503810107734, "grad_norm": 0.678054073063429, "learning_rate": 3.7215411558669e-05, "loss": 0.691, "step": 2550 }, { "epoch": 0.07447957723862077, "grad_norm": 0.7006538436987972, "learning_rate": 3.7230005837711615e-05, "loss": 0.668, "step": 2551 }, { "epoch": 0.07450877346646813, "grad_norm": 0.6535218797147876, "learning_rate": 3.7244600116754235e-05, "loss": 0.6401, "step": 2552 }, { "epoch": 0.07453796969431549, "grad_norm": 0.6651077382557575, "learning_rate": 3.725919439579685e-05, "loss": 0.68, "step": 2553 }, { "epoch": 0.07456716592216285, "grad_norm": 0.6985488754874132, "learning_rate": 3.7273788674839466e-05, "loss": 0.7, "step": 2554 }, { "epoch": 0.07459636215001021, "grad_norm": 0.6235089531982531, "learning_rate": 3.728838295388208e-05, "loss": 0.5152, "step": 2555 }, { "epoch": 0.07462555837785759, "grad_norm": 0.6615293602981868, "learning_rate": 3.73029772329247e-05, "loss": 0.6729, "step": 2556 }, { "epoch": 0.07465475460570495, "grad_norm": 0.7842896967346014, "learning_rate": 3.731757151196731e-05, "loss": 0.7586, "step": 2557 }, { "epoch": 0.07468395083355231, "grad_norm": 0.7031011131703319, "learning_rate": 3.733216579100993e-05, "loss": 0.7018, "step": 2558 }, { "epoch": 0.07471314706139967, "grad_norm": 0.7061270696890699, "learning_rate": 3.734676007005254e-05, "loss": 0.6585, "step": 2559 }, { "epoch": 0.07474234328924703, "grad_norm": 1.2590213331563245, "learning_rate": 3.7361354349095155e-05, "loss": 0.7712, "step": 2560 }, { "epoch": 0.07477153951709439, "grad_norm": 0.7284135917855701, "learning_rate": 3.737594862813777e-05, "loss": 0.7287, "step": 2561 }, { "epoch": 0.07480073574494175, "grad_norm": 0.6517456891428836, "learning_rate": 3.7390542907180386e-05, "loss": 0.5914, "step": 2562 }, { "epoch": 0.07482993197278912, "grad_norm": 0.7314438150556799, "learning_rate": 3.7405137186223006e-05, "loss": 0.7256, "step": 2563 }, { "epoch": 0.07485912820063648, "grad_norm": 0.6550472841179106, "learning_rate": 3.741973146526562e-05, "loss": 0.64, "step": 2564 }, { "epoch": 0.07488832442848384, "grad_norm": 0.9944393935568206, "learning_rate": 3.743432574430824e-05, "loss": 0.7908, "step": 2565 }, { "epoch": 0.0749175206563312, "grad_norm": 0.6903658541762575, "learning_rate": 3.744892002335085e-05, "loss": 0.6719, "step": 2566 }, { "epoch": 0.07494671688417856, "grad_norm": 0.7112307834946908, "learning_rate": 3.746351430239346e-05, "loss": 0.6799, "step": 2567 }, { "epoch": 0.07497591311202592, "grad_norm": 0.7165920655933395, "learning_rate": 3.7478108581436075e-05, "loss": 0.667, "step": 2568 }, { "epoch": 0.07500510933987328, "grad_norm": 0.7324301344500203, "learning_rate": 3.7492702860478694e-05, "loss": 0.7726, "step": 2569 }, { "epoch": 0.07503430556772064, "grad_norm": 0.6651806215911424, "learning_rate": 3.7507297139521306e-05, "loss": 0.6222, "step": 2570 }, { "epoch": 0.07506350179556802, "grad_norm": 0.7434334779870708, "learning_rate": 3.7521891418563926e-05, "loss": 0.76, "step": 2571 }, { "epoch": 0.07509269802341538, "grad_norm": 0.7005651783998279, "learning_rate": 3.753648569760654e-05, "loss": 0.7121, "step": 2572 }, { "epoch": 0.07512189425126274, "grad_norm": 0.7149859595760332, "learning_rate": 3.755107997664916e-05, "loss": 0.7031, "step": 2573 }, { "epoch": 0.0751510904791101, "grad_norm": 0.651821810408574, "learning_rate": 3.756567425569177e-05, "loss": 0.593, "step": 2574 }, { "epoch": 0.07518028670695746, "grad_norm": 0.6854343322790198, "learning_rate": 3.758026853473439e-05, "loss": 0.6657, "step": 2575 }, { "epoch": 0.07520948293480482, "grad_norm": 0.6855700501341487, "learning_rate": 3.7594862813777e-05, "loss": 0.6885, "step": 2576 }, { "epoch": 0.07523867916265219, "grad_norm": 1.2389582368295908, "learning_rate": 3.7609457092819614e-05, "loss": 0.6713, "step": 2577 }, { "epoch": 0.07526787539049955, "grad_norm": 0.614468198873394, "learning_rate": 3.762405137186223e-05, "loss": 0.5552, "step": 2578 }, { "epoch": 0.07529707161834691, "grad_norm": 0.7584996834132531, "learning_rate": 3.7638645650904846e-05, "loss": 0.781, "step": 2579 }, { "epoch": 0.07532626784619427, "grad_norm": 0.8210591405932235, "learning_rate": 3.7653239929947465e-05, "loss": 0.7944, "step": 2580 }, { "epoch": 0.07535546407404163, "grad_norm": 0.8973388345023857, "learning_rate": 3.766783420899008e-05, "loss": 0.828, "step": 2581 }, { "epoch": 0.07538466030188899, "grad_norm": 0.7781949818343644, "learning_rate": 3.7682428488032697e-05, "loss": 0.7655, "step": 2582 }, { "epoch": 0.07541385652973635, "grad_norm": 0.6995704774466777, "learning_rate": 3.769702276707531e-05, "loss": 0.7253, "step": 2583 }, { "epoch": 0.07544305275758371, "grad_norm": 0.6540138536125264, "learning_rate": 3.771161704611793e-05, "loss": 0.6453, "step": 2584 }, { "epoch": 0.07547224898543108, "grad_norm": 0.7369993555201305, "learning_rate": 3.772621132516054e-05, "loss": 0.6259, "step": 2585 }, { "epoch": 0.07550144521327845, "grad_norm": 0.7886554586006441, "learning_rate": 3.774080560420315e-05, "loss": 0.7733, "step": 2586 }, { "epoch": 0.07553064144112581, "grad_norm": 1.2836295021146535, "learning_rate": 3.7755399883245766e-05, "loss": 0.7254, "step": 2587 }, { "epoch": 0.07555983766897317, "grad_norm": 0.6777664741838411, "learning_rate": 3.7769994162288385e-05, "loss": 0.7036, "step": 2588 }, { "epoch": 0.07558903389682053, "grad_norm": 0.6662471845195871, "learning_rate": 3.7784588441331e-05, "loss": 0.6655, "step": 2589 }, { "epoch": 0.0756182301246679, "grad_norm": 0.6839696899022593, "learning_rate": 3.779918272037362e-05, "loss": 0.68, "step": 2590 }, { "epoch": 0.07564742635251526, "grad_norm": 0.7390296313332738, "learning_rate": 3.781377699941623e-05, "loss": 0.6955, "step": 2591 }, { "epoch": 0.07567662258036262, "grad_norm": 0.7509475773580784, "learning_rate": 3.782837127845885e-05, "loss": 0.795, "step": 2592 }, { "epoch": 0.07570581880820998, "grad_norm": 0.7160566030544204, "learning_rate": 3.784296555750146e-05, "loss": 0.7381, "step": 2593 }, { "epoch": 0.07573501503605734, "grad_norm": 0.6806642054681494, "learning_rate": 3.785755983654407e-05, "loss": 0.584, "step": 2594 }, { "epoch": 0.0757642112639047, "grad_norm": 0.6360534690330668, "learning_rate": 3.787215411558669e-05, "loss": 0.6566, "step": 2595 }, { "epoch": 0.07579340749175206, "grad_norm": 0.6028152305136105, "learning_rate": 3.7886748394629305e-05, "loss": 0.5981, "step": 2596 }, { "epoch": 0.07582260371959942, "grad_norm": 0.7568254271528312, "learning_rate": 3.7901342673671924e-05, "loss": 0.7346, "step": 2597 }, { "epoch": 0.07585179994744679, "grad_norm": 0.724270682184342, "learning_rate": 3.791593695271454e-05, "loss": 0.7085, "step": 2598 }, { "epoch": 0.07588099617529415, "grad_norm": 0.6839482645596349, "learning_rate": 3.7930531231757156e-05, "loss": 0.665, "step": 2599 }, { "epoch": 0.07591019240314151, "grad_norm": 0.6698156004742274, "learning_rate": 3.794512551079977e-05, "loss": 0.6923, "step": 2600 }, { "epoch": 0.07593938863098888, "grad_norm": 0.7662554292058171, "learning_rate": 3.795971978984239e-05, "loss": 0.7309, "step": 2601 }, { "epoch": 0.07596858485883624, "grad_norm": 0.6998335259522891, "learning_rate": 3.7974314068885e-05, "loss": 0.7102, "step": 2602 }, { "epoch": 0.0759977810866836, "grad_norm": 0.7397314643612961, "learning_rate": 3.798890834792761e-05, "loss": 0.6896, "step": 2603 }, { "epoch": 0.07602697731453097, "grad_norm": 0.825677457613417, "learning_rate": 3.8003502626970225e-05, "loss": 0.8117, "step": 2604 }, { "epoch": 0.07605617354237833, "grad_norm": 0.8171183466078664, "learning_rate": 3.8018096906012844e-05, "loss": 0.7779, "step": 2605 }, { "epoch": 0.07608536977022569, "grad_norm": 0.6495751978781111, "learning_rate": 3.803269118505546e-05, "loss": 0.6329, "step": 2606 }, { "epoch": 0.07611456599807305, "grad_norm": 0.8563939855040891, "learning_rate": 3.8047285464098076e-05, "loss": 0.6722, "step": 2607 }, { "epoch": 0.07614376222592041, "grad_norm": 0.7003688765480706, "learning_rate": 3.806187974314069e-05, "loss": 0.673, "step": 2608 }, { "epoch": 0.07617295845376777, "grad_norm": 0.7468226729318957, "learning_rate": 3.807647402218331e-05, "loss": 0.7567, "step": 2609 }, { "epoch": 0.07620215468161513, "grad_norm": 0.7177668996540256, "learning_rate": 3.809106830122593e-05, "loss": 0.7265, "step": 2610 }, { "epoch": 0.0762313509094625, "grad_norm": 0.7188628871991637, "learning_rate": 3.810566258026854e-05, "loss": 0.6752, "step": 2611 }, { "epoch": 0.07626054713730986, "grad_norm": 0.7687774348515433, "learning_rate": 3.812025685931115e-05, "loss": 0.7572, "step": 2612 }, { "epoch": 0.07628974336515722, "grad_norm": 0.7952805206512631, "learning_rate": 3.8134851138353764e-05, "loss": 0.7956, "step": 2613 }, { "epoch": 0.07631893959300458, "grad_norm": 1.1540493353078962, "learning_rate": 3.8149445417396384e-05, "loss": 0.6828, "step": 2614 }, { "epoch": 0.07634813582085194, "grad_norm": 0.7482923739329865, "learning_rate": 3.8164039696438996e-05, "loss": 0.6039, "step": 2615 }, { "epoch": 0.07637733204869931, "grad_norm": 0.7370769892444518, "learning_rate": 3.8178633975481615e-05, "loss": 0.7519, "step": 2616 }, { "epoch": 0.07640652827654668, "grad_norm": 0.7359346612901719, "learning_rate": 3.819322825452423e-05, "loss": 0.734, "step": 2617 }, { "epoch": 0.07643572450439404, "grad_norm": 0.6650334537062399, "learning_rate": 3.820782253356685e-05, "loss": 0.6582, "step": 2618 }, { "epoch": 0.0764649207322414, "grad_norm": 0.8475576161704066, "learning_rate": 3.822241681260946e-05, "loss": 0.7833, "step": 2619 }, { "epoch": 0.07649411696008876, "grad_norm": 0.7827038694548758, "learning_rate": 3.823701109165207e-05, "loss": 0.7877, "step": 2620 }, { "epoch": 0.07652331318793612, "grad_norm": 1.10007958315132, "learning_rate": 3.8251605370694684e-05, "loss": 0.7668, "step": 2621 }, { "epoch": 0.07655250941578348, "grad_norm": 0.6439585044559166, "learning_rate": 3.8266199649737304e-05, "loss": 0.603, "step": 2622 }, { "epoch": 0.07658170564363084, "grad_norm": 0.8326189785369924, "learning_rate": 3.8280793928779916e-05, "loss": 0.7813, "step": 2623 }, { "epoch": 0.0766109018714782, "grad_norm": 0.7246583165903969, "learning_rate": 3.8295388207822535e-05, "loss": 0.7792, "step": 2624 }, { "epoch": 0.07664009809932557, "grad_norm": 0.715517721881428, "learning_rate": 3.830998248686515e-05, "loss": 0.7097, "step": 2625 }, { "epoch": 0.07666929432717293, "grad_norm": 0.7357144598620028, "learning_rate": 3.832457676590777e-05, "loss": 0.7673, "step": 2626 }, { "epoch": 0.07669849055502029, "grad_norm": 0.7698692846053516, "learning_rate": 3.8339171044950386e-05, "loss": 0.6869, "step": 2627 }, { "epoch": 0.07672768678286765, "grad_norm": 0.6652689965955433, "learning_rate": 3.8353765323993e-05, "loss": 0.681, "step": 2628 }, { "epoch": 0.07675688301071501, "grad_norm": 0.8287212284616716, "learning_rate": 3.836835960303561e-05, "loss": 0.7638, "step": 2629 }, { "epoch": 0.07678607923856237, "grad_norm": 0.8739029889389752, "learning_rate": 3.8382953882078224e-05, "loss": 0.8035, "step": 2630 }, { "epoch": 0.07681527546640975, "grad_norm": 0.6699335990463252, "learning_rate": 3.839754816112084e-05, "loss": 0.6136, "step": 2631 }, { "epoch": 0.07684447169425711, "grad_norm": 0.8218654512696966, "learning_rate": 3.8412142440163455e-05, "loss": 0.707, "step": 2632 }, { "epoch": 0.07687366792210447, "grad_norm": 0.6975612465728803, "learning_rate": 3.8426736719206075e-05, "loss": 0.6921, "step": 2633 }, { "epoch": 0.07690286414995183, "grad_norm": 0.6950624723211458, "learning_rate": 3.844133099824869e-05, "loss": 0.7116, "step": 2634 }, { "epoch": 0.07693206037779919, "grad_norm": 0.6798305060611157, "learning_rate": 3.8455925277291306e-05, "loss": 0.6608, "step": 2635 }, { "epoch": 0.07696125660564655, "grad_norm": 0.675314361123149, "learning_rate": 3.847051955633392e-05, "loss": 0.722, "step": 2636 }, { "epoch": 0.07699045283349391, "grad_norm": 0.9063153017797123, "learning_rate": 3.848511383537654e-05, "loss": 0.8336, "step": 2637 }, { "epoch": 0.07701964906134127, "grad_norm": 0.9301922828639073, "learning_rate": 3.849970811441915e-05, "loss": 0.6668, "step": 2638 }, { "epoch": 0.07704884528918864, "grad_norm": 0.7424969389228224, "learning_rate": 3.851430239346176e-05, "loss": 0.7657, "step": 2639 }, { "epoch": 0.077078041517036, "grad_norm": 0.7734630142478656, "learning_rate": 3.8528896672504375e-05, "loss": 0.8158, "step": 2640 }, { "epoch": 0.07710723774488336, "grad_norm": 0.6532193566376056, "learning_rate": 3.8543490951546995e-05, "loss": 0.6237, "step": 2641 }, { "epoch": 0.07713643397273072, "grad_norm": 0.7344381523261906, "learning_rate": 3.8558085230589614e-05, "loss": 0.7852, "step": 2642 }, { "epoch": 0.07716563020057808, "grad_norm": 0.6762614332372989, "learning_rate": 3.8572679509632226e-05, "loss": 0.6497, "step": 2643 }, { "epoch": 0.07719482642842544, "grad_norm": 0.7632035395334315, "learning_rate": 3.8587273788674846e-05, "loss": 0.728, "step": 2644 }, { "epoch": 0.0772240226562728, "grad_norm": 1.044705300946007, "learning_rate": 3.860186806771746e-05, "loss": 0.768, "step": 2645 }, { "epoch": 0.07725321888412018, "grad_norm": 0.7600393808052953, "learning_rate": 3.861646234676007e-05, "loss": 0.7187, "step": 2646 }, { "epoch": 0.07728241511196754, "grad_norm": 0.7203231452663053, "learning_rate": 3.863105662580268e-05, "loss": 0.6932, "step": 2647 }, { "epoch": 0.0773116113398149, "grad_norm": 1.0749611454814514, "learning_rate": 3.86456509048453e-05, "loss": 0.7334, "step": 2648 }, { "epoch": 0.07734080756766226, "grad_norm": 0.7964378075644727, "learning_rate": 3.8660245183887915e-05, "loss": 0.8345, "step": 2649 }, { "epoch": 0.07737000379550962, "grad_norm": 0.7390844289061622, "learning_rate": 3.8674839462930534e-05, "loss": 0.7168, "step": 2650 }, { "epoch": 0.07739920002335698, "grad_norm": 0.7308884660032469, "learning_rate": 3.8689433741973146e-05, "loss": 0.7894, "step": 2651 }, { "epoch": 0.07742839625120435, "grad_norm": 0.700962304243876, "learning_rate": 3.8704028021015766e-05, "loss": 0.7046, "step": 2652 }, { "epoch": 0.0774575924790517, "grad_norm": 0.6732857678566584, "learning_rate": 3.871862230005838e-05, "loss": 0.6277, "step": 2653 }, { "epoch": 0.07748678870689907, "grad_norm": 0.7737261312584477, "learning_rate": 3.8733216579101e-05, "loss": 0.7616, "step": 2654 }, { "epoch": 0.07751598493474643, "grad_norm": 0.7489433006709408, "learning_rate": 3.874781085814361e-05, "loss": 0.6373, "step": 2655 }, { "epoch": 0.07754518116259379, "grad_norm": 0.7311149343187313, "learning_rate": 3.876240513718622e-05, "loss": 0.7168, "step": 2656 }, { "epoch": 0.07757437739044115, "grad_norm": 0.6615884850477698, "learning_rate": 3.8776999416228835e-05, "loss": 0.6341, "step": 2657 }, { "epoch": 0.07760357361828851, "grad_norm": 0.695346091353081, "learning_rate": 3.8791593695271454e-05, "loss": 0.709, "step": 2658 }, { "epoch": 0.07763276984613587, "grad_norm": 0.7364365942736801, "learning_rate": 3.880618797431407e-05, "loss": 0.7635, "step": 2659 }, { "epoch": 0.07766196607398324, "grad_norm": 0.6693082354769863, "learning_rate": 3.8820782253356686e-05, "loss": 0.6274, "step": 2660 }, { "epoch": 0.07769116230183061, "grad_norm": 0.688985479971934, "learning_rate": 3.8835376532399305e-05, "loss": 0.6685, "step": 2661 }, { "epoch": 0.07772035852967797, "grad_norm": 0.8545257205717437, "learning_rate": 3.884997081144192e-05, "loss": 0.9019, "step": 2662 }, { "epoch": 0.07774955475752533, "grad_norm": 0.722640849177666, "learning_rate": 3.886456509048454e-05, "loss": 0.6101, "step": 2663 }, { "epoch": 0.0777787509853727, "grad_norm": 0.697160062318775, "learning_rate": 3.887915936952715e-05, "loss": 0.7012, "step": 2664 }, { "epoch": 0.07780794721322006, "grad_norm": 0.8127300754640951, "learning_rate": 3.889375364856976e-05, "loss": 0.7296, "step": 2665 }, { "epoch": 0.07783714344106742, "grad_norm": 0.7121024106819994, "learning_rate": 3.8908347927612374e-05, "loss": 0.6104, "step": 2666 }, { "epoch": 0.07786633966891478, "grad_norm": 0.7401843464316112, "learning_rate": 3.892294220665499e-05, "loss": 0.7652, "step": 2667 }, { "epoch": 0.07789553589676214, "grad_norm": 0.7978759709796377, "learning_rate": 3.8937536485697606e-05, "loss": 0.7043, "step": 2668 }, { "epoch": 0.0779247321246095, "grad_norm": 0.7258618088710955, "learning_rate": 3.8952130764740225e-05, "loss": 0.7296, "step": 2669 }, { "epoch": 0.07795392835245686, "grad_norm": 0.7932541592182604, "learning_rate": 3.896672504378284e-05, "loss": 0.7279, "step": 2670 }, { "epoch": 0.07798312458030422, "grad_norm": 0.7089416895266335, "learning_rate": 3.898131932282546e-05, "loss": 0.673, "step": 2671 }, { "epoch": 0.07801232080815158, "grad_norm": 0.6918631426684086, "learning_rate": 3.899591360186807e-05, "loss": 0.6761, "step": 2672 }, { "epoch": 0.07804151703599894, "grad_norm": 0.8116982967853421, "learning_rate": 3.901050788091068e-05, "loss": 0.6115, "step": 2673 }, { "epoch": 0.0780707132638463, "grad_norm": 0.7101160497201374, "learning_rate": 3.90251021599533e-05, "loss": 0.7062, "step": 2674 }, { "epoch": 0.07809990949169367, "grad_norm": 0.7931372491653538, "learning_rate": 3.903969643899591e-05, "loss": 0.787, "step": 2675 }, { "epoch": 0.07812910571954104, "grad_norm": 0.7861993515266756, "learning_rate": 3.905429071803853e-05, "loss": 0.6982, "step": 2676 }, { "epoch": 0.0781583019473884, "grad_norm": 0.8129785333195264, "learning_rate": 3.9068884997081145e-05, "loss": 0.7568, "step": 2677 }, { "epoch": 0.07818749817523576, "grad_norm": 0.651569883056794, "learning_rate": 3.9083479276123764e-05, "loss": 0.6605, "step": 2678 }, { "epoch": 0.07821669440308313, "grad_norm": 0.8176099477192259, "learning_rate": 3.909807355516638e-05, "loss": 0.5687, "step": 2679 }, { "epoch": 0.07824589063093049, "grad_norm": 0.7836049357379616, "learning_rate": 3.9112667834208996e-05, "loss": 0.7561, "step": 2680 }, { "epoch": 0.07827508685877785, "grad_norm": 0.7006750977521581, "learning_rate": 3.912726211325161e-05, "loss": 0.7593, "step": 2681 }, { "epoch": 0.07830428308662521, "grad_norm": 0.6827879849062419, "learning_rate": 3.914185639229422e-05, "loss": 0.6518, "step": 2682 }, { "epoch": 0.07833347931447257, "grad_norm": 0.6533930283294946, "learning_rate": 3.915645067133683e-05, "loss": 0.6833, "step": 2683 }, { "epoch": 0.07836267554231993, "grad_norm": 0.7919244710825948, "learning_rate": 3.917104495037945e-05, "loss": 0.7879, "step": 2684 }, { "epoch": 0.07839187177016729, "grad_norm": 0.704566308944402, "learning_rate": 3.9185639229422065e-05, "loss": 0.6931, "step": 2685 }, { "epoch": 0.07842106799801465, "grad_norm": 0.7666839117990485, "learning_rate": 3.9200233508464684e-05, "loss": 0.7846, "step": 2686 }, { "epoch": 0.07845026422586202, "grad_norm": 0.7791044277295975, "learning_rate": 3.92148277875073e-05, "loss": 0.8399, "step": 2687 }, { "epoch": 0.07847946045370938, "grad_norm": 0.7081243750425064, "learning_rate": 3.9229422066549916e-05, "loss": 0.6291, "step": 2688 }, { "epoch": 0.07850865668155674, "grad_norm": 0.7355457937173088, "learning_rate": 3.924401634559253e-05, "loss": 0.7451, "step": 2689 }, { "epoch": 0.0785378529094041, "grad_norm": 0.8021858509512712, "learning_rate": 3.925861062463515e-05, "loss": 0.7939, "step": 2690 }, { "epoch": 0.07856704913725147, "grad_norm": 0.6492443286746836, "learning_rate": 3.927320490367776e-05, "loss": 0.5878, "step": 2691 }, { "epoch": 0.07859624536509884, "grad_norm": 0.6921566580672459, "learning_rate": 3.928779918272037e-05, "loss": 0.7058, "step": 2692 }, { "epoch": 0.0786254415929462, "grad_norm": 0.7408907018905646, "learning_rate": 3.930239346176299e-05, "loss": 0.7918, "step": 2693 }, { "epoch": 0.07865463782079356, "grad_norm": 0.7007080538602052, "learning_rate": 3.9316987740805604e-05, "loss": 0.7632, "step": 2694 }, { "epoch": 0.07868383404864092, "grad_norm": 0.7662793854887275, "learning_rate": 3.9331582019848224e-05, "loss": 0.6968, "step": 2695 }, { "epoch": 0.07871303027648828, "grad_norm": 0.7272417188741529, "learning_rate": 3.9346176298890836e-05, "loss": 0.7401, "step": 2696 }, { "epoch": 0.07874222650433564, "grad_norm": 0.7146964597166862, "learning_rate": 3.9360770577933455e-05, "loss": 0.6577, "step": 2697 }, { "epoch": 0.078771422732183, "grad_norm": 0.707183617230124, "learning_rate": 3.937536485697607e-05, "loss": 0.7133, "step": 2698 }, { "epoch": 0.07880061896003036, "grad_norm": 0.66897305787875, "learning_rate": 3.938995913601869e-05, "loss": 0.6762, "step": 2699 }, { "epoch": 0.07882981518787772, "grad_norm": 0.6973252652388442, "learning_rate": 3.940455341506129e-05, "loss": 0.6552, "step": 2700 }, { "epoch": 0.07885901141572509, "grad_norm": 0.7104714725170496, "learning_rate": 3.941914769410391e-05, "loss": 0.6725, "step": 2701 }, { "epoch": 0.07888820764357245, "grad_norm": 0.7134106070723443, "learning_rate": 3.9433741973146524e-05, "loss": 0.7338, "step": 2702 }, { "epoch": 0.07891740387141981, "grad_norm": 0.7235762387025504, "learning_rate": 3.9448336252189144e-05, "loss": 0.7022, "step": 2703 }, { "epoch": 0.07894660009926717, "grad_norm": 0.7636721239727947, "learning_rate": 3.9462930531231756e-05, "loss": 0.7444, "step": 2704 }, { "epoch": 0.07897579632711453, "grad_norm": 0.7104223611268642, "learning_rate": 3.9477524810274375e-05, "loss": 0.6466, "step": 2705 }, { "epoch": 0.07900499255496189, "grad_norm": 0.6913714701241904, "learning_rate": 3.9492119089316995e-05, "loss": 0.6628, "step": 2706 }, { "epoch": 0.07903418878280927, "grad_norm": 0.6432963516981569, "learning_rate": 3.950671336835961e-05, "loss": 0.6004, "step": 2707 }, { "epoch": 0.07906338501065663, "grad_norm": 0.732513718292814, "learning_rate": 3.952130764740222e-05, "loss": 0.6648, "step": 2708 }, { "epoch": 0.07909258123850399, "grad_norm": 0.6627967366697458, "learning_rate": 3.953590192644483e-05, "loss": 0.6424, "step": 2709 }, { "epoch": 0.07912177746635135, "grad_norm": 0.6336266114323823, "learning_rate": 3.955049620548745e-05, "loss": 0.6086, "step": 2710 }, { "epoch": 0.07915097369419871, "grad_norm": 0.6735623251510587, "learning_rate": 3.9565090484530064e-05, "loss": 0.6458, "step": 2711 }, { "epoch": 0.07918016992204607, "grad_norm": 0.6801366632436534, "learning_rate": 3.957968476357268e-05, "loss": 0.6908, "step": 2712 }, { "epoch": 0.07920936614989343, "grad_norm": 0.7140549773243852, "learning_rate": 3.9594279042615295e-05, "loss": 0.7024, "step": 2713 }, { "epoch": 0.0792385623777408, "grad_norm": 0.7544602314040034, "learning_rate": 3.9608873321657915e-05, "loss": 0.8089, "step": 2714 }, { "epoch": 0.07926775860558816, "grad_norm": 0.6658490474905906, "learning_rate": 3.962346760070053e-05, "loss": 0.6908, "step": 2715 }, { "epoch": 0.07929695483343552, "grad_norm": 0.7476958487267242, "learning_rate": 3.9638061879743146e-05, "loss": 0.7206, "step": 2716 }, { "epoch": 0.07932615106128288, "grad_norm": 0.7148716352845924, "learning_rate": 3.965265615878576e-05, "loss": 0.7225, "step": 2717 }, { "epoch": 0.07935534728913024, "grad_norm": 0.7132255338595967, "learning_rate": 3.966725043782837e-05, "loss": 0.6607, "step": 2718 }, { "epoch": 0.0793845435169776, "grad_norm": 0.8191385723062065, "learning_rate": 3.9681844716870984e-05, "loss": 0.754, "step": 2719 }, { "epoch": 0.07941373974482496, "grad_norm": 0.7578050556641529, "learning_rate": 3.96964389959136e-05, "loss": 0.7923, "step": 2720 }, { "epoch": 0.07944293597267232, "grad_norm": 0.7080684692894491, "learning_rate": 3.9711033274956215e-05, "loss": 0.686, "step": 2721 }, { "epoch": 0.0794721322005197, "grad_norm": 0.6891699067297966, "learning_rate": 3.9725627553998835e-05, "loss": 0.7364, "step": 2722 }, { "epoch": 0.07950132842836706, "grad_norm": 0.7241645119278448, "learning_rate": 3.9740221833041454e-05, "loss": 0.7757, "step": 2723 }, { "epoch": 0.07953052465621442, "grad_norm": 0.6593343082305319, "learning_rate": 3.9754816112084066e-05, "loss": 0.6739, "step": 2724 }, { "epoch": 0.07955972088406178, "grad_norm": 0.7352577842368219, "learning_rate": 3.9769410391126686e-05, "loss": 0.7719, "step": 2725 }, { "epoch": 0.07958891711190914, "grad_norm": 0.7188730020933416, "learning_rate": 3.97840046701693e-05, "loss": 0.6853, "step": 2726 }, { "epoch": 0.0796181133397565, "grad_norm": 0.688079297453886, "learning_rate": 3.979859894921191e-05, "loss": 0.7216, "step": 2727 }, { "epoch": 0.07964730956760387, "grad_norm": 0.8104642892985948, "learning_rate": 3.981319322825452e-05, "loss": 0.8667, "step": 2728 }, { "epoch": 0.07967650579545123, "grad_norm": 0.8801548291714182, "learning_rate": 3.982778750729714e-05, "loss": 0.7869, "step": 2729 }, { "epoch": 0.07970570202329859, "grad_norm": 0.7967631375549717, "learning_rate": 3.9842381786339755e-05, "loss": 0.7695, "step": 2730 }, { "epoch": 0.07973489825114595, "grad_norm": 0.6215072310541098, "learning_rate": 3.9856976065382374e-05, "loss": 0.5706, "step": 2731 }, { "epoch": 0.07976409447899331, "grad_norm": 0.6180830035112928, "learning_rate": 3.9871570344424986e-05, "loss": 0.5584, "step": 2732 }, { "epoch": 0.07979329070684067, "grad_norm": 0.6808514004075745, "learning_rate": 3.9886164623467606e-05, "loss": 0.666, "step": 2733 }, { "epoch": 0.07982248693468803, "grad_norm": 0.7703620319050039, "learning_rate": 3.990075890251022e-05, "loss": 0.7115, "step": 2734 }, { "epoch": 0.0798516831625354, "grad_norm": 0.710246318453286, "learning_rate": 3.991535318155283e-05, "loss": 0.686, "step": 2735 }, { "epoch": 0.07988087939038276, "grad_norm": 0.6790873114952859, "learning_rate": 3.992994746059544e-05, "loss": 0.6858, "step": 2736 }, { "epoch": 0.07991007561823013, "grad_norm": 0.8497792237788065, "learning_rate": 3.994454173963806e-05, "loss": 0.7668, "step": 2737 }, { "epoch": 0.07993927184607749, "grad_norm": 0.7370900603143813, "learning_rate": 3.995913601868068e-05, "loss": 0.7275, "step": 2738 }, { "epoch": 0.07996846807392485, "grad_norm": 0.7087265253507474, "learning_rate": 3.9973730297723294e-05, "loss": 0.6929, "step": 2739 }, { "epoch": 0.07999766430177221, "grad_norm": 0.7263301079883266, "learning_rate": 3.998832457676591e-05, "loss": 0.7656, "step": 2740 }, { "epoch": 0.08002686052961958, "grad_norm": 0.7462641856112191, "learning_rate": 4.0002918855808526e-05, "loss": 0.6877, "step": 2741 }, { "epoch": 0.08005605675746694, "grad_norm": 0.7713131123858987, "learning_rate": 4.0017513134851145e-05, "loss": 0.8049, "step": 2742 }, { "epoch": 0.0800852529853143, "grad_norm": 0.8140009400254217, "learning_rate": 4.003210741389376e-05, "loss": 0.7231, "step": 2743 }, { "epoch": 0.08011444921316166, "grad_norm": 0.6975568251770328, "learning_rate": 4.004670169293637e-05, "loss": 0.7199, "step": 2744 }, { "epoch": 0.08014364544100902, "grad_norm": 0.8172268320396114, "learning_rate": 4.006129597197898e-05, "loss": 0.7501, "step": 2745 }, { "epoch": 0.08017284166885638, "grad_norm": 0.7289593849090802, "learning_rate": 4.00758902510216e-05, "loss": 0.7532, "step": 2746 }, { "epoch": 0.08020203789670374, "grad_norm": 0.6979838846440638, "learning_rate": 4.0090484530064214e-05, "loss": 0.694, "step": 2747 }, { "epoch": 0.0802312341245511, "grad_norm": 0.6915522777571183, "learning_rate": 4.010507880910683e-05, "loss": 0.7098, "step": 2748 }, { "epoch": 0.08026043035239847, "grad_norm": 0.7068352117390017, "learning_rate": 4.0119673088149446e-05, "loss": 0.6709, "step": 2749 }, { "epoch": 0.08028962658024583, "grad_norm": 0.7386221439725991, "learning_rate": 4.0134267367192065e-05, "loss": 0.731, "step": 2750 }, { "epoch": 0.08031882280809319, "grad_norm": 0.7043441769641059, "learning_rate": 4.014886164623468e-05, "loss": 0.7373, "step": 2751 }, { "epoch": 0.08034801903594056, "grad_norm": 0.6759768625253696, "learning_rate": 4.01634559252773e-05, "loss": 0.706, "step": 2752 }, { "epoch": 0.08037721526378792, "grad_norm": 0.6725827211248346, "learning_rate": 4.017805020431991e-05, "loss": 0.6391, "step": 2753 }, { "epoch": 0.08040641149163529, "grad_norm": 0.7064999944218665, "learning_rate": 4.019264448336252e-05, "loss": 0.7048, "step": 2754 }, { "epoch": 0.08043560771948265, "grad_norm": 0.6729993349381896, "learning_rate": 4.020723876240514e-05, "loss": 0.6489, "step": 2755 }, { "epoch": 0.08046480394733001, "grad_norm": 0.7254498890723753, "learning_rate": 4.0221833041447753e-05, "loss": 0.7359, "step": 2756 }, { "epoch": 0.08049400017517737, "grad_norm": 0.7330970611437555, "learning_rate": 4.023642732049037e-05, "loss": 0.6964, "step": 2757 }, { "epoch": 0.08052319640302473, "grad_norm": 0.7370020552784958, "learning_rate": 4.0251021599532985e-05, "loss": 0.6993, "step": 2758 }, { "epoch": 0.08055239263087209, "grad_norm": 0.6672726203845228, "learning_rate": 4.0265615878575604e-05, "loss": 0.6249, "step": 2759 }, { "epoch": 0.08058158885871945, "grad_norm": 0.7381013677027102, "learning_rate": 4.028021015761822e-05, "loss": 0.7823, "step": 2760 }, { "epoch": 0.08061078508656681, "grad_norm": 0.7367990945962827, "learning_rate": 4.029480443666083e-05, "loss": 0.6762, "step": 2761 }, { "epoch": 0.08063998131441417, "grad_norm": 0.6922155766467848, "learning_rate": 4.030939871570344e-05, "loss": 0.6716, "step": 2762 }, { "epoch": 0.08066917754226154, "grad_norm": 0.8951011463969032, "learning_rate": 4.032399299474606e-05, "loss": 0.7344, "step": 2763 }, { "epoch": 0.0806983737701089, "grad_norm": 0.7052784244639934, "learning_rate": 4.0338587273788673e-05, "loss": 0.7055, "step": 2764 }, { "epoch": 0.08072756999795626, "grad_norm": 0.6892613361092481, "learning_rate": 4.035318155283129e-05, "loss": 0.672, "step": 2765 }, { "epoch": 0.08075676622580362, "grad_norm": 0.718882777286368, "learning_rate": 4.0367775831873905e-05, "loss": 0.7801, "step": 2766 }, { "epoch": 0.080785962453651, "grad_norm": 0.6229405370970882, "learning_rate": 4.0382370110916524e-05, "loss": 0.5787, "step": 2767 }, { "epoch": 0.08081515868149836, "grad_norm": 0.6806792515852392, "learning_rate": 4.039696438995914e-05, "loss": 0.7094, "step": 2768 }, { "epoch": 0.08084435490934572, "grad_norm": 0.7660292954708838, "learning_rate": 4.0411558669001756e-05, "loss": 0.6819, "step": 2769 }, { "epoch": 0.08087355113719308, "grad_norm": 0.7012717108077076, "learning_rate": 4.042615294804437e-05, "loss": 0.6597, "step": 2770 }, { "epoch": 0.08090274736504044, "grad_norm": 0.6926280403145587, "learning_rate": 4.044074722708698e-05, "loss": 0.7132, "step": 2771 }, { "epoch": 0.0809319435928878, "grad_norm": 0.7163753075615982, "learning_rate": 4.04553415061296e-05, "loss": 0.7055, "step": 2772 }, { "epoch": 0.08096113982073516, "grad_norm": 0.7298631712131324, "learning_rate": 4.046993578517221e-05, "loss": 0.7007, "step": 2773 }, { "epoch": 0.08099033604858252, "grad_norm": 0.9361800352983574, "learning_rate": 4.048453006421483e-05, "loss": 0.8274, "step": 2774 }, { "epoch": 0.08101953227642988, "grad_norm": 0.7580972925208543, "learning_rate": 4.0499124343257444e-05, "loss": 0.6962, "step": 2775 }, { "epoch": 0.08104872850427725, "grad_norm": 0.7655137229991222, "learning_rate": 4.0513718622300064e-05, "loss": 0.7685, "step": 2776 }, { "epoch": 0.0810779247321246, "grad_norm": 0.7212274026073102, "learning_rate": 4.0528312901342676e-05, "loss": 0.7593, "step": 2777 }, { "epoch": 0.08110712095997197, "grad_norm": 0.860148391498036, "learning_rate": 4.0542907180385295e-05, "loss": 0.7723, "step": 2778 }, { "epoch": 0.08113631718781933, "grad_norm": 0.7880395843540248, "learning_rate": 4.055750145942791e-05, "loss": 0.7207, "step": 2779 }, { "epoch": 0.08116551341566669, "grad_norm": 0.7928406292777567, "learning_rate": 4.057209573847052e-05, "loss": 0.708, "step": 2780 }, { "epoch": 0.08119470964351405, "grad_norm": 0.6943539883544806, "learning_rate": 4.058669001751313e-05, "loss": 0.7024, "step": 2781 }, { "epoch": 0.08122390587136143, "grad_norm": 0.7361304439872951, "learning_rate": 4.060128429655575e-05, "loss": 0.6969, "step": 2782 }, { "epoch": 0.08125310209920879, "grad_norm": 0.8379470289551573, "learning_rate": 4.0615878575598364e-05, "loss": 0.7697, "step": 2783 }, { "epoch": 0.08128229832705615, "grad_norm": 0.6548897756143146, "learning_rate": 4.0630472854640984e-05, "loss": 0.6666, "step": 2784 }, { "epoch": 0.08131149455490351, "grad_norm": 0.7181888519853027, "learning_rate": 4.0645067133683596e-05, "loss": 0.6889, "step": 2785 }, { "epoch": 0.08134069078275087, "grad_norm": 0.7286069758675832, "learning_rate": 4.0659661412726215e-05, "loss": 0.6173, "step": 2786 }, { "epoch": 0.08136988701059823, "grad_norm": 0.8130685262202676, "learning_rate": 4.067425569176883e-05, "loss": 0.7418, "step": 2787 }, { "epoch": 0.0813990832384456, "grad_norm": 0.7226894228974267, "learning_rate": 4.068884997081144e-05, "loss": 0.709, "step": 2788 }, { "epoch": 0.08142827946629295, "grad_norm": 0.8336854562124186, "learning_rate": 4.070344424985406e-05, "loss": 0.7889, "step": 2789 }, { "epoch": 0.08145747569414032, "grad_norm": 0.9662731816032134, "learning_rate": 4.071803852889667e-05, "loss": 0.7789, "step": 2790 }, { "epoch": 0.08148667192198768, "grad_norm": 0.6888313799898861, "learning_rate": 4.073263280793929e-05, "loss": 0.6875, "step": 2791 }, { "epoch": 0.08151586814983504, "grad_norm": 0.7231526969278996, "learning_rate": 4.0747227086981904e-05, "loss": 0.7529, "step": 2792 }, { "epoch": 0.0815450643776824, "grad_norm": 0.7493791170175153, "learning_rate": 4.076182136602452e-05, "loss": 0.6871, "step": 2793 }, { "epoch": 0.08157426060552976, "grad_norm": 0.7895367584145565, "learning_rate": 4.0776415645067135e-05, "loss": 0.6772, "step": 2794 }, { "epoch": 0.08160345683337712, "grad_norm": 0.9750840991013043, "learning_rate": 4.0791009924109755e-05, "loss": 0.6593, "step": 2795 }, { "epoch": 0.08163265306122448, "grad_norm": 0.7687160185755224, "learning_rate": 4.080560420315237e-05, "loss": 0.7226, "step": 2796 }, { "epoch": 0.08166184928907186, "grad_norm": 0.6790212791965207, "learning_rate": 4.082019848219498e-05, "loss": 0.6376, "step": 2797 }, { "epoch": 0.08169104551691922, "grad_norm": 0.7891139071186256, "learning_rate": 4.083479276123759e-05, "loss": 0.7833, "step": 2798 }, { "epoch": 0.08172024174476658, "grad_norm": 0.6786124698981922, "learning_rate": 4.084938704028021e-05, "loss": 0.6662, "step": 2799 }, { "epoch": 0.08174943797261394, "grad_norm": 0.6705311208035033, "learning_rate": 4.0863981319322824e-05, "loss": 0.7132, "step": 2800 }, { "epoch": 0.0817786342004613, "grad_norm": 0.7324753441793365, "learning_rate": 4.087857559836544e-05, "loss": 0.734, "step": 2801 }, { "epoch": 0.08180783042830866, "grad_norm": 0.6800646737544499, "learning_rate": 4.089316987740806e-05, "loss": 0.6883, "step": 2802 }, { "epoch": 0.08183702665615603, "grad_norm": 0.6895170713384966, "learning_rate": 4.0907764156450675e-05, "loss": 0.6191, "step": 2803 }, { "epoch": 0.08186622288400339, "grad_norm": 0.6873309603415354, "learning_rate": 4.0922358435493294e-05, "loss": 0.6887, "step": 2804 }, { "epoch": 0.08189541911185075, "grad_norm": 0.8777777828467371, "learning_rate": 4.0936952714535906e-05, "loss": 0.675, "step": 2805 }, { "epoch": 0.08192461533969811, "grad_norm": 0.8673119672512987, "learning_rate": 4.095154699357852e-05, "loss": 0.8032, "step": 2806 }, { "epoch": 0.08195381156754547, "grad_norm": 0.7334495513686698, "learning_rate": 4.096614127262113e-05, "loss": 0.7385, "step": 2807 }, { "epoch": 0.08198300779539283, "grad_norm": 0.690680474378738, "learning_rate": 4.098073555166375e-05, "loss": 0.6058, "step": 2808 }, { "epoch": 0.08201220402324019, "grad_norm": 0.6939993494448354, "learning_rate": 4.099532983070636e-05, "loss": 0.7279, "step": 2809 }, { "epoch": 0.08204140025108755, "grad_norm": 1.034696917263078, "learning_rate": 4.100992410974898e-05, "loss": 0.6693, "step": 2810 }, { "epoch": 0.08207059647893492, "grad_norm": 0.7401467691110996, "learning_rate": 4.1024518388791595e-05, "loss": 0.6873, "step": 2811 }, { "epoch": 0.08209979270678229, "grad_norm": 0.7229948476250002, "learning_rate": 4.1039112667834214e-05, "loss": 0.7406, "step": 2812 }, { "epoch": 0.08212898893462965, "grad_norm": 0.8022897345633117, "learning_rate": 4.1053706946876827e-05, "loss": 0.6607, "step": 2813 }, { "epoch": 0.08215818516247701, "grad_norm": 0.6974641148449868, "learning_rate": 4.106830122591944e-05, "loss": 0.7075, "step": 2814 }, { "epoch": 0.08218738139032437, "grad_norm": 0.7020340367524557, "learning_rate": 4.108289550496205e-05, "loss": 0.7487, "step": 2815 }, { "epoch": 0.08221657761817174, "grad_norm": 0.7157031183886593, "learning_rate": 4.109748978400467e-05, "loss": 0.5924, "step": 2816 }, { "epoch": 0.0822457738460191, "grad_norm": 0.6804175963627163, "learning_rate": 4.111208406304728e-05, "loss": 0.6637, "step": 2817 }, { "epoch": 0.08227497007386646, "grad_norm": 1.0042383499951835, "learning_rate": 4.11266783420899e-05, "loss": 0.7441, "step": 2818 }, { "epoch": 0.08230416630171382, "grad_norm": 0.7791597906330096, "learning_rate": 4.114127262113252e-05, "loss": 0.8218, "step": 2819 }, { "epoch": 0.08233336252956118, "grad_norm": 0.7709852235228414, "learning_rate": 4.1155866900175134e-05, "loss": 0.6453, "step": 2820 }, { "epoch": 0.08236255875740854, "grad_norm": 0.6199026237495838, "learning_rate": 4.117046117921775e-05, "loss": 0.5941, "step": 2821 }, { "epoch": 0.0823917549852559, "grad_norm": 0.6967294822865777, "learning_rate": 4.1185055458260366e-05, "loss": 0.613, "step": 2822 }, { "epoch": 0.08242095121310326, "grad_norm": 0.7097997601893822, "learning_rate": 4.119964973730298e-05, "loss": 0.7208, "step": 2823 }, { "epoch": 0.08245014744095062, "grad_norm": 0.7355113608433751, "learning_rate": 4.121424401634559e-05, "loss": 0.7499, "step": 2824 }, { "epoch": 0.08247934366879799, "grad_norm": 0.6701681367081991, "learning_rate": 4.122883829538821e-05, "loss": 0.6217, "step": 2825 }, { "epoch": 0.08250853989664535, "grad_norm": 0.8550318715136815, "learning_rate": 4.124343257443082e-05, "loss": 0.7096, "step": 2826 }, { "epoch": 0.08253773612449272, "grad_norm": 0.7854977655230014, "learning_rate": 4.125802685347344e-05, "loss": 0.7817, "step": 2827 }, { "epoch": 0.08256693235234008, "grad_norm": 0.7955654776143807, "learning_rate": 4.1272621132516054e-05, "loss": 0.7353, "step": 2828 }, { "epoch": 0.08259612858018744, "grad_norm": 0.7175727891138886, "learning_rate": 4.1287215411558673e-05, "loss": 0.7251, "step": 2829 }, { "epoch": 0.0826253248080348, "grad_norm": 0.6478623154442343, "learning_rate": 4.1301809690601286e-05, "loss": 0.652, "step": 2830 }, { "epoch": 0.08265452103588217, "grad_norm": 0.799468629279649, "learning_rate": 4.1316403969643905e-05, "loss": 0.7129, "step": 2831 }, { "epoch": 0.08268371726372953, "grad_norm": 0.6549204355549131, "learning_rate": 4.133099824868652e-05, "loss": 0.6183, "step": 2832 }, { "epoch": 0.08271291349157689, "grad_norm": 0.6954350568285164, "learning_rate": 4.134559252772913e-05, "loss": 0.6914, "step": 2833 }, { "epoch": 0.08274210971942425, "grad_norm": 0.6802679553965709, "learning_rate": 4.136018680677175e-05, "loss": 0.7072, "step": 2834 }, { "epoch": 0.08277130594727161, "grad_norm": 0.7623126396409038, "learning_rate": 4.137478108581436e-05, "loss": 0.7757, "step": 2835 }, { "epoch": 0.08280050217511897, "grad_norm": 0.7217165341428721, "learning_rate": 4.138937536485698e-05, "loss": 0.6796, "step": 2836 }, { "epoch": 0.08282969840296633, "grad_norm": 0.8151148554953689, "learning_rate": 4.1403969643899593e-05, "loss": 0.7909, "step": 2837 }, { "epoch": 0.0828588946308137, "grad_norm": 0.6991034659043894, "learning_rate": 4.141856392294221e-05, "loss": 0.6575, "step": 2838 }, { "epoch": 0.08288809085866106, "grad_norm": 0.6684989768947265, "learning_rate": 4.1433158201984825e-05, "loss": 0.6131, "step": 2839 }, { "epoch": 0.08291728708650842, "grad_norm": 0.6623336692001203, "learning_rate": 4.144775248102744e-05, "loss": 0.619, "step": 2840 }, { "epoch": 0.08294648331435578, "grad_norm": 0.6555208252661393, "learning_rate": 4.146234676007005e-05, "loss": 0.6342, "step": 2841 }, { "epoch": 0.08297567954220315, "grad_norm": 0.725737280090963, "learning_rate": 4.147694103911267e-05, "loss": 0.7697, "step": 2842 }, { "epoch": 0.08300487577005052, "grad_norm": 0.6554038375651066, "learning_rate": 4.149153531815528e-05, "loss": 0.6363, "step": 2843 }, { "epoch": 0.08303407199789788, "grad_norm": 0.6962727223346233, "learning_rate": 4.15061295971979e-05, "loss": 0.6978, "step": 2844 }, { "epoch": 0.08306326822574524, "grad_norm": 0.7020014507579629, "learning_rate": 4.1520723876240514e-05, "loss": 0.6824, "step": 2845 }, { "epoch": 0.0830924644535926, "grad_norm": 0.6380712979654292, "learning_rate": 4.153531815528313e-05, "loss": 0.6316, "step": 2846 }, { "epoch": 0.08312166068143996, "grad_norm": 0.6993667919750416, "learning_rate": 4.1549912434325745e-05, "loss": 0.7145, "step": 2847 }, { "epoch": 0.08315085690928732, "grad_norm": 0.7020167357999347, "learning_rate": 4.1564506713368364e-05, "loss": 0.7098, "step": 2848 }, { "epoch": 0.08318005313713468, "grad_norm": 0.7644374069741469, "learning_rate": 4.157910099241098e-05, "loss": 0.816, "step": 2849 }, { "epoch": 0.08320924936498204, "grad_norm": 0.7848197608783276, "learning_rate": 4.159369527145359e-05, "loss": 0.7418, "step": 2850 }, { "epoch": 0.0832384455928294, "grad_norm": 0.664325619092215, "learning_rate": 4.160828955049621e-05, "loss": 0.6298, "step": 2851 }, { "epoch": 0.08326764182067677, "grad_norm": 0.6354697949113892, "learning_rate": 4.162288382953882e-05, "loss": 0.6225, "step": 2852 }, { "epoch": 0.08329683804852413, "grad_norm": 1.3822300526260833, "learning_rate": 4.163747810858144e-05, "loss": 0.7585, "step": 2853 }, { "epoch": 0.08332603427637149, "grad_norm": 0.748338010272026, "learning_rate": 4.165207238762405e-05, "loss": 0.8132, "step": 2854 }, { "epoch": 0.08335523050421885, "grad_norm": 0.6740371910773437, "learning_rate": 4.166666666666667e-05, "loss": 0.7026, "step": 2855 }, { "epoch": 0.08338442673206621, "grad_norm": 0.8293020819033898, "learning_rate": 4.1681260945709285e-05, "loss": 0.7557, "step": 2856 }, { "epoch": 0.08341362295991359, "grad_norm": 0.7654902861790613, "learning_rate": 4.1695855224751904e-05, "loss": 0.8099, "step": 2857 }, { "epoch": 0.08344281918776095, "grad_norm": 0.616984943693633, "learning_rate": 4.1710449503794516e-05, "loss": 0.6049, "step": 2858 }, { "epoch": 0.08347201541560831, "grad_norm": 0.6935861117272943, "learning_rate": 4.172504378283713e-05, "loss": 0.6594, "step": 2859 }, { "epoch": 0.08350121164345567, "grad_norm": 0.7569953111887182, "learning_rate": 4.173963806187974e-05, "loss": 0.7476, "step": 2860 }, { "epoch": 0.08353040787130303, "grad_norm": 0.7734787442310314, "learning_rate": 4.175423234092236e-05, "loss": 0.6467, "step": 2861 }, { "epoch": 0.08355960409915039, "grad_norm": 0.6554644001683378, "learning_rate": 4.176882661996497e-05, "loss": 0.6556, "step": 2862 }, { "epoch": 0.08358880032699775, "grad_norm": 0.8082264228340562, "learning_rate": 4.178342089900759e-05, "loss": 0.6036, "step": 2863 }, { "epoch": 0.08361799655484511, "grad_norm": 0.6551250604119075, "learning_rate": 4.1798015178050205e-05, "loss": 0.6483, "step": 2864 }, { "epoch": 0.08364719278269248, "grad_norm": 0.7404342240404398, "learning_rate": 4.1812609457092824e-05, "loss": 0.6979, "step": 2865 }, { "epoch": 0.08367638901053984, "grad_norm": 0.6906691688586767, "learning_rate": 4.1827203736135436e-05, "loss": 0.6605, "step": 2866 }, { "epoch": 0.0837055852383872, "grad_norm": 0.7168921835585975, "learning_rate": 4.184179801517805e-05, "loss": 0.6922, "step": 2867 }, { "epoch": 0.08373478146623456, "grad_norm": 0.7116778746937973, "learning_rate": 4.185639229422067e-05, "loss": 0.727, "step": 2868 }, { "epoch": 0.08376397769408192, "grad_norm": 0.6806400921459885, "learning_rate": 4.187098657326328e-05, "loss": 0.6604, "step": 2869 }, { "epoch": 0.08379317392192928, "grad_norm": 0.7006254680562606, "learning_rate": 4.18855808523059e-05, "loss": 0.7294, "step": 2870 }, { "epoch": 0.08382237014977664, "grad_norm": 0.6739152636386252, "learning_rate": 4.190017513134851e-05, "loss": 0.7089, "step": 2871 }, { "epoch": 0.08385156637762402, "grad_norm": 0.7334285588449797, "learning_rate": 4.191476941039113e-05, "loss": 0.6809, "step": 2872 }, { "epoch": 0.08388076260547138, "grad_norm": 0.68105935123548, "learning_rate": 4.1929363689433744e-05, "loss": 0.667, "step": 2873 }, { "epoch": 0.08390995883331874, "grad_norm": 0.6713159659677013, "learning_rate": 4.194395796847636e-05, "loss": 0.6445, "step": 2874 }, { "epoch": 0.0839391550611661, "grad_norm": 0.6872450219518578, "learning_rate": 4.1958552247518976e-05, "loss": 0.6978, "step": 2875 }, { "epoch": 0.08396835128901346, "grad_norm": 0.7077540823285489, "learning_rate": 4.197314652656159e-05, "loss": 0.6998, "step": 2876 }, { "epoch": 0.08399754751686082, "grad_norm": 0.6876824487606525, "learning_rate": 4.19877408056042e-05, "loss": 0.6761, "step": 2877 }, { "epoch": 0.08402674374470818, "grad_norm": 0.6796742746083227, "learning_rate": 4.200233508464682e-05, "loss": 0.6565, "step": 2878 }, { "epoch": 0.08405593997255555, "grad_norm": 0.6270396697169092, "learning_rate": 4.201692936368943e-05, "loss": 0.6232, "step": 2879 }, { "epoch": 0.08408513620040291, "grad_norm": 0.7178358993773618, "learning_rate": 4.203152364273205e-05, "loss": 0.6892, "step": 2880 }, { "epoch": 0.08411433242825027, "grad_norm": 1.0926404478307572, "learning_rate": 4.2046117921774664e-05, "loss": 0.7735, "step": 2881 }, { "epoch": 0.08414352865609763, "grad_norm": 0.6653670918582477, "learning_rate": 4.206071220081728e-05, "loss": 0.6671, "step": 2882 }, { "epoch": 0.08417272488394499, "grad_norm": 0.7806604966171377, "learning_rate": 4.20753064798599e-05, "loss": 0.715, "step": 2883 }, { "epoch": 0.08420192111179235, "grad_norm": 0.6605838009845735, "learning_rate": 4.2089900758902515e-05, "loss": 0.5917, "step": 2884 }, { "epoch": 0.08423111733963971, "grad_norm": 0.7226068759542389, "learning_rate": 4.210449503794513e-05, "loss": 0.7488, "step": 2885 }, { "epoch": 0.08426031356748707, "grad_norm": 0.6825160269449556, "learning_rate": 4.211908931698774e-05, "loss": 0.67, "step": 2886 }, { "epoch": 0.08428950979533445, "grad_norm": 0.6389851557148309, "learning_rate": 4.213368359603036e-05, "loss": 0.6861, "step": 2887 }, { "epoch": 0.08431870602318181, "grad_norm": 0.7849217437375788, "learning_rate": 4.214827787507297e-05, "loss": 0.8337, "step": 2888 }, { "epoch": 0.08434790225102917, "grad_norm": 0.7031814533792269, "learning_rate": 4.216287215411559e-05, "loss": 0.7174, "step": 2889 }, { "epoch": 0.08437709847887653, "grad_norm": 0.736154872812953, "learning_rate": 4.21774664331582e-05, "loss": 0.7782, "step": 2890 }, { "epoch": 0.0844062947067239, "grad_norm": 0.6956219687879384, "learning_rate": 4.219206071220082e-05, "loss": 0.7413, "step": 2891 }, { "epoch": 0.08443549093457126, "grad_norm": 0.6763269270325114, "learning_rate": 4.2206654991243435e-05, "loss": 0.6594, "step": 2892 }, { "epoch": 0.08446468716241862, "grad_norm": 0.7623247452357285, "learning_rate": 4.222124927028605e-05, "loss": 0.6697, "step": 2893 }, { "epoch": 0.08449388339026598, "grad_norm": 0.8000069352728068, "learning_rate": 4.223584354932866e-05, "loss": 0.6954, "step": 2894 }, { "epoch": 0.08452307961811334, "grad_norm": 0.6150150008334111, "learning_rate": 4.225043782837128e-05, "loss": 0.5739, "step": 2895 }, { "epoch": 0.0845522758459607, "grad_norm": 0.7128844177608679, "learning_rate": 4.226503210741389e-05, "loss": 0.6791, "step": 2896 }, { "epoch": 0.08458147207380806, "grad_norm": 0.8105481121110528, "learning_rate": 4.227962638645651e-05, "loss": 0.7055, "step": 2897 }, { "epoch": 0.08461066830165542, "grad_norm": 0.8212199858341903, "learning_rate": 4.229422066549913e-05, "loss": 0.8145, "step": 2898 }, { "epoch": 0.08463986452950278, "grad_norm": 0.6569712627909252, "learning_rate": 4.230881494454174e-05, "loss": 0.6372, "step": 2899 }, { "epoch": 0.08466906075735015, "grad_norm": 0.7424766557818556, "learning_rate": 4.232340922358436e-05, "loss": 0.749, "step": 2900 }, { "epoch": 0.0846982569851975, "grad_norm": 0.7030401772111919, "learning_rate": 4.2338003502626974e-05, "loss": 0.707, "step": 2901 }, { "epoch": 0.08472745321304488, "grad_norm": 0.6674118308152243, "learning_rate": 4.235259778166959e-05, "loss": 0.6804, "step": 2902 }, { "epoch": 0.08475664944089224, "grad_norm": 0.8741429761560907, "learning_rate": 4.23671920607122e-05, "loss": 0.7784, "step": 2903 }, { "epoch": 0.0847858456687396, "grad_norm": 0.6764412817001803, "learning_rate": 4.238178633975482e-05, "loss": 0.6641, "step": 2904 }, { "epoch": 0.08481504189658697, "grad_norm": 0.7171861489234661, "learning_rate": 4.239638061879743e-05, "loss": 0.7351, "step": 2905 }, { "epoch": 0.08484423812443433, "grad_norm": 0.801988217488984, "learning_rate": 4.241097489784005e-05, "loss": 0.7501, "step": 2906 }, { "epoch": 0.08487343435228169, "grad_norm": 0.8054616551306899, "learning_rate": 4.242556917688266e-05, "loss": 0.7892, "step": 2907 }, { "epoch": 0.08490263058012905, "grad_norm": 0.7448001955262911, "learning_rate": 4.244016345592528e-05, "loss": 0.7836, "step": 2908 }, { "epoch": 0.08493182680797641, "grad_norm": 0.7426099638496045, "learning_rate": 4.2454757734967894e-05, "loss": 0.6705, "step": 2909 }, { "epoch": 0.08496102303582377, "grad_norm": 0.6999796058987241, "learning_rate": 4.2469352014010513e-05, "loss": 0.7146, "step": 2910 }, { "epoch": 0.08499021926367113, "grad_norm": 0.7520110390429934, "learning_rate": 4.2483946293053126e-05, "loss": 0.6557, "step": 2911 }, { "epoch": 0.0850194154915185, "grad_norm": 0.676707067196616, "learning_rate": 4.249854057209574e-05, "loss": 0.6887, "step": 2912 }, { "epoch": 0.08504861171936585, "grad_norm": 0.6905290534571882, "learning_rate": 4.251313485113835e-05, "loss": 0.7374, "step": 2913 }, { "epoch": 0.08507780794721322, "grad_norm": 0.7255995658054106, "learning_rate": 4.252772913018097e-05, "loss": 0.7301, "step": 2914 }, { "epoch": 0.08510700417506058, "grad_norm": 0.6425776311908356, "learning_rate": 4.254232340922359e-05, "loss": 0.6365, "step": 2915 }, { "epoch": 0.08513620040290794, "grad_norm": 0.6676772013958997, "learning_rate": 4.25569176882662e-05, "loss": 0.5825, "step": 2916 }, { "epoch": 0.0851653966307553, "grad_norm": 0.7526936556010064, "learning_rate": 4.257151196730882e-05, "loss": 0.6486, "step": 2917 }, { "epoch": 0.08519459285860267, "grad_norm": 0.6706488246250161, "learning_rate": 4.2586106246351434e-05, "loss": 0.6099, "step": 2918 }, { "epoch": 0.08522378908645004, "grad_norm": 0.6752740881721546, "learning_rate": 4.260070052539405e-05, "loss": 0.6731, "step": 2919 }, { "epoch": 0.0852529853142974, "grad_norm": 0.669922331957987, "learning_rate": 4.261529480443666e-05, "loss": 0.6482, "step": 2920 }, { "epoch": 0.08528218154214476, "grad_norm": 1.0201120977985734, "learning_rate": 4.262988908347928e-05, "loss": 0.7271, "step": 2921 }, { "epoch": 0.08531137776999212, "grad_norm": 0.6963726089885474, "learning_rate": 4.264448336252189e-05, "loss": 0.6392, "step": 2922 }, { "epoch": 0.08534057399783948, "grad_norm": 0.6548660089558531, "learning_rate": 4.265907764156451e-05, "loss": 0.6783, "step": 2923 }, { "epoch": 0.08536977022568684, "grad_norm": 0.643804610441613, "learning_rate": 4.267367192060712e-05, "loss": 0.5987, "step": 2924 }, { "epoch": 0.0853989664535342, "grad_norm": 0.8282263159883023, "learning_rate": 4.268826619964974e-05, "loss": 0.6863, "step": 2925 }, { "epoch": 0.08542816268138156, "grad_norm": 0.6579091160996081, "learning_rate": 4.2702860478692354e-05, "loss": 0.689, "step": 2926 }, { "epoch": 0.08545735890922893, "grad_norm": 0.631942361933811, "learning_rate": 4.271745475773497e-05, "loss": 0.6082, "step": 2927 }, { "epoch": 0.08548655513707629, "grad_norm": 0.729275001210449, "learning_rate": 4.2732049036777585e-05, "loss": 0.7906, "step": 2928 }, { "epoch": 0.08551575136492365, "grad_norm": 0.7942230522851534, "learning_rate": 4.27466433158202e-05, "loss": 0.7896, "step": 2929 }, { "epoch": 0.08554494759277101, "grad_norm": 0.6967311879211312, "learning_rate": 4.276123759486282e-05, "loss": 0.6874, "step": 2930 }, { "epoch": 0.08557414382061837, "grad_norm": 0.7767886095569813, "learning_rate": 4.277583187390543e-05, "loss": 0.7675, "step": 2931 }, { "epoch": 0.08560334004846573, "grad_norm": 0.7164029866578068, "learning_rate": 4.279042615294805e-05, "loss": 0.6669, "step": 2932 }, { "epoch": 0.0856325362763131, "grad_norm": 0.738894381403613, "learning_rate": 4.280502043199066e-05, "loss": 0.7512, "step": 2933 }, { "epoch": 0.08566173250416047, "grad_norm": 0.6848394270931726, "learning_rate": 4.281961471103328e-05, "loss": 0.6885, "step": 2934 }, { "epoch": 0.08569092873200783, "grad_norm": 0.691695469530442, "learning_rate": 4.283420899007589e-05, "loss": 0.6388, "step": 2935 }, { "epoch": 0.08572012495985519, "grad_norm": 0.6619935265308866, "learning_rate": 4.284880326911851e-05, "loss": 0.6403, "step": 2936 }, { "epoch": 0.08574932118770255, "grad_norm": 0.6566474578089667, "learning_rate": 4.2863397548161125e-05, "loss": 0.5931, "step": 2937 }, { "epoch": 0.08577851741554991, "grad_norm": 0.7861537663512908, "learning_rate": 4.287799182720374e-05, "loss": 0.7511, "step": 2938 }, { "epoch": 0.08580771364339727, "grad_norm": 0.7031098468123896, "learning_rate": 4.289258610624635e-05, "loss": 0.6938, "step": 2939 }, { "epoch": 0.08583690987124463, "grad_norm": 0.7595604980973735, "learning_rate": 4.290718038528897e-05, "loss": 0.616, "step": 2940 }, { "epoch": 0.085866106099092, "grad_norm": 0.647879267212424, "learning_rate": 4.292177466433158e-05, "loss": 0.6382, "step": 2941 }, { "epoch": 0.08589530232693936, "grad_norm": 0.6930273521164424, "learning_rate": 4.29363689433742e-05, "loss": 0.7432, "step": 2942 }, { "epoch": 0.08592449855478672, "grad_norm": 0.7234756544239415, "learning_rate": 4.295096322241681e-05, "loss": 0.69, "step": 2943 }, { "epoch": 0.08595369478263408, "grad_norm": 0.648598508649689, "learning_rate": 4.296555750145943e-05, "loss": 0.6543, "step": 2944 }, { "epoch": 0.08598289101048144, "grad_norm": 0.6561448230341134, "learning_rate": 4.2980151780502045e-05, "loss": 0.6351, "step": 2945 }, { "epoch": 0.0860120872383288, "grad_norm": 0.7438770333017943, "learning_rate": 4.2994746059544664e-05, "loss": 0.723, "step": 2946 }, { "epoch": 0.08604128346617616, "grad_norm": 1.0721241464435098, "learning_rate": 4.3009340338587276e-05, "loss": 0.7345, "step": 2947 }, { "epoch": 0.08607047969402354, "grad_norm": 0.6974342536434456, "learning_rate": 4.302393461762989e-05, "loss": 0.7189, "step": 2948 }, { "epoch": 0.0860996759218709, "grad_norm": 0.7104039311809547, "learning_rate": 4.303852889667251e-05, "loss": 0.7602, "step": 2949 }, { "epoch": 0.08612887214971826, "grad_norm": 0.677642193330725, "learning_rate": 4.305312317571512e-05, "loss": 0.6859, "step": 2950 }, { "epoch": 0.08615806837756562, "grad_norm": 0.6698815351394168, "learning_rate": 4.306771745475774e-05, "loss": 0.645, "step": 2951 }, { "epoch": 0.08618726460541298, "grad_norm": 0.6112630866612215, "learning_rate": 4.308231173380035e-05, "loss": 0.6165, "step": 2952 }, { "epoch": 0.08621646083326034, "grad_norm": 0.7022553160833389, "learning_rate": 4.309690601284297e-05, "loss": 0.6862, "step": 2953 }, { "epoch": 0.0862456570611077, "grad_norm": 0.6355952052265724, "learning_rate": 4.3111500291885584e-05, "loss": 0.6035, "step": 2954 }, { "epoch": 0.08627485328895507, "grad_norm": 0.8642684443742703, "learning_rate": 4.3126094570928196e-05, "loss": 0.7691, "step": 2955 }, { "epoch": 0.08630404951680243, "grad_norm": 0.763230039166667, "learning_rate": 4.314068884997081e-05, "loss": 0.7822, "step": 2956 }, { "epoch": 0.08633324574464979, "grad_norm": 0.7573778945238887, "learning_rate": 4.315528312901343e-05, "loss": 0.7576, "step": 2957 }, { "epoch": 0.08636244197249715, "grad_norm": 0.793476436476658, "learning_rate": 4.316987740805604e-05, "loss": 0.6561, "step": 2958 }, { "epoch": 0.08639163820034451, "grad_norm": 0.7421839484866726, "learning_rate": 4.318447168709866e-05, "loss": 0.8014, "step": 2959 }, { "epoch": 0.08642083442819187, "grad_norm": 0.7641098702492523, "learning_rate": 4.319906596614127e-05, "loss": 0.7661, "step": 2960 }, { "epoch": 0.08645003065603923, "grad_norm": 0.6627379539242948, "learning_rate": 4.321366024518389e-05, "loss": 0.6919, "step": 2961 }, { "epoch": 0.0864792268838866, "grad_norm": 0.6980863066772922, "learning_rate": 4.322825452422651e-05, "loss": 0.769, "step": 2962 }, { "epoch": 0.08650842311173397, "grad_norm": 0.686306089618167, "learning_rate": 4.324284880326912e-05, "loss": 0.7294, "step": 2963 }, { "epoch": 0.08653761933958133, "grad_norm": 0.6768304199239926, "learning_rate": 4.3257443082311736e-05, "loss": 0.6602, "step": 2964 }, { "epoch": 0.08656681556742869, "grad_norm": 0.6533700882756701, "learning_rate": 4.327203736135435e-05, "loss": 0.6407, "step": 2965 }, { "epoch": 0.08659601179527605, "grad_norm": 0.6937983257766752, "learning_rate": 4.328663164039697e-05, "loss": 0.678, "step": 2966 }, { "epoch": 0.08662520802312342, "grad_norm": 0.7018848998186932, "learning_rate": 4.330122591943958e-05, "loss": 0.6303, "step": 2967 }, { "epoch": 0.08665440425097078, "grad_norm": 0.664926664623431, "learning_rate": 4.33158201984822e-05, "loss": 0.7044, "step": 2968 }, { "epoch": 0.08668360047881814, "grad_norm": 0.6583838117545718, "learning_rate": 4.333041447752481e-05, "loss": 0.6768, "step": 2969 }, { "epoch": 0.0867127967066655, "grad_norm": 0.6736030265727998, "learning_rate": 4.334500875656743e-05, "loss": 0.6767, "step": 2970 }, { "epoch": 0.08674199293451286, "grad_norm": 0.6503913899534028, "learning_rate": 4.335960303561004e-05, "loss": 0.6835, "step": 2971 }, { "epoch": 0.08677118916236022, "grad_norm": 0.6339080221107568, "learning_rate": 4.337419731465266e-05, "loss": 0.5823, "step": 2972 }, { "epoch": 0.08680038539020758, "grad_norm": 0.7759325028791653, "learning_rate": 4.338879159369527e-05, "loss": 0.7671, "step": 2973 }, { "epoch": 0.08682958161805494, "grad_norm": 0.6922890214943029, "learning_rate": 4.340338587273789e-05, "loss": 0.711, "step": 2974 }, { "epoch": 0.0868587778459023, "grad_norm": 0.6787941127287547, "learning_rate": 4.34179801517805e-05, "loss": 0.7427, "step": 2975 }, { "epoch": 0.08688797407374967, "grad_norm": 0.6525387939849429, "learning_rate": 4.343257443082312e-05, "loss": 0.7067, "step": 2976 }, { "epoch": 0.08691717030159703, "grad_norm": 0.6898083277244539, "learning_rate": 4.344716870986573e-05, "loss": 0.6865, "step": 2977 }, { "epoch": 0.0869463665294444, "grad_norm": 0.6746299658627569, "learning_rate": 4.346176298890835e-05, "loss": 0.7279, "step": 2978 }, { "epoch": 0.08697556275729176, "grad_norm": 0.7338436730753464, "learning_rate": 4.347635726795097e-05, "loss": 0.7142, "step": 2979 }, { "epoch": 0.08700475898513912, "grad_norm": 0.886962620893384, "learning_rate": 4.349095154699358e-05, "loss": 0.7042, "step": 2980 }, { "epoch": 0.08703395521298649, "grad_norm": 0.672392398073832, "learning_rate": 4.3505545826036195e-05, "loss": 0.6818, "step": 2981 }, { "epoch": 0.08706315144083385, "grad_norm": 0.6432753813373209, "learning_rate": 4.352014010507881e-05, "loss": 0.6262, "step": 2982 }, { "epoch": 0.08709234766868121, "grad_norm": 0.6734399557798983, "learning_rate": 4.353473438412143e-05, "loss": 0.6806, "step": 2983 }, { "epoch": 0.08712154389652857, "grad_norm": 0.6916039027204177, "learning_rate": 4.354932866316404e-05, "loss": 0.7213, "step": 2984 }, { "epoch": 0.08715074012437593, "grad_norm": 0.678657184142648, "learning_rate": 4.356392294220666e-05, "loss": 0.754, "step": 2985 }, { "epoch": 0.08717993635222329, "grad_norm": 0.6354500241932752, "learning_rate": 4.357851722124927e-05, "loss": 0.5871, "step": 2986 }, { "epoch": 0.08720913258007065, "grad_norm": 0.7186647782764224, "learning_rate": 4.359311150029189e-05, "loss": 0.7465, "step": 2987 }, { "epoch": 0.08723832880791801, "grad_norm": 0.6621195331016276, "learning_rate": 4.36077057793345e-05, "loss": 0.6275, "step": 2988 }, { "epoch": 0.08726752503576538, "grad_norm": 0.6887528682562779, "learning_rate": 4.362230005837712e-05, "loss": 0.6487, "step": 2989 }, { "epoch": 0.08729672126361274, "grad_norm": 0.6377066473005248, "learning_rate": 4.3636894337419734e-05, "loss": 0.6575, "step": 2990 }, { "epoch": 0.0873259174914601, "grad_norm": 0.6740471857869078, "learning_rate": 4.365148861646235e-05, "loss": 0.6823, "step": 2991 }, { "epoch": 0.08735511371930746, "grad_norm": 0.7358399225379666, "learning_rate": 4.366608289550496e-05, "loss": 0.7843, "step": 2992 }, { "epoch": 0.08738430994715483, "grad_norm": 0.6803501447088436, "learning_rate": 4.368067717454758e-05, "loss": 0.694, "step": 2993 }, { "epoch": 0.0874135061750022, "grad_norm": 0.6839382881860752, "learning_rate": 4.36952714535902e-05, "loss": 0.6491, "step": 2994 }, { "epoch": 0.08744270240284956, "grad_norm": 0.7352013090179973, "learning_rate": 4.370986573263281e-05, "loss": 0.727, "step": 2995 }, { "epoch": 0.08747189863069692, "grad_norm": 0.6700259119085165, "learning_rate": 4.372446001167543e-05, "loss": 0.6514, "step": 2996 }, { "epoch": 0.08750109485854428, "grad_norm": 0.6900535620499577, "learning_rate": 4.373905429071804e-05, "loss": 0.6575, "step": 2997 }, { "epoch": 0.08753029108639164, "grad_norm": 0.6902767133277108, "learning_rate": 4.375364856976066e-05, "loss": 0.7566, "step": 2998 }, { "epoch": 0.087559487314239, "grad_norm": 0.924537055998943, "learning_rate": 4.3768242848803274e-05, "loss": 0.8923, "step": 2999 }, { "epoch": 0.08758868354208636, "grad_norm": 0.7152524359513801, "learning_rate": 4.3782837127845886e-05, "loss": 0.7374, "step": 3000 }, { "epoch": 0.08761787976993372, "grad_norm": 0.7841630298025472, "learning_rate": 4.37974314068885e-05, "loss": 0.7671, "step": 3001 }, { "epoch": 0.08764707599778108, "grad_norm": 0.692796104143567, "learning_rate": 4.381202568593112e-05, "loss": 0.6783, "step": 3002 }, { "epoch": 0.08767627222562845, "grad_norm": 0.6788822673728697, "learning_rate": 4.382661996497373e-05, "loss": 0.7003, "step": 3003 }, { "epoch": 0.08770546845347581, "grad_norm": 0.7764697849715668, "learning_rate": 4.384121424401635e-05, "loss": 0.7778, "step": 3004 }, { "epoch": 0.08773466468132317, "grad_norm": 0.7274264550862373, "learning_rate": 4.385580852305896e-05, "loss": 0.7672, "step": 3005 }, { "epoch": 0.08776386090917053, "grad_norm": 0.6750902402624065, "learning_rate": 4.387040280210158e-05, "loss": 0.6242, "step": 3006 }, { "epoch": 0.08779305713701789, "grad_norm": 0.6879408941628204, "learning_rate": 4.3884997081144194e-05, "loss": 0.7396, "step": 3007 }, { "epoch": 0.08782225336486527, "grad_norm": 0.7975250465546582, "learning_rate": 4.3899591360186806e-05, "loss": 0.7369, "step": 3008 }, { "epoch": 0.08785144959271263, "grad_norm": 0.7340585201321391, "learning_rate": 4.391418563922942e-05, "loss": 0.7148, "step": 3009 }, { "epoch": 0.08788064582055999, "grad_norm": 0.6987402685888765, "learning_rate": 4.392877991827204e-05, "loss": 0.659, "step": 3010 }, { "epoch": 0.08790984204840735, "grad_norm": 0.8694812273984962, "learning_rate": 4.394337419731466e-05, "loss": 0.6905, "step": 3011 }, { "epoch": 0.08793903827625471, "grad_norm": 0.7196438002944002, "learning_rate": 4.395796847635727e-05, "loss": 0.7884, "step": 3012 }, { "epoch": 0.08796823450410207, "grad_norm": 0.6858922470732636, "learning_rate": 4.397256275539989e-05, "loss": 0.7234, "step": 3013 }, { "epoch": 0.08799743073194943, "grad_norm": 0.731340757063488, "learning_rate": 4.39871570344425e-05, "loss": 0.7252, "step": 3014 }, { "epoch": 0.0880266269597968, "grad_norm": 0.7435031569849968, "learning_rate": 4.400175131348512e-05, "loss": 0.7773, "step": 3015 }, { "epoch": 0.08805582318764416, "grad_norm": 0.7175511032037759, "learning_rate": 4.401634559252773e-05, "loss": 0.6552, "step": 3016 }, { "epoch": 0.08808501941549152, "grad_norm": 0.7021408240469235, "learning_rate": 4.4030939871570345e-05, "loss": 0.7147, "step": 3017 }, { "epoch": 0.08811421564333888, "grad_norm": 0.7191406329457534, "learning_rate": 4.404553415061296e-05, "loss": 0.7114, "step": 3018 }, { "epoch": 0.08814341187118624, "grad_norm": 0.7032790302826741, "learning_rate": 4.406012842965558e-05, "loss": 0.6974, "step": 3019 }, { "epoch": 0.0881726080990336, "grad_norm": 0.692202487604442, "learning_rate": 4.407472270869819e-05, "loss": 0.7136, "step": 3020 }, { "epoch": 0.08820180432688096, "grad_norm": 0.7311190213921351, "learning_rate": 4.408931698774081e-05, "loss": 0.7772, "step": 3021 }, { "epoch": 0.08823100055472832, "grad_norm": 0.6817118836421203, "learning_rate": 4.410391126678342e-05, "loss": 0.6854, "step": 3022 }, { "epoch": 0.0882601967825757, "grad_norm": 0.6487514602504612, "learning_rate": 4.411850554582604e-05, "loss": 0.6719, "step": 3023 }, { "epoch": 0.08828939301042306, "grad_norm": 0.7192647677340428, "learning_rate": 4.413309982486865e-05, "loss": 0.7051, "step": 3024 }, { "epoch": 0.08831858923827042, "grad_norm": 0.7752073138947618, "learning_rate": 4.414769410391127e-05, "loss": 0.6214, "step": 3025 }, { "epoch": 0.08834778546611778, "grad_norm": 0.6623532087044031, "learning_rate": 4.4162288382953885e-05, "loss": 0.6357, "step": 3026 }, { "epoch": 0.08837698169396514, "grad_norm": 0.6422199808977516, "learning_rate": 4.41768826619965e-05, "loss": 0.6447, "step": 3027 }, { "epoch": 0.0884061779218125, "grad_norm": 0.7859516205892438, "learning_rate": 4.4191476941039116e-05, "loss": 0.7963, "step": 3028 }, { "epoch": 0.08843537414965986, "grad_norm": 0.828489347748682, "learning_rate": 4.420607122008173e-05, "loss": 0.8079, "step": 3029 }, { "epoch": 0.08846457037750723, "grad_norm": 0.7312662434973154, "learning_rate": 4.422066549912435e-05, "loss": 0.7574, "step": 3030 }, { "epoch": 0.08849376660535459, "grad_norm": 0.6825575200874469, "learning_rate": 4.423525977816696e-05, "loss": 0.7039, "step": 3031 }, { "epoch": 0.08852296283320195, "grad_norm": 0.7018981441935989, "learning_rate": 4.424985405720958e-05, "loss": 0.7199, "step": 3032 }, { "epoch": 0.08855215906104931, "grad_norm": 0.6941450582692887, "learning_rate": 4.426444833625219e-05, "loss": 0.7189, "step": 3033 }, { "epoch": 0.08858135528889667, "grad_norm": 0.689014424694664, "learning_rate": 4.4279042615294805e-05, "loss": 0.6277, "step": 3034 }, { "epoch": 0.08861055151674403, "grad_norm": 0.7153977910015615, "learning_rate": 4.429363689433742e-05, "loss": 0.7335, "step": 3035 }, { "epoch": 0.0886397477445914, "grad_norm": 0.728209754208087, "learning_rate": 4.4308231173380036e-05, "loss": 0.7289, "step": 3036 }, { "epoch": 0.08866894397243875, "grad_norm": 0.6434226248780891, "learning_rate": 4.432282545242265e-05, "loss": 0.6623, "step": 3037 }, { "epoch": 0.08869814020028613, "grad_norm": 0.69266106485442, "learning_rate": 4.433741973146527e-05, "loss": 0.6886, "step": 3038 }, { "epoch": 0.08872733642813349, "grad_norm": 0.6768381139381962, "learning_rate": 4.435201401050788e-05, "loss": 0.695, "step": 3039 }, { "epoch": 0.08875653265598085, "grad_norm": 0.7891125259360421, "learning_rate": 4.43666082895505e-05, "loss": 0.7378, "step": 3040 }, { "epoch": 0.08878572888382821, "grad_norm": 0.7481291246086198, "learning_rate": 4.438120256859311e-05, "loss": 0.8446, "step": 3041 }, { "epoch": 0.08881492511167557, "grad_norm": 0.695969183148456, "learning_rate": 4.439579684763573e-05, "loss": 0.6972, "step": 3042 }, { "epoch": 0.08884412133952294, "grad_norm": 0.6671806608940336, "learning_rate": 4.4410391126678344e-05, "loss": 0.6944, "step": 3043 }, { "epoch": 0.0888733175673703, "grad_norm": 0.7302911765146411, "learning_rate": 4.4424985405720957e-05, "loss": 0.7416, "step": 3044 }, { "epoch": 0.08890251379521766, "grad_norm": 0.6943209136658981, "learning_rate": 4.4439579684763576e-05, "loss": 0.7508, "step": 3045 }, { "epoch": 0.08893171002306502, "grad_norm": 0.6858117512003612, "learning_rate": 4.445417396380619e-05, "loss": 0.7357, "step": 3046 }, { "epoch": 0.08896090625091238, "grad_norm": 0.65785212969178, "learning_rate": 4.446876824284881e-05, "loss": 0.6392, "step": 3047 }, { "epoch": 0.08899010247875974, "grad_norm": 0.7294028563710133, "learning_rate": 4.448336252189142e-05, "loss": 0.708, "step": 3048 }, { "epoch": 0.0890192987066071, "grad_norm": 0.7149598036496831, "learning_rate": 4.449795680093404e-05, "loss": 0.7128, "step": 3049 }, { "epoch": 0.08904849493445446, "grad_norm": 0.7210275230613248, "learning_rate": 4.451255107997665e-05, "loss": 0.776, "step": 3050 }, { "epoch": 0.08907769116230183, "grad_norm": 0.6597038020696093, "learning_rate": 4.452714535901927e-05, "loss": 0.7321, "step": 3051 }, { "epoch": 0.08910688739014919, "grad_norm": 0.7615616356449436, "learning_rate": 4.454173963806188e-05, "loss": 0.7361, "step": 3052 }, { "epoch": 0.08913608361799656, "grad_norm": 0.6983766790186545, "learning_rate": 4.4556333917104496e-05, "loss": 0.6638, "step": 3053 }, { "epoch": 0.08916527984584392, "grad_norm": 0.6921252349396766, "learning_rate": 4.457092819614711e-05, "loss": 0.6834, "step": 3054 }, { "epoch": 0.08919447607369128, "grad_norm": 0.7583568563098821, "learning_rate": 4.458552247518973e-05, "loss": 0.6997, "step": 3055 }, { "epoch": 0.08922367230153865, "grad_norm": 0.625268450014915, "learning_rate": 4.460011675423234e-05, "loss": 0.599, "step": 3056 }, { "epoch": 0.089252868529386, "grad_norm": 0.6623649319081485, "learning_rate": 4.461471103327496e-05, "loss": 0.6773, "step": 3057 }, { "epoch": 0.08928206475723337, "grad_norm": 0.676702941564524, "learning_rate": 4.462930531231758e-05, "loss": 0.665, "step": 3058 }, { "epoch": 0.08931126098508073, "grad_norm": 0.7595068587103889, "learning_rate": 4.464389959136019e-05, "loss": 0.7808, "step": 3059 }, { "epoch": 0.08934045721292809, "grad_norm": 0.8672702117432575, "learning_rate": 4.46584938704028e-05, "loss": 0.7435, "step": 3060 }, { "epoch": 0.08936965344077545, "grad_norm": 0.6635985369685644, "learning_rate": 4.4673088149445416e-05, "loss": 0.6976, "step": 3061 }, { "epoch": 0.08939884966862281, "grad_norm": 0.7105565090342296, "learning_rate": 4.4687682428488035e-05, "loss": 0.7481, "step": 3062 }, { "epoch": 0.08942804589647017, "grad_norm": 0.6318589531796631, "learning_rate": 4.470227670753065e-05, "loss": 0.6359, "step": 3063 }, { "epoch": 0.08945724212431753, "grad_norm": 0.7911585016559854, "learning_rate": 4.471687098657327e-05, "loss": 0.6525, "step": 3064 }, { "epoch": 0.0894864383521649, "grad_norm": 0.6829557413042153, "learning_rate": 4.473146526561588e-05, "loss": 0.7201, "step": 3065 }, { "epoch": 0.08951563458001226, "grad_norm": 0.6689859238207893, "learning_rate": 4.47460595446585e-05, "loss": 0.6386, "step": 3066 }, { "epoch": 0.08954483080785962, "grad_norm": 0.5949509976725372, "learning_rate": 4.476065382370111e-05, "loss": 0.5846, "step": 3067 }, { "epoch": 0.089574027035707, "grad_norm": 0.6935769378629466, "learning_rate": 4.477524810274373e-05, "loss": 0.7054, "step": 3068 }, { "epoch": 0.08960322326355435, "grad_norm": 0.7042948893924129, "learning_rate": 4.478984238178634e-05, "loss": 0.7096, "step": 3069 }, { "epoch": 0.08963241949140172, "grad_norm": 0.7830059393702924, "learning_rate": 4.4804436660828955e-05, "loss": 0.7392, "step": 3070 }, { "epoch": 0.08966161571924908, "grad_norm": 0.7346888569265986, "learning_rate": 4.481903093987157e-05, "loss": 0.7395, "step": 3071 }, { "epoch": 0.08969081194709644, "grad_norm": 0.7119897048824025, "learning_rate": 4.483362521891419e-05, "loss": 0.7189, "step": 3072 }, { "epoch": 0.0897200081749438, "grad_norm": 0.6217985482889294, "learning_rate": 4.48482194979568e-05, "loss": 0.6588, "step": 3073 }, { "epoch": 0.08974920440279116, "grad_norm": 0.6960310906597474, "learning_rate": 4.486281377699942e-05, "loss": 0.6804, "step": 3074 }, { "epoch": 0.08977840063063852, "grad_norm": 0.7531651491770286, "learning_rate": 4.487740805604204e-05, "loss": 0.6661, "step": 3075 }, { "epoch": 0.08980759685848588, "grad_norm": 0.6771158552057542, "learning_rate": 4.489200233508465e-05, "loss": 0.6761, "step": 3076 }, { "epoch": 0.08983679308633324, "grad_norm": 0.7607570805149712, "learning_rate": 4.490659661412727e-05, "loss": 0.7816, "step": 3077 }, { "epoch": 0.0898659893141806, "grad_norm": 0.8011082549181994, "learning_rate": 4.492119089316988e-05, "loss": 0.7622, "step": 3078 }, { "epoch": 0.08989518554202797, "grad_norm": 0.8020200886178048, "learning_rate": 4.4935785172212494e-05, "loss": 0.8118, "step": 3079 }, { "epoch": 0.08992438176987533, "grad_norm": 0.7135136354867119, "learning_rate": 4.495037945125511e-05, "loss": 0.7021, "step": 3080 }, { "epoch": 0.08995357799772269, "grad_norm": 0.6938900114094009, "learning_rate": 4.4964973730297726e-05, "loss": 0.7089, "step": 3081 }, { "epoch": 0.08998277422557005, "grad_norm": 0.7575912708041238, "learning_rate": 4.497956800934034e-05, "loss": 0.7836, "step": 3082 }, { "epoch": 0.09001197045341743, "grad_norm": 0.8936392837949201, "learning_rate": 4.499416228838296e-05, "loss": 0.7762, "step": 3083 }, { "epoch": 0.09004116668126479, "grad_norm": 0.8239252263805478, "learning_rate": 4.500875656742557e-05, "loss": 0.7615, "step": 3084 }, { "epoch": 0.09007036290911215, "grad_norm": 0.7100498080024824, "learning_rate": 4.502335084646819e-05, "loss": 0.7662, "step": 3085 }, { "epoch": 0.09009955913695951, "grad_norm": 0.6697828698556191, "learning_rate": 4.50379451255108e-05, "loss": 0.6475, "step": 3086 }, { "epoch": 0.09012875536480687, "grad_norm": 0.6571396686106319, "learning_rate": 4.5052539404553414e-05, "loss": 0.5858, "step": 3087 }, { "epoch": 0.09015795159265423, "grad_norm": 0.6825591278399343, "learning_rate": 4.506713368359603e-05, "loss": 0.7033, "step": 3088 }, { "epoch": 0.09018714782050159, "grad_norm": 0.7317382665612719, "learning_rate": 4.5081727962638646e-05, "loss": 0.7372, "step": 3089 }, { "epoch": 0.09021634404834895, "grad_norm": 0.7042876012875284, "learning_rate": 4.5096322241681265e-05, "loss": 0.7196, "step": 3090 }, { "epoch": 0.09024554027619631, "grad_norm": 0.8115746320572372, "learning_rate": 4.511091652072388e-05, "loss": 0.7206, "step": 3091 }, { "epoch": 0.09027473650404368, "grad_norm": 0.6200029418436064, "learning_rate": 4.51255107997665e-05, "loss": 0.6328, "step": 3092 }, { "epoch": 0.09030393273189104, "grad_norm": 0.6766854781577759, "learning_rate": 4.514010507880911e-05, "loss": 0.7062, "step": 3093 }, { "epoch": 0.0903331289597384, "grad_norm": 0.685602155712906, "learning_rate": 4.515469935785173e-05, "loss": 0.6972, "step": 3094 }, { "epoch": 0.09036232518758576, "grad_norm": 0.6855874152621836, "learning_rate": 4.516929363689434e-05, "loss": 0.689, "step": 3095 }, { "epoch": 0.09039152141543312, "grad_norm": 0.6442573854194651, "learning_rate": 4.5183887915936954e-05, "loss": 0.6239, "step": 3096 }, { "epoch": 0.09042071764328048, "grad_norm": 0.7483339335117741, "learning_rate": 4.5198482194979566e-05, "loss": 0.7573, "step": 3097 }, { "epoch": 0.09044991387112786, "grad_norm": 0.681377159862065, "learning_rate": 4.5213076474022185e-05, "loss": 0.7253, "step": 3098 }, { "epoch": 0.09047911009897522, "grad_norm": 0.6853680105784009, "learning_rate": 4.52276707530648e-05, "loss": 0.7297, "step": 3099 }, { "epoch": 0.09050830632682258, "grad_norm": 0.7197369213196644, "learning_rate": 4.524226503210742e-05, "loss": 0.7334, "step": 3100 }, { "epoch": 0.09053750255466994, "grad_norm": 0.7913253862847504, "learning_rate": 4.525685931115003e-05, "loss": 0.7802, "step": 3101 }, { "epoch": 0.0905666987825173, "grad_norm": 0.6758456510283719, "learning_rate": 4.527145359019265e-05, "loss": 0.6384, "step": 3102 }, { "epoch": 0.09059589501036466, "grad_norm": 0.6966276526700815, "learning_rate": 4.528604786923526e-05, "loss": 0.7089, "step": 3103 }, { "epoch": 0.09062509123821202, "grad_norm": 0.6291893157811765, "learning_rate": 4.530064214827788e-05, "loss": 0.6456, "step": 3104 }, { "epoch": 0.09065428746605939, "grad_norm": 0.6869642412324082, "learning_rate": 4.531523642732049e-05, "loss": 0.6939, "step": 3105 }, { "epoch": 0.09068348369390675, "grad_norm": 0.7262581848294076, "learning_rate": 4.5329830706363106e-05, "loss": 0.6964, "step": 3106 }, { "epoch": 0.09071267992175411, "grad_norm": 0.6813974181054426, "learning_rate": 4.5344424985405725e-05, "loss": 0.7025, "step": 3107 }, { "epoch": 0.09074187614960147, "grad_norm": 0.7591596591394111, "learning_rate": 4.535901926444834e-05, "loss": 0.7085, "step": 3108 }, { "epoch": 0.09077107237744883, "grad_norm": 0.6358952892176333, "learning_rate": 4.5373613543490956e-05, "loss": 0.6182, "step": 3109 }, { "epoch": 0.09080026860529619, "grad_norm": 0.638982137141021, "learning_rate": 4.538820782253357e-05, "loss": 0.5974, "step": 3110 }, { "epoch": 0.09082946483314355, "grad_norm": 0.703612991721694, "learning_rate": 4.540280210157619e-05, "loss": 0.6669, "step": 3111 }, { "epoch": 0.09085866106099091, "grad_norm": 0.7965260867475096, "learning_rate": 4.54173963806188e-05, "loss": 0.7106, "step": 3112 }, { "epoch": 0.09088785728883828, "grad_norm": 0.7936916210207604, "learning_rate": 4.543199065966141e-05, "loss": 0.7589, "step": 3113 }, { "epoch": 0.09091705351668565, "grad_norm": 0.7319860613531892, "learning_rate": 4.5446584938704026e-05, "loss": 0.7402, "step": 3114 }, { "epoch": 0.09094624974453301, "grad_norm": 0.7389050915051991, "learning_rate": 4.5461179217746645e-05, "loss": 0.709, "step": 3115 }, { "epoch": 0.09097544597238037, "grad_norm": 0.7635472779634583, "learning_rate": 4.547577349678926e-05, "loss": 0.7729, "step": 3116 }, { "epoch": 0.09100464220022773, "grad_norm": 0.6630904280174892, "learning_rate": 4.5490367775831877e-05, "loss": 0.7332, "step": 3117 }, { "epoch": 0.0910338384280751, "grad_norm": 0.6731565984598931, "learning_rate": 4.550496205487449e-05, "loss": 0.7292, "step": 3118 }, { "epoch": 0.09106303465592246, "grad_norm": 0.8058339954928098, "learning_rate": 4.551955633391711e-05, "loss": 0.7546, "step": 3119 }, { "epoch": 0.09109223088376982, "grad_norm": 0.6932413804748544, "learning_rate": 4.553415061295972e-05, "loss": 0.6877, "step": 3120 }, { "epoch": 0.09112142711161718, "grad_norm": 0.7189362452705274, "learning_rate": 4.554874489200234e-05, "loss": 0.8022, "step": 3121 }, { "epoch": 0.09115062333946454, "grad_norm": 0.70080330207344, "learning_rate": 4.556333917104495e-05, "loss": 0.7434, "step": 3122 }, { "epoch": 0.0911798195673119, "grad_norm": 0.7351781719970287, "learning_rate": 4.5577933450087565e-05, "loss": 0.6852, "step": 3123 }, { "epoch": 0.09120901579515926, "grad_norm": 0.6969300240789068, "learning_rate": 4.5592527729130184e-05, "loss": 0.6867, "step": 3124 }, { "epoch": 0.09123821202300662, "grad_norm": 0.6730950819408468, "learning_rate": 4.5607122008172797e-05, "loss": 0.7564, "step": 3125 }, { "epoch": 0.09126740825085398, "grad_norm": 0.6101960227282273, "learning_rate": 4.5621716287215416e-05, "loss": 0.6316, "step": 3126 }, { "epoch": 0.09129660447870135, "grad_norm": 0.6330828074921142, "learning_rate": 4.563631056625803e-05, "loss": 0.678, "step": 3127 }, { "epoch": 0.09132580070654871, "grad_norm": 0.8094425301324656, "learning_rate": 4.565090484530065e-05, "loss": 0.6908, "step": 3128 }, { "epoch": 0.09135499693439608, "grad_norm": 0.6514007424262592, "learning_rate": 4.566549912434326e-05, "loss": 0.6507, "step": 3129 }, { "epoch": 0.09138419316224344, "grad_norm": 0.7068590786866019, "learning_rate": 4.568009340338588e-05, "loss": 0.7363, "step": 3130 }, { "epoch": 0.0914133893900908, "grad_norm": 0.6865434934565298, "learning_rate": 4.569468768242849e-05, "loss": 0.7242, "step": 3131 }, { "epoch": 0.09144258561793817, "grad_norm": 0.7100891320179522, "learning_rate": 4.5709281961471104e-05, "loss": 0.7206, "step": 3132 }, { "epoch": 0.09147178184578553, "grad_norm": 0.7127847488301285, "learning_rate": 4.572387624051372e-05, "loss": 0.6925, "step": 3133 }, { "epoch": 0.09150097807363289, "grad_norm": 0.7263893284091806, "learning_rate": 4.5738470519556336e-05, "loss": 0.7867, "step": 3134 }, { "epoch": 0.09153017430148025, "grad_norm": 0.6492986030775246, "learning_rate": 4.575306479859895e-05, "loss": 0.6708, "step": 3135 }, { "epoch": 0.09155937052932761, "grad_norm": 0.748751217896195, "learning_rate": 4.576765907764157e-05, "loss": 0.7229, "step": 3136 }, { "epoch": 0.09158856675717497, "grad_norm": 0.7258312280975913, "learning_rate": 4.578225335668418e-05, "loss": 0.747, "step": 3137 }, { "epoch": 0.09161776298502233, "grad_norm": 0.6820818862902539, "learning_rate": 4.57968476357268e-05, "loss": 0.76, "step": 3138 }, { "epoch": 0.0916469592128697, "grad_norm": 0.6732346311868624, "learning_rate": 4.581144191476942e-05, "loss": 0.6564, "step": 3139 }, { "epoch": 0.09167615544071706, "grad_norm": 0.6835887607042187, "learning_rate": 4.5826036193812024e-05, "loss": 0.6784, "step": 3140 }, { "epoch": 0.09170535166856442, "grad_norm": 0.6957586172764607, "learning_rate": 4.5840630472854643e-05, "loss": 0.7372, "step": 3141 }, { "epoch": 0.09173454789641178, "grad_norm": 0.6547200208197994, "learning_rate": 4.5855224751897256e-05, "loss": 0.6568, "step": 3142 }, { "epoch": 0.09176374412425914, "grad_norm": 0.6611796987287502, "learning_rate": 4.5869819030939875e-05, "loss": 0.6636, "step": 3143 }, { "epoch": 0.09179294035210651, "grad_norm": 0.850707862250547, "learning_rate": 4.588441330998249e-05, "loss": 0.8058, "step": 3144 }, { "epoch": 0.09182213657995388, "grad_norm": 0.6907440150499872, "learning_rate": 4.589900758902511e-05, "loss": 0.6856, "step": 3145 }, { "epoch": 0.09185133280780124, "grad_norm": 0.6727273643366183, "learning_rate": 4.591360186806772e-05, "loss": 0.7113, "step": 3146 }, { "epoch": 0.0918805290356486, "grad_norm": 0.7242957178288555, "learning_rate": 4.592819614711034e-05, "loss": 0.7956, "step": 3147 }, { "epoch": 0.09190972526349596, "grad_norm": 0.660353106739495, "learning_rate": 4.594279042615295e-05, "loss": 0.7252, "step": 3148 }, { "epoch": 0.09193892149134332, "grad_norm": 0.7000018668495787, "learning_rate": 4.5957384705195564e-05, "loss": 0.7292, "step": 3149 }, { "epoch": 0.09196811771919068, "grad_norm": 0.6858611865169364, "learning_rate": 4.5971978984238176e-05, "loss": 0.7014, "step": 3150 }, { "epoch": 0.09199731394703804, "grad_norm": 0.7161466353843261, "learning_rate": 4.5986573263280795e-05, "loss": 0.8123, "step": 3151 }, { "epoch": 0.0920265101748854, "grad_norm": 0.6590832188432056, "learning_rate": 4.600116754232341e-05, "loss": 0.6488, "step": 3152 }, { "epoch": 0.09205570640273276, "grad_norm": 0.6420068280970305, "learning_rate": 4.601576182136603e-05, "loss": 0.6841, "step": 3153 }, { "epoch": 0.09208490263058013, "grad_norm": 0.6226367963667757, "learning_rate": 4.6030356100408646e-05, "loss": 0.6611, "step": 3154 }, { "epoch": 0.09211409885842749, "grad_norm": 0.6742991251535324, "learning_rate": 4.604495037945126e-05, "loss": 0.6352, "step": 3155 }, { "epoch": 0.09214329508627485, "grad_norm": 0.6928322701520218, "learning_rate": 4.605954465849388e-05, "loss": 0.6587, "step": 3156 }, { "epoch": 0.09217249131412221, "grad_norm": 0.7396261030152023, "learning_rate": 4.607413893753649e-05, "loss": 0.7789, "step": 3157 }, { "epoch": 0.09220168754196957, "grad_norm": 0.870341171324533, "learning_rate": 4.60887332165791e-05, "loss": 0.765, "step": 3158 }, { "epoch": 0.09223088376981695, "grad_norm": 0.7180428120967964, "learning_rate": 4.6103327495621715e-05, "loss": 0.7546, "step": 3159 }, { "epoch": 0.09226007999766431, "grad_norm": 0.6310168236810157, "learning_rate": 4.6117921774664335e-05, "loss": 0.6785, "step": 3160 }, { "epoch": 0.09228927622551167, "grad_norm": 0.6114899957888945, "learning_rate": 4.613251605370695e-05, "loss": 0.5478, "step": 3161 }, { "epoch": 0.09231847245335903, "grad_norm": 0.7022755806502519, "learning_rate": 4.6147110332749566e-05, "loss": 0.7357, "step": 3162 }, { "epoch": 0.09234766868120639, "grad_norm": 0.9683964861956734, "learning_rate": 4.616170461179218e-05, "loss": 0.8669, "step": 3163 }, { "epoch": 0.09237686490905375, "grad_norm": 0.6190730054335806, "learning_rate": 4.61762988908348e-05, "loss": 0.6177, "step": 3164 }, { "epoch": 0.09240606113690111, "grad_norm": 0.6824314700831849, "learning_rate": 4.619089316987741e-05, "loss": 0.6967, "step": 3165 }, { "epoch": 0.09243525736474847, "grad_norm": 0.7571138907900046, "learning_rate": 4.620548744892002e-05, "loss": 0.801, "step": 3166 }, { "epoch": 0.09246445359259584, "grad_norm": 0.6924539751697282, "learning_rate": 4.6220081727962635e-05, "loss": 0.7122, "step": 3167 }, { "epoch": 0.0924936498204432, "grad_norm": 0.6438097144688834, "learning_rate": 4.6234676007005255e-05, "loss": 0.6302, "step": 3168 }, { "epoch": 0.09252284604829056, "grad_norm": 0.9291735336181134, "learning_rate": 4.624927028604787e-05, "loss": 0.7588, "step": 3169 }, { "epoch": 0.09255204227613792, "grad_norm": 0.6822377660704371, "learning_rate": 4.6263864565090486e-05, "loss": 0.6686, "step": 3170 }, { "epoch": 0.09258123850398528, "grad_norm": 0.6382105527809312, "learning_rate": 4.6278458844133106e-05, "loss": 0.6103, "step": 3171 }, { "epoch": 0.09261043473183264, "grad_norm": 0.7053626727337374, "learning_rate": 4.629305312317572e-05, "loss": 0.7364, "step": 3172 }, { "epoch": 0.09263963095968, "grad_norm": 1.2884034752192828, "learning_rate": 4.630764740221834e-05, "loss": 0.7079, "step": 3173 }, { "epoch": 0.09266882718752738, "grad_norm": 0.6811685933434177, "learning_rate": 4.632224168126095e-05, "loss": 0.698, "step": 3174 }, { "epoch": 0.09269802341537474, "grad_norm": 0.7647890811510324, "learning_rate": 4.633683596030356e-05, "loss": 0.7488, "step": 3175 }, { "epoch": 0.0927272196432221, "grad_norm": 0.6445442736861007, "learning_rate": 4.6351430239346175e-05, "loss": 0.6586, "step": 3176 }, { "epoch": 0.09275641587106946, "grad_norm": 0.6934162252101216, "learning_rate": 4.6366024518388794e-05, "loss": 0.617, "step": 3177 }, { "epoch": 0.09278561209891682, "grad_norm": 1.4031268481741055, "learning_rate": 4.6380618797431406e-05, "loss": 0.7729, "step": 3178 }, { "epoch": 0.09281480832676418, "grad_norm": 0.7262846468570033, "learning_rate": 4.6395213076474026e-05, "loss": 0.7229, "step": 3179 }, { "epoch": 0.09284400455461154, "grad_norm": 0.6955272857386872, "learning_rate": 4.640980735551664e-05, "loss": 0.7126, "step": 3180 }, { "epoch": 0.0928732007824589, "grad_norm": 0.8412404811492492, "learning_rate": 4.642440163455926e-05, "loss": 0.7845, "step": 3181 }, { "epoch": 0.09290239701030627, "grad_norm": 0.7492693833545614, "learning_rate": 4.643899591360187e-05, "loss": 0.7044, "step": 3182 }, { "epoch": 0.09293159323815363, "grad_norm": 0.6487532236979795, "learning_rate": 4.645359019264449e-05, "loss": 0.638, "step": 3183 }, { "epoch": 0.09296078946600099, "grad_norm": 0.7014509550943593, "learning_rate": 4.64681844716871e-05, "loss": 0.7217, "step": 3184 }, { "epoch": 0.09298998569384835, "grad_norm": 0.6339736353392291, "learning_rate": 4.6482778750729714e-05, "loss": 0.6051, "step": 3185 }, { "epoch": 0.09301918192169571, "grad_norm": 0.7321623873788577, "learning_rate": 4.649737302977233e-05, "loss": 0.7581, "step": 3186 }, { "epoch": 0.09304837814954307, "grad_norm": 0.855688152707956, "learning_rate": 4.6511967308814946e-05, "loss": 0.7718, "step": 3187 }, { "epoch": 0.09307757437739043, "grad_norm": 0.6788082525673379, "learning_rate": 4.6526561587857565e-05, "loss": 0.7157, "step": 3188 }, { "epoch": 0.09310677060523781, "grad_norm": 0.6875556831379186, "learning_rate": 4.654115586690018e-05, "loss": 0.7585, "step": 3189 }, { "epoch": 0.09313596683308517, "grad_norm": 0.6921298580298243, "learning_rate": 4.6555750145942797e-05, "loss": 0.7111, "step": 3190 }, { "epoch": 0.09316516306093253, "grad_norm": 0.7615969703761967, "learning_rate": 4.657034442498541e-05, "loss": 0.7289, "step": 3191 }, { "epoch": 0.0931943592887799, "grad_norm": 0.8263343486574443, "learning_rate": 4.658493870402803e-05, "loss": 0.7839, "step": 3192 }, { "epoch": 0.09322355551662725, "grad_norm": 0.7016559887710234, "learning_rate": 4.6599532983070634e-05, "loss": 0.74, "step": 3193 }, { "epoch": 0.09325275174447462, "grad_norm": 0.7285569734000032, "learning_rate": 4.661412726211325e-05, "loss": 0.7846, "step": 3194 }, { "epoch": 0.09328194797232198, "grad_norm": 1.0927590133395424, "learning_rate": 4.6628721541155866e-05, "loss": 0.705, "step": 3195 }, { "epoch": 0.09331114420016934, "grad_norm": 0.6467996233507685, "learning_rate": 4.6643315820198485e-05, "loss": 0.6751, "step": 3196 }, { "epoch": 0.0933403404280167, "grad_norm": 0.6641625590648143, "learning_rate": 4.66579100992411e-05, "loss": 0.6645, "step": 3197 }, { "epoch": 0.09336953665586406, "grad_norm": 0.6859901469890864, "learning_rate": 4.6672504378283717e-05, "loss": 0.7337, "step": 3198 }, { "epoch": 0.09339873288371142, "grad_norm": 0.68727388547006, "learning_rate": 4.668709865732633e-05, "loss": 0.6574, "step": 3199 }, { "epoch": 0.09342792911155878, "grad_norm": 0.6759517488855297, "learning_rate": 4.670169293636895e-05, "loss": 0.6924, "step": 3200 }, { "epoch": 0.09345712533940614, "grad_norm": 0.6627795866498305, "learning_rate": 4.671628721541156e-05, "loss": 0.6894, "step": 3201 }, { "epoch": 0.0934863215672535, "grad_norm": 0.64010500513039, "learning_rate": 4.673088149445417e-05, "loss": 0.6796, "step": 3202 }, { "epoch": 0.09351551779510087, "grad_norm": 0.6827476870649541, "learning_rate": 4.674547577349679e-05, "loss": 0.71, "step": 3203 }, { "epoch": 0.09354471402294824, "grad_norm": 0.7268274312930274, "learning_rate": 4.6760070052539405e-05, "loss": 0.7395, "step": 3204 }, { "epoch": 0.0935739102507956, "grad_norm": 0.7143783554875075, "learning_rate": 4.6774664331582024e-05, "loss": 0.6659, "step": 3205 }, { "epoch": 0.09360310647864296, "grad_norm": 0.6706263647397348, "learning_rate": 4.678925861062464e-05, "loss": 0.6622, "step": 3206 }, { "epoch": 0.09363230270649033, "grad_norm": 0.7344307857616634, "learning_rate": 4.6803852889667256e-05, "loss": 0.7154, "step": 3207 }, { "epoch": 0.09366149893433769, "grad_norm": 0.6592755956596561, "learning_rate": 4.681844716870987e-05, "loss": 0.665, "step": 3208 }, { "epoch": 0.09369069516218505, "grad_norm": 0.7527119558372999, "learning_rate": 4.683304144775249e-05, "loss": 0.7492, "step": 3209 }, { "epoch": 0.09371989139003241, "grad_norm": 0.7108681248966584, "learning_rate": 4.68476357267951e-05, "loss": 0.6926, "step": 3210 }, { "epoch": 0.09374908761787977, "grad_norm": 0.6623964923853806, "learning_rate": 4.686223000583771e-05, "loss": 0.7037, "step": 3211 }, { "epoch": 0.09377828384572713, "grad_norm": 0.6743335936298163, "learning_rate": 4.6876824284880325e-05, "loss": 0.6851, "step": 3212 }, { "epoch": 0.09380748007357449, "grad_norm": 0.6504189071394001, "learning_rate": 4.6891418563922944e-05, "loss": 0.636, "step": 3213 }, { "epoch": 0.09383667630142185, "grad_norm": 0.6397088261768668, "learning_rate": 4.690601284296556e-05, "loss": 0.611, "step": 3214 }, { "epoch": 0.09386587252926921, "grad_norm": 0.6573258568730831, "learning_rate": 4.6920607122008176e-05, "loss": 0.6959, "step": 3215 }, { "epoch": 0.09389506875711658, "grad_norm": 0.6729331445359469, "learning_rate": 4.693520140105079e-05, "loss": 0.6421, "step": 3216 }, { "epoch": 0.09392426498496394, "grad_norm": 0.6615808394532423, "learning_rate": 4.694979568009341e-05, "loss": 0.7042, "step": 3217 }, { "epoch": 0.0939534612128113, "grad_norm": 0.6976656150803752, "learning_rate": 4.696438995913603e-05, "loss": 0.7824, "step": 3218 }, { "epoch": 0.09398265744065867, "grad_norm": 0.6494602745750717, "learning_rate": 4.697898423817864e-05, "loss": 0.7311, "step": 3219 }, { "epoch": 0.09401185366850603, "grad_norm": 0.7037537051362414, "learning_rate": 4.699357851722125e-05, "loss": 0.7733, "step": 3220 }, { "epoch": 0.0940410498963534, "grad_norm": 0.6997944926872486, "learning_rate": 4.7008172796263864e-05, "loss": 0.6944, "step": 3221 }, { "epoch": 0.09407024612420076, "grad_norm": 0.6610656275949277, "learning_rate": 4.7022767075306484e-05, "loss": 0.6166, "step": 3222 }, { "epoch": 0.09409944235204812, "grad_norm": 0.7613503332296807, "learning_rate": 4.7037361354349096e-05, "loss": 0.7953, "step": 3223 }, { "epoch": 0.09412863857989548, "grad_norm": 0.6225724860906486, "learning_rate": 4.7051955633391715e-05, "loss": 0.6429, "step": 3224 }, { "epoch": 0.09415783480774284, "grad_norm": 0.666325627198219, "learning_rate": 4.706654991243433e-05, "loss": 0.6931, "step": 3225 }, { "epoch": 0.0941870310355902, "grad_norm": 0.6638989411135118, "learning_rate": 4.708114419147695e-05, "loss": 0.6503, "step": 3226 }, { "epoch": 0.09421622726343756, "grad_norm": 0.8049705926318376, "learning_rate": 4.709573847051956e-05, "loss": 0.7762, "step": 3227 }, { "epoch": 0.09424542349128492, "grad_norm": 0.6677066422559569, "learning_rate": 4.711033274956217e-05, "loss": 0.6255, "step": 3228 }, { "epoch": 0.09427461971913229, "grad_norm": 0.6931773832649715, "learning_rate": 4.7124927028604784e-05, "loss": 0.7541, "step": 3229 }, { "epoch": 0.09430381594697965, "grad_norm": 0.7373624937516186, "learning_rate": 4.7139521307647404e-05, "loss": 0.7745, "step": 3230 }, { "epoch": 0.09433301217482701, "grad_norm": 0.7530336494863018, "learning_rate": 4.7154115586690016e-05, "loss": 0.7568, "step": 3231 }, { "epoch": 0.09436220840267437, "grad_norm": 0.7376277928254372, "learning_rate": 4.7168709865732635e-05, "loss": 0.7945, "step": 3232 }, { "epoch": 0.09439140463052173, "grad_norm": 0.7697328225960803, "learning_rate": 4.718330414477525e-05, "loss": 0.7666, "step": 3233 }, { "epoch": 0.0944206008583691, "grad_norm": 0.7106201229662181, "learning_rate": 4.719789842381787e-05, "loss": 0.6719, "step": 3234 }, { "epoch": 0.09444979708621647, "grad_norm": 0.701366745913897, "learning_rate": 4.7212492702860486e-05, "loss": 0.7029, "step": 3235 }, { "epoch": 0.09447899331406383, "grad_norm": 0.698832308521188, "learning_rate": 4.72270869819031e-05, "loss": 0.722, "step": 3236 }, { "epoch": 0.09450818954191119, "grad_norm": 0.6654687673510601, "learning_rate": 4.724168126094571e-05, "loss": 0.7001, "step": 3237 }, { "epoch": 0.09453738576975855, "grad_norm": 0.645411877298748, "learning_rate": 4.7256275539988324e-05, "loss": 0.6988, "step": 3238 }, { "epoch": 0.09456658199760591, "grad_norm": 0.7413496469577938, "learning_rate": 4.727086981903094e-05, "loss": 0.6927, "step": 3239 }, { "epoch": 0.09459577822545327, "grad_norm": 0.721091243504044, "learning_rate": 4.7285464098073555e-05, "loss": 0.727, "step": 3240 }, { "epoch": 0.09462497445330063, "grad_norm": 0.6311895644877349, "learning_rate": 4.7300058377116175e-05, "loss": 0.6496, "step": 3241 }, { "epoch": 0.094654170681148, "grad_norm": 0.6627981145367332, "learning_rate": 4.731465265615879e-05, "loss": 0.6655, "step": 3242 }, { "epoch": 0.09468336690899536, "grad_norm": 0.6532507728134439, "learning_rate": 4.7329246935201406e-05, "loss": 0.6662, "step": 3243 }, { "epoch": 0.09471256313684272, "grad_norm": 0.7488778586276656, "learning_rate": 4.734384121424402e-05, "loss": 0.7588, "step": 3244 }, { "epoch": 0.09474175936469008, "grad_norm": 0.652490871262179, "learning_rate": 4.735843549328664e-05, "loss": 0.673, "step": 3245 }, { "epoch": 0.09477095559253744, "grad_norm": 0.7285390954801385, "learning_rate": 4.737302977232925e-05, "loss": 0.7305, "step": 3246 }, { "epoch": 0.0948001518203848, "grad_norm": 0.6271305857730033, "learning_rate": 4.738762405137186e-05, "loss": 0.6694, "step": 3247 }, { "epoch": 0.09482934804823216, "grad_norm": 0.6668505308236823, "learning_rate": 4.7402218330414475e-05, "loss": 0.6481, "step": 3248 }, { "epoch": 0.09485854427607954, "grad_norm": 0.6148500669558821, "learning_rate": 4.7416812609457095e-05, "loss": 0.6094, "step": 3249 }, { "epoch": 0.0948877405039269, "grad_norm": 0.6310804336668683, "learning_rate": 4.7431406888499714e-05, "loss": 0.6499, "step": 3250 }, { "epoch": 0.09491693673177426, "grad_norm": 0.7475208900741573, "learning_rate": 4.7446001167542326e-05, "loss": 0.783, "step": 3251 }, { "epoch": 0.09494613295962162, "grad_norm": 0.642399914170245, "learning_rate": 4.7460595446584946e-05, "loss": 0.6361, "step": 3252 }, { "epoch": 0.09497532918746898, "grad_norm": 0.6781775311636318, "learning_rate": 4.747518972562756e-05, "loss": 0.7372, "step": 3253 }, { "epoch": 0.09500452541531634, "grad_norm": 0.6401688585624157, "learning_rate": 4.748978400467017e-05, "loss": 0.6617, "step": 3254 }, { "epoch": 0.0950337216431637, "grad_norm": 0.6853844448535739, "learning_rate": 4.750437828371278e-05, "loss": 0.7062, "step": 3255 }, { "epoch": 0.09506291787101107, "grad_norm": 0.7326455862733375, "learning_rate": 4.75189725627554e-05, "loss": 0.8494, "step": 3256 }, { "epoch": 0.09509211409885843, "grad_norm": 0.6602409930166104, "learning_rate": 4.7533566841798015e-05, "loss": 0.6709, "step": 3257 }, { "epoch": 0.09512131032670579, "grad_norm": 0.9321273731846113, "learning_rate": 4.7548161120840634e-05, "loss": 0.7734, "step": 3258 }, { "epoch": 0.09515050655455315, "grad_norm": 0.6680450810167762, "learning_rate": 4.7562755399883246e-05, "loss": 0.7322, "step": 3259 }, { "epoch": 0.09517970278240051, "grad_norm": 0.6304909114759187, "learning_rate": 4.7577349678925866e-05, "loss": 0.628, "step": 3260 }, { "epoch": 0.09520889901024787, "grad_norm": 0.7533809220960661, "learning_rate": 4.759194395796848e-05, "loss": 0.7296, "step": 3261 }, { "epoch": 0.09523809523809523, "grad_norm": 0.6816421727230295, "learning_rate": 4.76065382370111e-05, "loss": 0.7012, "step": 3262 }, { "epoch": 0.0952672914659426, "grad_norm": 0.6921354494180829, "learning_rate": 4.762113251605371e-05, "loss": 0.6877, "step": 3263 }, { "epoch": 0.09529648769378997, "grad_norm": 0.6636085482251648, "learning_rate": 4.763572679509632e-05, "loss": 0.6666, "step": 3264 }, { "epoch": 0.09532568392163733, "grad_norm": 0.6864985371941291, "learning_rate": 4.7650321074138935e-05, "loss": 0.7331, "step": 3265 }, { "epoch": 0.09535488014948469, "grad_norm": 0.7763484682467938, "learning_rate": 4.7664915353181554e-05, "loss": 0.7275, "step": 3266 }, { "epoch": 0.09538407637733205, "grad_norm": 0.7459847622692071, "learning_rate": 4.767950963222417e-05, "loss": 0.6932, "step": 3267 }, { "epoch": 0.09541327260517941, "grad_norm": 0.6992110570548616, "learning_rate": 4.7694103911266786e-05, "loss": 0.6715, "step": 3268 }, { "epoch": 0.09544246883302678, "grad_norm": 0.6789908933927649, "learning_rate": 4.7708698190309405e-05, "loss": 0.6899, "step": 3269 }, { "epoch": 0.09547166506087414, "grad_norm": 0.7016025994315204, "learning_rate": 4.772329246935202e-05, "loss": 0.6166, "step": 3270 }, { "epoch": 0.0955008612887215, "grad_norm": 0.6763775640889577, "learning_rate": 4.773788674839464e-05, "loss": 0.7064, "step": 3271 }, { "epoch": 0.09553005751656886, "grad_norm": 0.6952110437619344, "learning_rate": 4.775248102743725e-05, "loss": 0.7579, "step": 3272 }, { "epoch": 0.09555925374441622, "grad_norm": 0.6458800284508793, "learning_rate": 4.776707530647986e-05, "loss": 0.6894, "step": 3273 }, { "epoch": 0.09558844997226358, "grad_norm": 0.6596915547843543, "learning_rate": 4.7781669585522474e-05, "loss": 0.6454, "step": 3274 }, { "epoch": 0.09561764620011094, "grad_norm": 0.6365050600602276, "learning_rate": 4.779626386456509e-05, "loss": 0.691, "step": 3275 }, { "epoch": 0.0956468424279583, "grad_norm": 0.615838786765412, "learning_rate": 4.7810858143607706e-05, "loss": 0.636, "step": 3276 }, { "epoch": 0.09567603865580566, "grad_norm": 0.7486522112692112, "learning_rate": 4.7825452422650325e-05, "loss": 0.8158, "step": 3277 }, { "epoch": 0.09570523488365303, "grad_norm": 0.6972192956152039, "learning_rate": 4.784004670169294e-05, "loss": 0.7282, "step": 3278 }, { "epoch": 0.0957344311115004, "grad_norm": 0.6900547444377646, "learning_rate": 4.785464098073556e-05, "loss": 0.6621, "step": 3279 }, { "epoch": 0.09576362733934776, "grad_norm": 0.6344505003786123, "learning_rate": 4.786923525977817e-05, "loss": 0.6048, "step": 3280 }, { "epoch": 0.09579282356719512, "grad_norm": 0.6287053492525984, "learning_rate": 4.788382953882078e-05, "loss": 0.6318, "step": 3281 }, { "epoch": 0.09582201979504248, "grad_norm": 0.6745118860898839, "learning_rate": 4.78984238178634e-05, "loss": 0.6748, "step": 3282 }, { "epoch": 0.09585121602288985, "grad_norm": 0.6755127961860083, "learning_rate": 4.791301809690601e-05, "loss": 0.7228, "step": 3283 }, { "epoch": 0.09588041225073721, "grad_norm": 0.6557616215285174, "learning_rate": 4.792761237594863e-05, "loss": 0.7143, "step": 3284 }, { "epoch": 0.09590960847858457, "grad_norm": 0.8888036039579167, "learning_rate": 4.7942206654991245e-05, "loss": 0.6649, "step": 3285 }, { "epoch": 0.09593880470643193, "grad_norm": 0.6602674167769882, "learning_rate": 4.7956800934033864e-05, "loss": 0.7049, "step": 3286 }, { "epoch": 0.09596800093427929, "grad_norm": 0.6717285976009859, "learning_rate": 4.797139521307648e-05, "loss": 0.7258, "step": 3287 }, { "epoch": 0.09599719716212665, "grad_norm": 0.7019070295624238, "learning_rate": 4.7985989492119096e-05, "loss": 0.7187, "step": 3288 }, { "epoch": 0.09602639338997401, "grad_norm": 0.6420525097852013, "learning_rate": 4.800058377116171e-05, "loss": 0.6218, "step": 3289 }, { "epoch": 0.09605558961782137, "grad_norm": 0.7638154435355365, "learning_rate": 4.801517805020432e-05, "loss": 0.7811, "step": 3290 }, { "epoch": 0.09608478584566874, "grad_norm": 0.7404735382581479, "learning_rate": 4.802977232924693e-05, "loss": 0.7691, "step": 3291 }, { "epoch": 0.0961139820735161, "grad_norm": 0.5535266559971339, "learning_rate": 4.804436660828955e-05, "loss": 0.507, "step": 3292 }, { "epoch": 0.09614317830136346, "grad_norm": 0.6456892160042847, "learning_rate": 4.8058960887332165e-05, "loss": 0.7067, "step": 3293 }, { "epoch": 0.09617237452921083, "grad_norm": 0.6139388147235021, "learning_rate": 4.8073555166374784e-05, "loss": 0.6069, "step": 3294 }, { "epoch": 0.0962015707570582, "grad_norm": 0.5993433758311348, "learning_rate": 4.80881494454174e-05, "loss": 0.575, "step": 3295 }, { "epoch": 0.09623076698490556, "grad_norm": 0.6289636350315648, "learning_rate": 4.8102743724460016e-05, "loss": 0.6377, "step": 3296 }, { "epoch": 0.09625996321275292, "grad_norm": 0.6716135091853797, "learning_rate": 4.811733800350263e-05, "loss": 0.7304, "step": 3297 }, { "epoch": 0.09628915944060028, "grad_norm": 0.648452267287365, "learning_rate": 4.813193228254525e-05, "loss": 0.668, "step": 3298 }, { "epoch": 0.09631835566844764, "grad_norm": 0.6940837315736027, "learning_rate": 4.814652656158786e-05, "loss": 0.7066, "step": 3299 }, { "epoch": 0.096347551896295, "grad_norm": 0.6340159334881202, "learning_rate": 4.816112084063047e-05, "loss": 0.6367, "step": 3300 }, { "epoch": 0.09637674812414236, "grad_norm": 0.7127616930240238, "learning_rate": 4.817571511967309e-05, "loss": 0.631, "step": 3301 }, { "epoch": 0.09640594435198972, "grad_norm": 0.7604295623844615, "learning_rate": 4.8190309398715704e-05, "loss": 0.8103, "step": 3302 }, { "epoch": 0.09643514057983708, "grad_norm": 0.6869821301701722, "learning_rate": 4.8204903677758324e-05, "loss": 0.7346, "step": 3303 }, { "epoch": 0.09646433680768444, "grad_norm": 0.6957959103322889, "learning_rate": 4.8219497956800936e-05, "loss": 0.7562, "step": 3304 }, { "epoch": 0.0964935330355318, "grad_norm": 0.6458089651165071, "learning_rate": 4.8234092235843555e-05, "loss": 0.6919, "step": 3305 }, { "epoch": 0.09652272926337917, "grad_norm": 0.8753872093121005, "learning_rate": 4.824868651488617e-05, "loss": 0.8194, "step": 3306 }, { "epoch": 0.09655192549122653, "grad_norm": 0.9372527798074197, "learning_rate": 4.826328079392878e-05, "loss": 0.7452, "step": 3307 }, { "epoch": 0.09658112171907389, "grad_norm": 0.638526624380089, "learning_rate": 4.827787507297139e-05, "loss": 0.6283, "step": 3308 }, { "epoch": 0.09661031794692125, "grad_norm": 0.6684311715299613, "learning_rate": 4.829246935201401e-05, "loss": 0.7183, "step": 3309 }, { "epoch": 0.09663951417476863, "grad_norm": 0.6291630975653221, "learning_rate": 4.8307063631056624e-05, "loss": 0.6594, "step": 3310 }, { "epoch": 0.09666871040261599, "grad_norm": 0.6616614203592024, "learning_rate": 4.8321657910099244e-05, "loss": 0.7373, "step": 3311 }, { "epoch": 0.09669790663046335, "grad_norm": 0.6700546075680139, "learning_rate": 4.8336252189141856e-05, "loss": 0.6756, "step": 3312 }, { "epoch": 0.09672710285831071, "grad_norm": 0.6408702715768706, "learning_rate": 4.8350846468184475e-05, "loss": 0.6013, "step": 3313 }, { "epoch": 0.09675629908615807, "grad_norm": 0.7759295093369182, "learning_rate": 4.8365440747227095e-05, "loss": 0.783, "step": 3314 }, { "epoch": 0.09678549531400543, "grad_norm": 0.6599274470028448, "learning_rate": 4.838003502626971e-05, "loss": 0.7061, "step": 3315 }, { "epoch": 0.0968146915418528, "grad_norm": 0.9379319832448891, "learning_rate": 4.839462930531232e-05, "loss": 0.71, "step": 3316 }, { "epoch": 0.09684388776970015, "grad_norm": 0.916405078394973, "learning_rate": 4.840922358435493e-05, "loss": 0.7739, "step": 3317 }, { "epoch": 0.09687308399754752, "grad_norm": 0.6896578848143259, "learning_rate": 4.842381786339755e-05, "loss": 0.681, "step": 3318 }, { "epoch": 0.09690228022539488, "grad_norm": 0.7200706204095946, "learning_rate": 4.8438412142440164e-05, "loss": 0.7319, "step": 3319 }, { "epoch": 0.09693147645324224, "grad_norm": 0.7153767410927814, "learning_rate": 4.845300642148278e-05, "loss": 0.7357, "step": 3320 }, { "epoch": 0.0969606726810896, "grad_norm": 0.6962032588950128, "learning_rate": 4.8467600700525395e-05, "loss": 0.7129, "step": 3321 }, { "epoch": 0.09698986890893696, "grad_norm": 0.683251525639649, "learning_rate": 4.8482194979568015e-05, "loss": 0.6996, "step": 3322 }, { "epoch": 0.09701906513678432, "grad_norm": 0.6846347763699725, "learning_rate": 4.849678925861063e-05, "loss": 0.7133, "step": 3323 }, { "epoch": 0.09704826136463168, "grad_norm": 0.6839990434610116, "learning_rate": 4.8511383537653246e-05, "loss": 0.7374, "step": 3324 }, { "epoch": 0.09707745759247906, "grad_norm": 0.6683381328515247, "learning_rate": 4.852597781669586e-05, "loss": 0.7235, "step": 3325 }, { "epoch": 0.09710665382032642, "grad_norm": 0.6839290510099874, "learning_rate": 4.854057209573847e-05, "loss": 0.742, "step": 3326 }, { "epoch": 0.09713585004817378, "grad_norm": 0.8777629287462018, "learning_rate": 4.8555166374781084e-05, "loss": 0.7306, "step": 3327 }, { "epoch": 0.09716504627602114, "grad_norm": 0.7326163161225332, "learning_rate": 4.85697606538237e-05, "loss": 0.7228, "step": 3328 }, { "epoch": 0.0971942425038685, "grad_norm": 0.8396690132868246, "learning_rate": 4.8584354932866315e-05, "loss": 0.7471, "step": 3329 }, { "epoch": 0.09722343873171586, "grad_norm": 0.7272162220852719, "learning_rate": 4.8598949211908935e-05, "loss": 0.6669, "step": 3330 }, { "epoch": 0.09725263495956323, "grad_norm": 0.7058247295160539, "learning_rate": 4.8613543490951554e-05, "loss": 0.6551, "step": 3331 }, { "epoch": 0.09728183118741059, "grad_norm": 0.5795902360338147, "learning_rate": 4.8628137769994166e-05, "loss": 0.5756, "step": 3332 }, { "epoch": 0.09731102741525795, "grad_norm": 0.6403613341657013, "learning_rate": 4.864273204903678e-05, "loss": 0.6798, "step": 3333 }, { "epoch": 0.09734022364310531, "grad_norm": 0.6395058091032837, "learning_rate": 4.865732632807939e-05, "loss": 0.6816, "step": 3334 }, { "epoch": 0.09736941987095267, "grad_norm": 0.7126647098075531, "learning_rate": 4.867192060712201e-05, "loss": 0.7322, "step": 3335 }, { "epoch": 0.09739861609880003, "grad_norm": 0.6385560433406509, "learning_rate": 4.868651488616462e-05, "loss": 0.5272, "step": 3336 }, { "epoch": 0.09742781232664739, "grad_norm": 0.8476589858994982, "learning_rate": 4.870110916520724e-05, "loss": 0.6658, "step": 3337 }, { "epoch": 0.09745700855449475, "grad_norm": 0.6649724462585347, "learning_rate": 4.8715703444249855e-05, "loss": 0.6491, "step": 3338 }, { "epoch": 0.09748620478234211, "grad_norm": 0.6076996061878516, "learning_rate": 4.8730297723292474e-05, "loss": 0.6412, "step": 3339 }, { "epoch": 0.09751540101018949, "grad_norm": 0.6758852259741361, "learning_rate": 4.8744892002335086e-05, "loss": 0.7072, "step": 3340 }, { "epoch": 0.09754459723803685, "grad_norm": 0.6914120070344363, "learning_rate": 4.8759486281377706e-05, "loss": 0.7356, "step": 3341 }, { "epoch": 0.09757379346588421, "grad_norm": 0.7190536362320502, "learning_rate": 4.877408056042032e-05, "loss": 0.7346, "step": 3342 }, { "epoch": 0.09760298969373157, "grad_norm": 0.7008288889251022, "learning_rate": 4.878867483946293e-05, "loss": 0.7923, "step": 3343 }, { "epoch": 0.09763218592157893, "grad_norm": 0.6652176213865207, "learning_rate": 4.880326911850554e-05, "loss": 0.7123, "step": 3344 }, { "epoch": 0.0976613821494263, "grad_norm": 0.6351191156807323, "learning_rate": 4.881786339754816e-05, "loss": 0.6434, "step": 3345 }, { "epoch": 0.09769057837727366, "grad_norm": 0.6618713062594346, "learning_rate": 4.883245767659078e-05, "loss": 0.7309, "step": 3346 }, { "epoch": 0.09771977460512102, "grad_norm": 0.6757102206931211, "learning_rate": 4.8847051955633394e-05, "loss": 0.625, "step": 3347 }, { "epoch": 0.09774897083296838, "grad_norm": 0.6314376170045579, "learning_rate": 4.886164623467601e-05, "loss": 0.6507, "step": 3348 }, { "epoch": 0.09777816706081574, "grad_norm": 0.7255542233546609, "learning_rate": 4.8876240513718626e-05, "loss": 0.7058, "step": 3349 }, { "epoch": 0.0978073632886631, "grad_norm": 0.6687784439348721, "learning_rate": 4.8890834792761245e-05, "loss": 0.7202, "step": 3350 }, { "epoch": 0.09783655951651046, "grad_norm": 0.7345448455081353, "learning_rate": 4.890542907180386e-05, "loss": 0.7485, "step": 3351 }, { "epoch": 0.09786575574435782, "grad_norm": 0.6433261775623481, "learning_rate": 4.892002335084647e-05, "loss": 0.7038, "step": 3352 }, { "epoch": 0.09789495197220519, "grad_norm": 0.7090980248411634, "learning_rate": 4.893461762988908e-05, "loss": 0.6775, "step": 3353 }, { "epoch": 0.09792414820005255, "grad_norm": 0.6982918960967467, "learning_rate": 4.89492119089317e-05, "loss": 0.7294, "step": 3354 }, { "epoch": 0.09795334442789992, "grad_norm": 0.859234122656619, "learning_rate": 4.8963806187974314e-05, "loss": 0.7905, "step": 3355 }, { "epoch": 0.09798254065574728, "grad_norm": 0.654017458237716, "learning_rate": 4.897840046701693e-05, "loss": 0.6964, "step": 3356 }, { "epoch": 0.09801173688359464, "grad_norm": 0.6637594542638589, "learning_rate": 4.8992994746059546e-05, "loss": 0.7187, "step": 3357 }, { "epoch": 0.098040933111442, "grad_norm": 0.7176672944485425, "learning_rate": 4.9007589025102165e-05, "loss": 0.7855, "step": 3358 }, { "epoch": 0.09807012933928937, "grad_norm": 0.7777064133183007, "learning_rate": 4.902218330414478e-05, "loss": 0.7343, "step": 3359 }, { "epoch": 0.09809932556713673, "grad_norm": 0.6449498236393969, "learning_rate": 4.903677758318739e-05, "loss": 0.6653, "step": 3360 }, { "epoch": 0.09812852179498409, "grad_norm": 0.7053717701184391, "learning_rate": 4.905137186223e-05, "loss": 0.7594, "step": 3361 }, { "epoch": 0.09815771802283145, "grad_norm": 0.6262171822706105, "learning_rate": 4.906596614127262e-05, "loss": 0.6368, "step": 3362 }, { "epoch": 0.09818691425067881, "grad_norm": 0.6473919533679283, "learning_rate": 4.908056042031524e-05, "loss": 0.6815, "step": 3363 }, { "epoch": 0.09821611047852617, "grad_norm": 0.7378635878817167, "learning_rate": 4.909515469935785e-05, "loss": 0.7457, "step": 3364 }, { "epoch": 0.09824530670637353, "grad_norm": 0.7029223662893, "learning_rate": 4.910974897840047e-05, "loss": 0.7408, "step": 3365 }, { "epoch": 0.0982745029342209, "grad_norm": 0.6519308252521636, "learning_rate": 4.9124343257443085e-05, "loss": 0.6768, "step": 3366 }, { "epoch": 0.09830369916206826, "grad_norm": 0.7110225005491497, "learning_rate": 4.9138937536485704e-05, "loss": 0.7826, "step": 3367 }, { "epoch": 0.09833289538991562, "grad_norm": 0.6417413137960954, "learning_rate": 4.915353181552832e-05, "loss": 0.6998, "step": 3368 }, { "epoch": 0.09836209161776298, "grad_norm": 0.6447177415062684, "learning_rate": 4.916812609457093e-05, "loss": 0.7005, "step": 3369 }, { "epoch": 0.09839128784561035, "grad_norm": 2.609856518860866, "learning_rate": 4.918272037361354e-05, "loss": 0.7584, "step": 3370 }, { "epoch": 0.09842048407345771, "grad_norm": 0.6187607633363982, "learning_rate": 4.919731465265616e-05, "loss": 0.6369, "step": 3371 }, { "epoch": 0.09844968030130508, "grad_norm": 0.7378004373416929, "learning_rate": 4.9211908931698773e-05, "loss": 0.7584, "step": 3372 }, { "epoch": 0.09847887652915244, "grad_norm": 0.69698254275406, "learning_rate": 4.922650321074139e-05, "loss": 0.6886, "step": 3373 }, { "epoch": 0.0985080727569998, "grad_norm": 0.7081848243504475, "learning_rate": 4.9241097489784005e-05, "loss": 0.8001, "step": 3374 }, { "epoch": 0.09853726898484716, "grad_norm": 0.635754797583786, "learning_rate": 4.9255691768826624e-05, "loss": 0.6464, "step": 3375 }, { "epoch": 0.09856646521269452, "grad_norm": 0.6439882592515337, "learning_rate": 4.927028604786924e-05, "loss": 0.6788, "step": 3376 }, { "epoch": 0.09859566144054188, "grad_norm": 0.6912235185639442, "learning_rate": 4.9284880326911856e-05, "loss": 0.7099, "step": 3377 }, { "epoch": 0.09862485766838924, "grad_norm": 0.6941587216587563, "learning_rate": 4.929947460595447e-05, "loss": 0.76, "step": 3378 }, { "epoch": 0.0986540538962366, "grad_norm": 0.6115482012074221, "learning_rate": 4.931406888499708e-05, "loss": 0.6663, "step": 3379 }, { "epoch": 0.09868325012408397, "grad_norm": 0.6366446112792121, "learning_rate": 4.93286631640397e-05, "loss": 0.6476, "step": 3380 }, { "epoch": 0.09871244635193133, "grad_norm": 0.6579549048619794, "learning_rate": 4.934325744308231e-05, "loss": 0.6739, "step": 3381 }, { "epoch": 0.09874164257977869, "grad_norm": 0.6626346637165812, "learning_rate": 4.935785172212493e-05, "loss": 0.7275, "step": 3382 }, { "epoch": 0.09877083880762605, "grad_norm": 0.8216802211687656, "learning_rate": 4.9372446001167544e-05, "loss": 0.7352, "step": 3383 }, { "epoch": 0.09880003503547341, "grad_norm": 0.6742590147604881, "learning_rate": 4.9387040280210164e-05, "loss": 0.6905, "step": 3384 }, { "epoch": 0.09882923126332079, "grad_norm": 0.7509245562900441, "learning_rate": 4.9401634559252776e-05, "loss": 0.7322, "step": 3385 }, { "epoch": 0.09885842749116815, "grad_norm": 0.6164581098800304, "learning_rate": 4.941622883829539e-05, "loss": 0.6845, "step": 3386 }, { "epoch": 0.09888762371901551, "grad_norm": 0.6757947136118012, "learning_rate": 4.9430823117338e-05, "loss": 0.6509, "step": 3387 }, { "epoch": 0.09891681994686287, "grad_norm": 0.6845498870519441, "learning_rate": 4.944541739638062e-05, "loss": 0.7045, "step": 3388 }, { "epoch": 0.09894601617471023, "grad_norm": 0.8059684566461248, "learning_rate": 4.946001167542323e-05, "loss": 0.7421, "step": 3389 }, { "epoch": 0.09897521240255759, "grad_norm": 0.6496518841316742, "learning_rate": 4.947460595446585e-05, "loss": 0.6508, "step": 3390 }, { "epoch": 0.09900440863040495, "grad_norm": 0.6645480234158712, "learning_rate": 4.9489200233508464e-05, "loss": 0.8001, "step": 3391 }, { "epoch": 0.09903360485825231, "grad_norm": 0.9596372137998067, "learning_rate": 4.9503794512551084e-05, "loss": 0.7189, "step": 3392 }, { "epoch": 0.09906280108609967, "grad_norm": 0.6661072062387561, "learning_rate": 4.9518388791593696e-05, "loss": 0.7533, "step": 3393 }, { "epoch": 0.09909199731394704, "grad_norm": 0.6864506947140907, "learning_rate": 4.9532983070636315e-05, "loss": 0.7339, "step": 3394 }, { "epoch": 0.0991211935417944, "grad_norm": 0.632221346464135, "learning_rate": 4.954757734967893e-05, "loss": 0.6354, "step": 3395 }, { "epoch": 0.09915038976964176, "grad_norm": 0.6775646364864865, "learning_rate": 4.956217162872154e-05, "loss": 0.7587, "step": 3396 }, { "epoch": 0.09917958599748912, "grad_norm": 0.6823256378502501, "learning_rate": 4.957676590776416e-05, "loss": 0.7544, "step": 3397 }, { "epoch": 0.09920878222533648, "grad_norm": 0.6883695130214713, "learning_rate": 4.959136018680677e-05, "loss": 0.723, "step": 3398 }, { "epoch": 0.09923797845318384, "grad_norm": 0.6326128708464298, "learning_rate": 4.960595446584939e-05, "loss": 0.5581, "step": 3399 }, { "epoch": 0.09926717468103122, "grad_norm": 0.6371669920588383, "learning_rate": 4.9620548744892004e-05, "loss": 0.6832, "step": 3400 }, { "epoch": 0.09929637090887858, "grad_norm": 0.7353685629568557, "learning_rate": 4.963514302393462e-05, "loss": 0.7002, "step": 3401 }, { "epoch": 0.09932556713672594, "grad_norm": 0.6455563161377303, "learning_rate": 4.9649737302977235e-05, "loss": 0.6903, "step": 3402 }, { "epoch": 0.0993547633645733, "grad_norm": 0.6658161373322247, "learning_rate": 4.9664331582019855e-05, "loss": 0.6997, "step": 3403 }, { "epoch": 0.09938395959242066, "grad_norm": 0.7405410426825858, "learning_rate": 4.967892586106247e-05, "loss": 0.7515, "step": 3404 }, { "epoch": 0.09941315582026802, "grad_norm": 0.6378091039308863, "learning_rate": 4.969352014010508e-05, "loss": 0.6157, "step": 3405 }, { "epoch": 0.09944235204811538, "grad_norm": 0.6661780323573305, "learning_rate": 4.970811441914769e-05, "loss": 0.6729, "step": 3406 }, { "epoch": 0.09947154827596275, "grad_norm": 0.7072232238548046, "learning_rate": 4.972270869819031e-05, "loss": 0.7354, "step": 3407 }, { "epoch": 0.0995007445038101, "grad_norm": 0.7243424905935658, "learning_rate": 4.9737302977232924e-05, "loss": 0.7254, "step": 3408 }, { "epoch": 0.09952994073165747, "grad_norm": 0.6859767631944957, "learning_rate": 4.975189725627554e-05, "loss": 0.6863, "step": 3409 }, { "epoch": 0.09955913695950483, "grad_norm": 0.6872461402331215, "learning_rate": 4.976649153531816e-05, "loss": 0.7393, "step": 3410 }, { "epoch": 0.09958833318735219, "grad_norm": 0.6451255781006368, "learning_rate": 4.9781085814360775e-05, "loss": 0.6617, "step": 3411 }, { "epoch": 0.09961752941519955, "grad_norm": 0.6674999109050641, "learning_rate": 4.9795680093403394e-05, "loss": 0.7049, "step": 3412 }, { "epoch": 0.09964672564304691, "grad_norm": 0.6349476845472897, "learning_rate": 4.9810274372446e-05, "loss": 0.6765, "step": 3413 }, { "epoch": 0.09967592187089427, "grad_norm": 0.9460440325192068, "learning_rate": 4.982486865148862e-05, "loss": 0.7587, "step": 3414 }, { "epoch": 0.09970511809874165, "grad_norm": 0.62440453558956, "learning_rate": 4.983946293053123e-05, "loss": 0.6272, "step": 3415 }, { "epoch": 0.09973431432658901, "grad_norm": 0.6481331109919002, "learning_rate": 4.985405720957385e-05, "loss": 0.6886, "step": 3416 }, { "epoch": 0.09976351055443637, "grad_norm": 0.6832543616347371, "learning_rate": 4.986865148861646e-05, "loss": 0.7208, "step": 3417 }, { "epoch": 0.09979270678228373, "grad_norm": 0.6647625381776906, "learning_rate": 4.988324576765908e-05, "loss": 0.6567, "step": 3418 }, { "epoch": 0.0998219030101311, "grad_norm": 0.672381587719753, "learning_rate": 4.9897840046701695e-05, "loss": 0.7276, "step": 3419 }, { "epoch": 0.09985109923797846, "grad_norm": 0.6788494949179725, "learning_rate": 4.9912434325744314e-05, "loss": 0.7389, "step": 3420 }, { "epoch": 0.09988029546582582, "grad_norm": 0.6476361645092699, "learning_rate": 4.9927028604786927e-05, "loss": 0.6799, "step": 3421 }, { "epoch": 0.09990949169367318, "grad_norm": 0.7629293628080182, "learning_rate": 4.994162288382954e-05, "loss": 0.6841, "step": 3422 }, { "epoch": 0.09993868792152054, "grad_norm": 0.7744986183006322, "learning_rate": 4.995621716287215e-05, "loss": 0.7776, "step": 3423 }, { "epoch": 0.0999678841493679, "grad_norm": 0.6854107865113376, "learning_rate": 4.997081144191477e-05, "loss": 0.7305, "step": 3424 }, { "epoch": 0.09999708037721526, "grad_norm": 0.6905684465726715, "learning_rate": 4.998540572095738e-05, "loss": 0.6204, "step": 3425 }, { "epoch": 0.10002627660506262, "grad_norm": 0.6612033200448582, "learning_rate": 5e-05, "loss": 0.7059, "step": 3426 }, { "epoch": 0.10005547283290998, "grad_norm": 0.6084777888564826, "learning_rate": 4.9998377939983784e-05, "loss": 0.6006, "step": 3427 }, { "epoch": 0.10008466906075734, "grad_norm": 0.6445959353677136, "learning_rate": 4.999675587996756e-05, "loss": 0.6442, "step": 3428 }, { "epoch": 0.1001138652886047, "grad_norm": 0.957353202368898, "learning_rate": 4.999513381995134e-05, "loss": 0.6942, "step": 3429 }, { "epoch": 0.10014306151645208, "grad_norm": 0.8088346108283246, "learning_rate": 4.9993511759935116e-05, "loss": 0.7634, "step": 3430 }, { "epoch": 0.10017225774429944, "grad_norm": 0.6123242663515497, "learning_rate": 4.99918896999189e-05, "loss": 0.5618, "step": 3431 }, { "epoch": 0.1002014539721468, "grad_norm": 0.6796921030691364, "learning_rate": 4.999026763990268e-05, "loss": 0.7, "step": 3432 }, { "epoch": 0.10023065019999416, "grad_norm": 0.7896427333983312, "learning_rate": 4.9988645579886454e-05, "loss": 0.7915, "step": 3433 }, { "epoch": 0.10025984642784153, "grad_norm": 0.7173278708750147, "learning_rate": 4.9987023519870236e-05, "loss": 0.749, "step": 3434 }, { "epoch": 0.10028904265568889, "grad_norm": 0.782520348180667, "learning_rate": 4.998540145985401e-05, "loss": 0.6536, "step": 3435 }, { "epoch": 0.10031823888353625, "grad_norm": 0.6267431233945006, "learning_rate": 4.99837793998378e-05, "loss": 0.7369, "step": 3436 }, { "epoch": 0.10034743511138361, "grad_norm": 0.6983877879245192, "learning_rate": 4.998215733982158e-05, "loss": 0.7046, "step": 3437 }, { "epoch": 0.10037663133923097, "grad_norm": 0.6541510384177815, "learning_rate": 4.9980535279805356e-05, "loss": 0.7164, "step": 3438 }, { "epoch": 0.10040582756707833, "grad_norm": 0.6435062786851109, "learning_rate": 4.997891321978914e-05, "loss": 0.7016, "step": 3439 }, { "epoch": 0.10043502379492569, "grad_norm": 0.6491353805407708, "learning_rate": 4.997729115977291e-05, "loss": 0.6573, "step": 3440 }, { "epoch": 0.10046422002277305, "grad_norm": 0.6343269078226523, "learning_rate": 4.9975669099756695e-05, "loss": 0.629, "step": 3441 }, { "epoch": 0.10049341625062042, "grad_norm": 0.6191940225951267, "learning_rate": 4.997404703974048e-05, "loss": 0.6782, "step": 3442 }, { "epoch": 0.10052261247846778, "grad_norm": 0.6976136111052277, "learning_rate": 4.997242497972425e-05, "loss": 0.752, "step": 3443 }, { "epoch": 0.10055180870631514, "grad_norm": 0.7403565243252151, "learning_rate": 4.9970802919708033e-05, "loss": 0.7995, "step": 3444 }, { "epoch": 0.10058100493416251, "grad_norm": 0.6515355905768319, "learning_rate": 4.996918085969181e-05, "loss": 0.6504, "step": 3445 }, { "epoch": 0.10061020116200987, "grad_norm": 0.7362953932859154, "learning_rate": 4.996755879967559e-05, "loss": 0.725, "step": 3446 }, { "epoch": 0.10063939738985724, "grad_norm": 0.6581614726314423, "learning_rate": 4.996593673965937e-05, "loss": 0.6681, "step": 3447 }, { "epoch": 0.1006685936177046, "grad_norm": 0.8623648166347275, "learning_rate": 4.996431467964315e-05, "loss": 0.7934, "step": 3448 }, { "epoch": 0.10069778984555196, "grad_norm": 0.6719674746198855, "learning_rate": 4.996269261962693e-05, "loss": 0.7029, "step": 3449 }, { "epoch": 0.10072698607339932, "grad_norm": 0.6232029211053677, "learning_rate": 4.9961070559610704e-05, "loss": 0.6106, "step": 3450 }, { "epoch": 0.10075618230124668, "grad_norm": 0.6309579205892388, "learning_rate": 4.9959448499594485e-05, "loss": 0.6452, "step": 3451 }, { "epoch": 0.10078537852909404, "grad_norm": 0.6779857393664376, "learning_rate": 4.995782643957827e-05, "loss": 0.7073, "step": 3452 }, { "epoch": 0.1008145747569414, "grad_norm": 0.7159467983232582, "learning_rate": 4.995620437956204e-05, "loss": 0.7587, "step": 3453 }, { "epoch": 0.10084377098478876, "grad_norm": 0.6988037867136205, "learning_rate": 4.9954582319545824e-05, "loss": 0.6402, "step": 3454 }, { "epoch": 0.10087296721263612, "grad_norm": 0.6986391237172739, "learning_rate": 4.9952960259529606e-05, "loss": 0.7136, "step": 3455 }, { "epoch": 0.10090216344048349, "grad_norm": 0.6989400498434672, "learning_rate": 4.995133819951339e-05, "loss": 0.7257, "step": 3456 }, { "epoch": 0.10093135966833085, "grad_norm": 0.8026940373524241, "learning_rate": 4.994971613949716e-05, "loss": 0.7702, "step": 3457 }, { "epoch": 0.10096055589617821, "grad_norm": 0.7292368996939756, "learning_rate": 4.9948094079480944e-05, "loss": 0.718, "step": 3458 }, { "epoch": 0.10098975212402557, "grad_norm": 0.6779590966967762, "learning_rate": 4.9946472019464726e-05, "loss": 0.6485, "step": 3459 }, { "epoch": 0.10101894835187294, "grad_norm": 0.6632465516802308, "learning_rate": 4.99448499594485e-05, "loss": 0.6757, "step": 3460 }, { "epoch": 0.1010481445797203, "grad_norm": 0.6608895424430082, "learning_rate": 4.994322789943228e-05, "loss": 0.7059, "step": 3461 }, { "epoch": 0.10107734080756767, "grad_norm": 0.8082899365451329, "learning_rate": 4.9941605839416065e-05, "loss": 0.7777, "step": 3462 }, { "epoch": 0.10110653703541503, "grad_norm": 0.6756093596061717, "learning_rate": 4.993998377939984e-05, "loss": 0.7084, "step": 3463 }, { "epoch": 0.10113573326326239, "grad_norm": 0.6410520358102194, "learning_rate": 4.993836171938362e-05, "loss": 0.6675, "step": 3464 }, { "epoch": 0.10116492949110975, "grad_norm": 0.6923624136866608, "learning_rate": 4.9936739659367396e-05, "loss": 0.7664, "step": 3465 }, { "epoch": 0.10119412571895711, "grad_norm": 0.6738805048438841, "learning_rate": 4.993511759935118e-05, "loss": 0.6589, "step": 3466 }, { "epoch": 0.10122332194680447, "grad_norm": 0.6689054006234241, "learning_rate": 4.993349553933496e-05, "loss": 0.6912, "step": 3467 }, { "epoch": 0.10125251817465183, "grad_norm": 0.7721553954782066, "learning_rate": 4.9931873479318735e-05, "loss": 0.7567, "step": 3468 }, { "epoch": 0.1012817144024992, "grad_norm": 0.704103339046176, "learning_rate": 4.9930251419302517e-05, "loss": 0.7387, "step": 3469 }, { "epoch": 0.10131091063034656, "grad_norm": 0.6719408459677984, "learning_rate": 4.992862935928629e-05, "loss": 0.7122, "step": 3470 }, { "epoch": 0.10134010685819392, "grad_norm": 0.6628279610109136, "learning_rate": 4.992700729927007e-05, "loss": 0.6786, "step": 3471 }, { "epoch": 0.10136930308604128, "grad_norm": 0.6006524927844384, "learning_rate": 4.9925385239253855e-05, "loss": 0.5804, "step": 3472 }, { "epoch": 0.10139849931388864, "grad_norm": 0.6950169836825001, "learning_rate": 4.992376317923763e-05, "loss": 0.8184, "step": 3473 }, { "epoch": 0.101427695541736, "grad_norm": 0.7862803773638772, "learning_rate": 4.992214111922142e-05, "loss": 0.7194, "step": 3474 }, { "epoch": 0.10145689176958338, "grad_norm": 0.6711994953748457, "learning_rate": 4.9920519059205194e-05, "loss": 0.7144, "step": 3475 }, { "epoch": 0.10148608799743074, "grad_norm": 0.6917185341675616, "learning_rate": 4.9918896999188975e-05, "loss": 0.6676, "step": 3476 }, { "epoch": 0.1015152842252781, "grad_norm": 0.6586031009901013, "learning_rate": 4.991727493917275e-05, "loss": 0.7011, "step": 3477 }, { "epoch": 0.10154448045312546, "grad_norm": 0.7009114431667955, "learning_rate": 4.991565287915653e-05, "loss": 0.7744, "step": 3478 }, { "epoch": 0.10157367668097282, "grad_norm": 0.7145558346363953, "learning_rate": 4.9914030819140314e-05, "loss": 0.7688, "step": 3479 }, { "epoch": 0.10160287290882018, "grad_norm": 0.6956152231265545, "learning_rate": 4.991240875912409e-05, "loss": 0.7049, "step": 3480 }, { "epoch": 0.10163206913666754, "grad_norm": 0.6646733938655992, "learning_rate": 4.991078669910787e-05, "loss": 0.6386, "step": 3481 }, { "epoch": 0.1016612653645149, "grad_norm": 0.6527815833324897, "learning_rate": 4.990916463909165e-05, "loss": 0.7047, "step": 3482 }, { "epoch": 0.10169046159236227, "grad_norm": 0.6992704601895657, "learning_rate": 4.990754257907543e-05, "loss": 0.7058, "step": 3483 }, { "epoch": 0.10171965782020963, "grad_norm": 0.6793895898033415, "learning_rate": 4.990592051905921e-05, "loss": 0.6985, "step": 3484 }, { "epoch": 0.10174885404805699, "grad_norm": 0.6635169376001202, "learning_rate": 4.9904298459042984e-05, "loss": 0.7287, "step": 3485 }, { "epoch": 0.10177805027590435, "grad_norm": 0.7040319465412004, "learning_rate": 4.9902676399026766e-05, "loss": 0.7659, "step": 3486 }, { "epoch": 0.10180724650375171, "grad_norm": 0.6823302215988585, "learning_rate": 4.990105433901055e-05, "loss": 0.6792, "step": 3487 }, { "epoch": 0.10183644273159907, "grad_norm": 0.6655249958442871, "learning_rate": 4.989943227899432e-05, "loss": 0.6534, "step": 3488 }, { "epoch": 0.10186563895944643, "grad_norm": 0.6477550724383647, "learning_rate": 4.9897810218978104e-05, "loss": 0.7097, "step": 3489 }, { "epoch": 0.10189483518729381, "grad_norm": 0.6368720985329985, "learning_rate": 4.989618815896188e-05, "loss": 0.7216, "step": 3490 }, { "epoch": 0.10192403141514117, "grad_norm": 0.6730728685737298, "learning_rate": 4.989456609894566e-05, "loss": 0.6937, "step": 3491 }, { "epoch": 0.10195322764298853, "grad_norm": 0.6043063749141916, "learning_rate": 4.989294403892944e-05, "loss": 0.604, "step": 3492 }, { "epoch": 0.10198242387083589, "grad_norm": 0.6227840503663097, "learning_rate": 4.9891321978913225e-05, "loss": 0.669, "step": 3493 }, { "epoch": 0.10201162009868325, "grad_norm": 0.6953324531424167, "learning_rate": 4.9889699918897006e-05, "loss": 0.7467, "step": 3494 }, { "epoch": 0.10204081632653061, "grad_norm": 0.6449074302878706, "learning_rate": 4.988807785888078e-05, "loss": 0.668, "step": 3495 }, { "epoch": 0.10207001255437798, "grad_norm": 0.6401519338024744, "learning_rate": 4.988645579886456e-05, "loss": 0.6463, "step": 3496 }, { "epoch": 0.10209920878222534, "grad_norm": 0.7463171993300418, "learning_rate": 4.988483373884834e-05, "loss": 0.7104, "step": 3497 }, { "epoch": 0.1021284050100727, "grad_norm": 0.6338776422696869, "learning_rate": 4.988321167883212e-05, "loss": 0.6984, "step": 3498 }, { "epoch": 0.10215760123792006, "grad_norm": 0.6751574125695706, "learning_rate": 4.98815896188159e-05, "loss": 0.675, "step": 3499 }, { "epoch": 0.10218679746576742, "grad_norm": 0.6560388249531269, "learning_rate": 4.9879967558799677e-05, "loss": 0.6863, "step": 3500 }, { "epoch": 0.10221599369361478, "grad_norm": 0.6988404216917172, "learning_rate": 4.987834549878346e-05, "loss": 0.692, "step": 3501 }, { "epoch": 0.10224518992146214, "grad_norm": 0.602039354390467, "learning_rate": 4.987672343876723e-05, "loss": 0.6184, "step": 3502 }, { "epoch": 0.1022743861493095, "grad_norm": 0.7267311359939365, "learning_rate": 4.9875101378751015e-05, "loss": 0.7636, "step": 3503 }, { "epoch": 0.10230358237715687, "grad_norm": 0.7373589154754128, "learning_rate": 4.98734793187348e-05, "loss": 0.7105, "step": 3504 }, { "epoch": 0.10233277860500424, "grad_norm": 0.6451483417356902, "learning_rate": 4.987185725871857e-05, "loss": 0.6241, "step": 3505 }, { "epoch": 0.1023619748328516, "grad_norm": 0.6376938773620898, "learning_rate": 4.9870235198702354e-05, "loss": 0.7052, "step": 3506 }, { "epoch": 0.10239117106069896, "grad_norm": 0.6434785983799074, "learning_rate": 4.9868613138686135e-05, "loss": 0.6679, "step": 3507 }, { "epoch": 0.10242036728854632, "grad_norm": 0.7251237231142766, "learning_rate": 4.986699107866991e-05, "loss": 0.6646, "step": 3508 }, { "epoch": 0.10244956351639369, "grad_norm": 0.9355087811285345, "learning_rate": 4.986536901865369e-05, "loss": 0.7436, "step": 3509 }, { "epoch": 0.10247875974424105, "grad_norm": 0.6240400090238523, "learning_rate": 4.986374695863747e-05, "loss": 0.6338, "step": 3510 }, { "epoch": 0.10250795597208841, "grad_norm": 0.6571401600221854, "learning_rate": 4.9862124898621256e-05, "loss": 0.6545, "step": 3511 }, { "epoch": 0.10253715219993577, "grad_norm": 0.6105057127688709, "learning_rate": 4.986050283860503e-05, "loss": 0.6828, "step": 3512 }, { "epoch": 0.10256634842778313, "grad_norm": 0.6523837375229214, "learning_rate": 4.985888077858881e-05, "loss": 0.6942, "step": 3513 }, { "epoch": 0.10259554465563049, "grad_norm": 0.6724071577843551, "learning_rate": 4.9857258718572594e-05, "loss": 0.6723, "step": 3514 }, { "epoch": 0.10262474088347785, "grad_norm": 0.6272593475825531, "learning_rate": 4.985563665855637e-05, "loss": 0.6856, "step": 3515 }, { "epoch": 0.10265393711132521, "grad_norm": 0.6422437404390193, "learning_rate": 4.985401459854015e-05, "loss": 0.6516, "step": 3516 }, { "epoch": 0.10268313333917257, "grad_norm": 0.5675130253620151, "learning_rate": 4.9852392538523926e-05, "loss": 0.5802, "step": 3517 }, { "epoch": 0.10271232956701994, "grad_norm": 0.7371024615890589, "learning_rate": 4.985077047850771e-05, "loss": 0.7363, "step": 3518 }, { "epoch": 0.1027415257948673, "grad_norm": 0.6987630149793546, "learning_rate": 4.984914841849149e-05, "loss": 0.7418, "step": 3519 }, { "epoch": 0.10277072202271466, "grad_norm": 0.640349480497819, "learning_rate": 4.9847526358475264e-05, "loss": 0.6837, "step": 3520 }, { "epoch": 0.10279991825056203, "grad_norm": 0.6141623264686745, "learning_rate": 4.9845904298459046e-05, "loss": 0.6017, "step": 3521 }, { "epoch": 0.1028291144784094, "grad_norm": 0.6322559242303032, "learning_rate": 4.984428223844282e-05, "loss": 0.6774, "step": 3522 }, { "epoch": 0.10285831070625676, "grad_norm": 0.6527852771451783, "learning_rate": 4.98426601784266e-05, "loss": 0.688, "step": 3523 }, { "epoch": 0.10288750693410412, "grad_norm": 0.593843486981083, "learning_rate": 4.9841038118410385e-05, "loss": 0.6283, "step": 3524 }, { "epoch": 0.10291670316195148, "grad_norm": 0.6666021734251155, "learning_rate": 4.983941605839416e-05, "loss": 0.6785, "step": 3525 }, { "epoch": 0.10294589938979884, "grad_norm": 0.7132813314595674, "learning_rate": 4.983779399837794e-05, "loss": 0.7702, "step": 3526 }, { "epoch": 0.1029750956176462, "grad_norm": 0.6277327467199697, "learning_rate": 4.983617193836172e-05, "loss": 0.5854, "step": 3527 }, { "epoch": 0.10300429184549356, "grad_norm": 0.711901190609046, "learning_rate": 4.98345498783455e-05, "loss": 0.7387, "step": 3528 }, { "epoch": 0.10303348807334092, "grad_norm": 0.6457720312651217, "learning_rate": 4.983292781832928e-05, "loss": 0.687, "step": 3529 }, { "epoch": 0.10306268430118828, "grad_norm": 0.6658950000264298, "learning_rate": 4.983130575831306e-05, "loss": 0.7508, "step": 3530 }, { "epoch": 0.10309188052903565, "grad_norm": 0.7063584885892592, "learning_rate": 4.9829683698296843e-05, "loss": 0.6639, "step": 3531 }, { "epoch": 0.103121076756883, "grad_norm": 0.6318195587421618, "learning_rate": 4.982806163828062e-05, "loss": 0.6708, "step": 3532 }, { "epoch": 0.10315027298473037, "grad_norm": 0.7173831994266147, "learning_rate": 4.98264395782644e-05, "loss": 0.7855, "step": 3533 }, { "epoch": 0.10317946921257773, "grad_norm": 0.6649885018890175, "learning_rate": 4.982481751824818e-05, "loss": 0.7043, "step": 3534 }, { "epoch": 0.10320866544042509, "grad_norm": 0.672098731675047, "learning_rate": 4.982319545823196e-05, "loss": 0.7093, "step": 3535 }, { "epoch": 0.10323786166827247, "grad_norm": 0.624862698011383, "learning_rate": 4.982157339821574e-05, "loss": 0.6938, "step": 3536 }, { "epoch": 0.10326705789611983, "grad_norm": 0.6053367051037247, "learning_rate": 4.9819951338199514e-05, "loss": 0.6025, "step": 3537 }, { "epoch": 0.10329625412396719, "grad_norm": 0.648763264853155, "learning_rate": 4.9818329278183295e-05, "loss": 0.6493, "step": 3538 }, { "epoch": 0.10332545035181455, "grad_norm": 0.6801696374619337, "learning_rate": 4.981670721816708e-05, "loss": 0.7194, "step": 3539 }, { "epoch": 0.10335464657966191, "grad_norm": 0.8443495208447579, "learning_rate": 4.981508515815085e-05, "loss": 0.662, "step": 3540 }, { "epoch": 0.10338384280750927, "grad_norm": 0.7082061129252969, "learning_rate": 4.9813463098134634e-05, "loss": 0.7603, "step": 3541 }, { "epoch": 0.10341303903535663, "grad_norm": 0.641802892415668, "learning_rate": 4.981184103811841e-05, "loss": 0.6878, "step": 3542 }, { "epoch": 0.103442235263204, "grad_norm": 0.6093733704860733, "learning_rate": 4.981021897810219e-05, "loss": 0.6453, "step": 3543 }, { "epoch": 0.10347143149105135, "grad_norm": 0.6903692750566448, "learning_rate": 4.980859691808597e-05, "loss": 0.7309, "step": 3544 }, { "epoch": 0.10350062771889872, "grad_norm": 0.6473964368917472, "learning_rate": 4.980697485806975e-05, "loss": 0.7333, "step": 3545 }, { "epoch": 0.10352982394674608, "grad_norm": 0.6629984580631865, "learning_rate": 4.980535279805353e-05, "loss": 0.7263, "step": 3546 }, { "epoch": 0.10355902017459344, "grad_norm": 0.6417910648374714, "learning_rate": 4.9803730738037304e-05, "loss": 0.7277, "step": 3547 }, { "epoch": 0.1035882164024408, "grad_norm": 0.678975162629082, "learning_rate": 4.9802108678021086e-05, "loss": 0.759, "step": 3548 }, { "epoch": 0.10361741263028816, "grad_norm": 0.8186452542143867, "learning_rate": 4.9800486618004875e-05, "loss": 0.7638, "step": 3549 }, { "epoch": 0.10364660885813552, "grad_norm": 0.568170592625856, "learning_rate": 4.979886455798865e-05, "loss": 0.5408, "step": 3550 }, { "epoch": 0.1036758050859829, "grad_norm": 0.662597091422609, "learning_rate": 4.979724249797243e-05, "loss": 0.6915, "step": 3551 }, { "epoch": 0.10370500131383026, "grad_norm": 0.7312425256164189, "learning_rate": 4.9795620437956206e-05, "loss": 0.7865, "step": 3552 }, { "epoch": 0.10373419754167762, "grad_norm": 0.6955840603214505, "learning_rate": 4.979399837793999e-05, "loss": 0.7828, "step": 3553 }, { "epoch": 0.10376339376952498, "grad_norm": 0.6265300752042982, "learning_rate": 4.979237631792377e-05, "loss": 0.6367, "step": 3554 }, { "epoch": 0.10379258999737234, "grad_norm": 0.7752970571790855, "learning_rate": 4.9790754257907545e-05, "loss": 0.7348, "step": 3555 }, { "epoch": 0.1038217862252197, "grad_norm": 0.635221668451183, "learning_rate": 4.9789132197891327e-05, "loss": 0.6663, "step": 3556 }, { "epoch": 0.10385098245306706, "grad_norm": 0.6195848494415473, "learning_rate": 4.97875101378751e-05, "loss": 0.6575, "step": 3557 }, { "epoch": 0.10388017868091443, "grad_norm": 0.7648741001477142, "learning_rate": 4.978588807785888e-05, "loss": 0.7376, "step": 3558 }, { "epoch": 0.10390937490876179, "grad_norm": 0.6586923161114083, "learning_rate": 4.9784266017842665e-05, "loss": 0.7038, "step": 3559 }, { "epoch": 0.10393857113660915, "grad_norm": 0.6298252544614982, "learning_rate": 4.978264395782644e-05, "loss": 0.7066, "step": 3560 }, { "epoch": 0.10396776736445651, "grad_norm": 0.7445418345984336, "learning_rate": 4.978102189781022e-05, "loss": 0.7167, "step": 3561 }, { "epoch": 0.10399696359230387, "grad_norm": 0.6751893447021357, "learning_rate": 4.9779399837794e-05, "loss": 0.7007, "step": 3562 }, { "epoch": 0.10402615982015123, "grad_norm": 0.675399319147016, "learning_rate": 4.977777777777778e-05, "loss": 0.7213, "step": 3563 }, { "epoch": 0.10405535604799859, "grad_norm": 0.7640726735479658, "learning_rate": 4.977615571776156e-05, "loss": 0.7065, "step": 3564 }, { "epoch": 0.10408455227584595, "grad_norm": 0.6587840099152246, "learning_rate": 4.9774533657745335e-05, "loss": 0.7096, "step": 3565 }, { "epoch": 0.10411374850369333, "grad_norm": 0.6948382825582239, "learning_rate": 4.977291159772912e-05, "loss": 0.7218, "step": 3566 }, { "epoch": 0.10414294473154069, "grad_norm": 0.5896496638150329, "learning_rate": 4.977128953771289e-05, "loss": 0.6157, "step": 3567 }, { "epoch": 0.10417214095938805, "grad_norm": 0.6022499084656447, "learning_rate": 4.976966747769668e-05, "loss": 0.6137, "step": 3568 }, { "epoch": 0.10420133718723541, "grad_norm": 0.6836466888362394, "learning_rate": 4.9768045417680456e-05, "loss": 0.6567, "step": 3569 }, { "epoch": 0.10423053341508277, "grad_norm": 0.6486115288792688, "learning_rate": 4.976642335766424e-05, "loss": 0.6646, "step": 3570 }, { "epoch": 0.10425972964293014, "grad_norm": 0.6196047003459426, "learning_rate": 4.976480129764802e-05, "loss": 0.6189, "step": 3571 }, { "epoch": 0.1042889258707775, "grad_norm": 0.6138457338664232, "learning_rate": 4.9763179237631794e-05, "loss": 0.6234, "step": 3572 }, { "epoch": 0.10431812209862486, "grad_norm": 0.6101699480536621, "learning_rate": 4.9761557177615576e-05, "loss": 0.6494, "step": 3573 }, { "epoch": 0.10434731832647222, "grad_norm": 0.6198888385100236, "learning_rate": 4.975993511759936e-05, "loss": 0.7004, "step": 3574 }, { "epoch": 0.10437651455431958, "grad_norm": 0.6164639795125847, "learning_rate": 4.975831305758313e-05, "loss": 0.6321, "step": 3575 }, { "epoch": 0.10440571078216694, "grad_norm": 0.7215868677624961, "learning_rate": 4.9756690997566914e-05, "loss": 0.6711, "step": 3576 }, { "epoch": 0.1044349070100143, "grad_norm": 0.6504607530792276, "learning_rate": 4.975506893755069e-05, "loss": 0.6893, "step": 3577 }, { "epoch": 0.10446410323786166, "grad_norm": 0.6873768027814662, "learning_rate": 4.975344687753447e-05, "loss": 0.6806, "step": 3578 }, { "epoch": 0.10449329946570902, "grad_norm": 0.6172708982218167, "learning_rate": 4.975182481751825e-05, "loss": 0.6228, "step": 3579 }, { "epoch": 0.10452249569355639, "grad_norm": 0.6453729420015913, "learning_rate": 4.975020275750203e-05, "loss": 0.6327, "step": 3580 }, { "epoch": 0.10455169192140376, "grad_norm": 0.6304306018800123, "learning_rate": 4.974858069748581e-05, "loss": 0.6553, "step": 3581 }, { "epoch": 0.10458088814925112, "grad_norm": 0.6523752655502185, "learning_rate": 4.9746958637469585e-05, "loss": 0.6366, "step": 3582 }, { "epoch": 0.10461008437709848, "grad_norm": 0.6633851153949682, "learning_rate": 4.9745336577453366e-05, "loss": 0.7222, "step": 3583 }, { "epoch": 0.10463928060494584, "grad_norm": 0.7225446457125501, "learning_rate": 4.974371451743715e-05, "loss": 0.7642, "step": 3584 }, { "epoch": 0.1046684768327932, "grad_norm": 0.6763463766666963, "learning_rate": 4.974209245742092e-05, "loss": 0.6515, "step": 3585 }, { "epoch": 0.10469767306064057, "grad_norm": 0.6393254007492822, "learning_rate": 4.9740470397404705e-05, "loss": 0.6784, "step": 3586 }, { "epoch": 0.10472686928848793, "grad_norm": 0.6291843423361043, "learning_rate": 4.973884833738849e-05, "loss": 0.6605, "step": 3587 }, { "epoch": 0.10475606551633529, "grad_norm": 0.6670273695657277, "learning_rate": 4.973722627737227e-05, "loss": 0.7178, "step": 3588 }, { "epoch": 0.10478526174418265, "grad_norm": 0.7844855649501868, "learning_rate": 4.973560421735604e-05, "loss": 0.7361, "step": 3589 }, { "epoch": 0.10481445797203001, "grad_norm": 0.6477459744042556, "learning_rate": 4.9733982157339825e-05, "loss": 0.712, "step": 3590 }, { "epoch": 0.10484365419987737, "grad_norm": 0.5828113742616461, "learning_rate": 4.973236009732361e-05, "loss": 0.5919, "step": 3591 }, { "epoch": 0.10487285042772473, "grad_norm": 0.6324576571139928, "learning_rate": 4.973073803730738e-05, "loss": 0.6209, "step": 3592 }, { "epoch": 0.1049020466555721, "grad_norm": 0.6946841426255537, "learning_rate": 4.9729115977291164e-05, "loss": 0.7428, "step": 3593 }, { "epoch": 0.10493124288341946, "grad_norm": 0.7120743249705171, "learning_rate": 4.9727493917274945e-05, "loss": 0.7512, "step": 3594 }, { "epoch": 0.10496043911126682, "grad_norm": 0.719052194626615, "learning_rate": 4.972587185725872e-05, "loss": 0.7272, "step": 3595 }, { "epoch": 0.10498963533911419, "grad_norm": 0.5930542975751606, "learning_rate": 4.97242497972425e-05, "loss": 0.6282, "step": 3596 }, { "epoch": 0.10501883156696155, "grad_norm": 0.7032154562325693, "learning_rate": 4.972262773722628e-05, "loss": 0.7648, "step": 3597 }, { "epoch": 0.10504802779480892, "grad_norm": 0.6625478718011254, "learning_rate": 4.972100567721006e-05, "loss": 0.813, "step": 3598 }, { "epoch": 0.10507722402265628, "grad_norm": 0.6265378272728019, "learning_rate": 4.971938361719384e-05, "loss": 0.648, "step": 3599 }, { "epoch": 0.10510642025050364, "grad_norm": 0.619492591309794, "learning_rate": 4.9717761557177616e-05, "loss": 0.6662, "step": 3600 }, { "epoch": 0.105135616478351, "grad_norm": 0.6046227646318677, "learning_rate": 4.97161394971614e-05, "loss": 0.6113, "step": 3601 }, { "epoch": 0.10516481270619836, "grad_norm": 0.6701757582018483, "learning_rate": 4.971451743714517e-05, "loss": 0.7072, "step": 3602 }, { "epoch": 0.10519400893404572, "grad_norm": 0.6421142346987972, "learning_rate": 4.9712895377128954e-05, "loss": 0.602, "step": 3603 }, { "epoch": 0.10522320516189308, "grad_norm": 0.6778769589853897, "learning_rate": 4.9711273317112736e-05, "loss": 0.7037, "step": 3604 }, { "epoch": 0.10525240138974044, "grad_norm": 0.6417350601494572, "learning_rate": 4.970965125709651e-05, "loss": 0.6175, "step": 3605 }, { "epoch": 0.1052815976175878, "grad_norm": 0.6899280092642458, "learning_rate": 4.97080291970803e-05, "loss": 0.7106, "step": 3606 }, { "epoch": 0.10531079384543517, "grad_norm": 0.6062492444986196, "learning_rate": 4.9706407137064074e-05, "loss": 0.6665, "step": 3607 }, { "epoch": 0.10533999007328253, "grad_norm": 0.6534555136122323, "learning_rate": 4.9704785077047856e-05, "loss": 0.725, "step": 3608 }, { "epoch": 0.10536918630112989, "grad_norm": 0.7569476458974017, "learning_rate": 4.970316301703163e-05, "loss": 0.8218, "step": 3609 }, { "epoch": 0.10539838252897725, "grad_norm": 0.6564279694405979, "learning_rate": 4.970154095701541e-05, "loss": 0.7398, "step": 3610 }, { "epoch": 0.10542757875682462, "grad_norm": 0.695711154329518, "learning_rate": 4.9699918896999195e-05, "loss": 0.6468, "step": 3611 }, { "epoch": 0.10545677498467199, "grad_norm": 0.6759083566026478, "learning_rate": 4.969829683698297e-05, "loss": 0.6799, "step": 3612 }, { "epoch": 0.10548597121251935, "grad_norm": 0.7360251571318323, "learning_rate": 4.969667477696675e-05, "loss": 0.7526, "step": 3613 }, { "epoch": 0.10551516744036671, "grad_norm": 0.634332589624378, "learning_rate": 4.9695052716950526e-05, "loss": 0.7098, "step": 3614 }, { "epoch": 0.10554436366821407, "grad_norm": 0.7093889852179817, "learning_rate": 4.969343065693431e-05, "loss": 0.723, "step": 3615 }, { "epoch": 0.10557355989606143, "grad_norm": 0.6253068783678265, "learning_rate": 4.969180859691809e-05, "loss": 0.6393, "step": 3616 }, { "epoch": 0.10560275612390879, "grad_norm": 0.6824288924461136, "learning_rate": 4.9690186536901865e-05, "loss": 0.7432, "step": 3617 }, { "epoch": 0.10563195235175615, "grad_norm": 0.6305082902055852, "learning_rate": 4.968856447688565e-05, "loss": 0.5221, "step": 3618 }, { "epoch": 0.10566114857960351, "grad_norm": 0.740888375425207, "learning_rate": 4.968694241686943e-05, "loss": 0.7515, "step": 3619 }, { "epoch": 0.10569034480745088, "grad_norm": 0.593387055412045, "learning_rate": 4.9685320356853203e-05, "loss": 0.6359, "step": 3620 }, { "epoch": 0.10571954103529824, "grad_norm": 0.6817421420743645, "learning_rate": 4.9683698296836985e-05, "loss": 0.7483, "step": 3621 }, { "epoch": 0.1057487372631456, "grad_norm": 0.6465603328062886, "learning_rate": 4.968207623682076e-05, "loss": 0.7143, "step": 3622 }, { "epoch": 0.10577793349099296, "grad_norm": 0.6544614134124197, "learning_rate": 4.968045417680454e-05, "loss": 0.721, "step": 3623 }, { "epoch": 0.10580712971884032, "grad_norm": 0.697208431760526, "learning_rate": 4.9678832116788324e-05, "loss": 0.7588, "step": 3624 }, { "epoch": 0.10583632594668768, "grad_norm": 0.7225343113508458, "learning_rate": 4.9677210056772105e-05, "loss": 0.6241, "step": 3625 }, { "epoch": 0.10586552217453506, "grad_norm": 0.5960292428023479, "learning_rate": 4.967558799675589e-05, "loss": 0.6117, "step": 3626 }, { "epoch": 0.10589471840238242, "grad_norm": 0.6560321412412397, "learning_rate": 4.967396593673966e-05, "loss": 0.6846, "step": 3627 }, { "epoch": 0.10592391463022978, "grad_norm": 0.6947778880310271, "learning_rate": 4.9672343876723444e-05, "loss": 0.8025, "step": 3628 }, { "epoch": 0.10595311085807714, "grad_norm": 0.6870381503035182, "learning_rate": 4.967072181670722e-05, "loss": 0.7374, "step": 3629 }, { "epoch": 0.1059823070859245, "grad_norm": 0.6029166368146271, "learning_rate": 4.9669099756691e-05, "loss": 0.5815, "step": 3630 }, { "epoch": 0.10601150331377186, "grad_norm": 0.663733136685757, "learning_rate": 4.966747769667478e-05, "loss": 0.7212, "step": 3631 }, { "epoch": 0.10604069954161922, "grad_norm": 0.7290457629966264, "learning_rate": 4.966585563665856e-05, "loss": 0.7768, "step": 3632 }, { "epoch": 0.10606989576946659, "grad_norm": 0.6036814131077266, "learning_rate": 4.966423357664234e-05, "loss": 0.5763, "step": 3633 }, { "epoch": 0.10609909199731395, "grad_norm": 0.971424475972962, "learning_rate": 4.9662611516626114e-05, "loss": 0.7169, "step": 3634 }, { "epoch": 0.10612828822516131, "grad_norm": 0.6772248639438309, "learning_rate": 4.9660989456609896e-05, "loss": 0.6735, "step": 3635 }, { "epoch": 0.10615748445300867, "grad_norm": 0.6575476892927778, "learning_rate": 4.965936739659368e-05, "loss": 0.6968, "step": 3636 }, { "epoch": 0.10618668068085603, "grad_norm": 0.9418398580580181, "learning_rate": 4.965774533657745e-05, "loss": 0.7533, "step": 3637 }, { "epoch": 0.10621587690870339, "grad_norm": 0.6056361388147952, "learning_rate": 4.9656123276561235e-05, "loss": 0.6156, "step": 3638 }, { "epoch": 0.10624507313655075, "grad_norm": 0.716159133600177, "learning_rate": 4.9654501216545016e-05, "loss": 0.6484, "step": 3639 }, { "epoch": 0.10627426936439811, "grad_norm": 0.6126707552366986, "learning_rate": 4.965287915652879e-05, "loss": 0.6658, "step": 3640 }, { "epoch": 0.10630346559224549, "grad_norm": 0.647478550544821, "learning_rate": 4.965125709651257e-05, "loss": 0.6733, "step": 3641 }, { "epoch": 0.10633266182009285, "grad_norm": 0.6534403895941802, "learning_rate": 4.964963503649635e-05, "loss": 0.7031, "step": 3642 }, { "epoch": 0.10636185804794021, "grad_norm": 0.6683019284335465, "learning_rate": 4.964801297648013e-05, "loss": 0.6926, "step": 3643 }, { "epoch": 0.10639105427578757, "grad_norm": 0.6561486310504054, "learning_rate": 4.964639091646391e-05, "loss": 0.6466, "step": 3644 }, { "epoch": 0.10642025050363493, "grad_norm": 0.7060099211202522, "learning_rate": 4.964476885644769e-05, "loss": 0.7596, "step": 3645 }, { "epoch": 0.1064494467314823, "grad_norm": 0.68111176645059, "learning_rate": 4.9643146796431475e-05, "loss": 0.7195, "step": 3646 }, { "epoch": 0.10647864295932966, "grad_norm": 0.6003640022796979, "learning_rate": 4.964152473641525e-05, "loss": 0.6028, "step": 3647 }, { "epoch": 0.10650783918717702, "grad_norm": 0.6178194557900683, "learning_rate": 4.963990267639903e-05, "loss": 0.6311, "step": 3648 }, { "epoch": 0.10653703541502438, "grad_norm": 0.6831906053093281, "learning_rate": 4.963828061638281e-05, "loss": 0.7509, "step": 3649 }, { "epoch": 0.10656623164287174, "grad_norm": 0.702933818254467, "learning_rate": 4.963665855636659e-05, "loss": 0.7667, "step": 3650 }, { "epoch": 0.1065954278707191, "grad_norm": 0.6767297566342384, "learning_rate": 4.963503649635037e-05, "loss": 0.7388, "step": 3651 }, { "epoch": 0.10662462409856646, "grad_norm": 0.8026370376551537, "learning_rate": 4.9633414436334145e-05, "loss": 0.7751, "step": 3652 }, { "epoch": 0.10665382032641382, "grad_norm": 0.6722952869143621, "learning_rate": 4.963179237631793e-05, "loss": 0.738, "step": 3653 }, { "epoch": 0.10668301655426118, "grad_norm": 0.6855208957863168, "learning_rate": 4.96301703163017e-05, "loss": 0.6062, "step": 3654 }, { "epoch": 0.10671221278210855, "grad_norm": 0.7680628535943641, "learning_rate": 4.9628548256285484e-05, "loss": 0.7709, "step": 3655 }, { "epoch": 0.10674140900995592, "grad_norm": 0.6148617181355089, "learning_rate": 4.9626926196269266e-05, "loss": 0.6558, "step": 3656 }, { "epoch": 0.10677060523780328, "grad_norm": 0.6304056304336109, "learning_rate": 4.962530413625304e-05, "loss": 0.6414, "step": 3657 }, { "epoch": 0.10679980146565064, "grad_norm": 0.6536533432081895, "learning_rate": 4.962368207623682e-05, "loss": 0.749, "step": 3658 }, { "epoch": 0.106828997693498, "grad_norm": 0.6405419468731688, "learning_rate": 4.96220600162206e-05, "loss": 0.6904, "step": 3659 }, { "epoch": 0.10685819392134537, "grad_norm": 0.6015440704120558, "learning_rate": 4.962043795620438e-05, "loss": 0.6447, "step": 3660 }, { "epoch": 0.10688739014919273, "grad_norm": 0.7068280870924352, "learning_rate": 4.961881589618816e-05, "loss": 0.7128, "step": 3661 }, { "epoch": 0.10691658637704009, "grad_norm": 0.6266681135906433, "learning_rate": 4.961719383617194e-05, "loss": 0.6849, "step": 3662 }, { "epoch": 0.10694578260488745, "grad_norm": 0.6469007858066255, "learning_rate": 4.9615571776155724e-05, "loss": 0.6701, "step": 3663 }, { "epoch": 0.10697497883273481, "grad_norm": 0.621346524449873, "learning_rate": 4.96139497161395e-05, "loss": 0.6372, "step": 3664 }, { "epoch": 0.10700417506058217, "grad_norm": 0.7189142266389731, "learning_rate": 4.961232765612328e-05, "loss": 0.6822, "step": 3665 }, { "epoch": 0.10703337128842953, "grad_norm": 0.6218289382988026, "learning_rate": 4.961070559610706e-05, "loss": 0.6518, "step": 3666 }, { "epoch": 0.1070625675162769, "grad_norm": 0.5977550631134724, "learning_rate": 4.960908353609084e-05, "loss": 0.582, "step": 3667 }, { "epoch": 0.10709176374412425, "grad_norm": 0.6380474719930028, "learning_rate": 4.960746147607462e-05, "loss": 0.6553, "step": 3668 }, { "epoch": 0.10712095997197162, "grad_norm": 0.7083448619338674, "learning_rate": 4.9605839416058395e-05, "loss": 0.8272, "step": 3669 }, { "epoch": 0.10715015619981898, "grad_norm": 0.6371642374212138, "learning_rate": 4.9604217356042176e-05, "loss": 0.7269, "step": 3670 }, { "epoch": 0.10717935242766635, "grad_norm": 0.6629354966715064, "learning_rate": 4.960259529602596e-05, "loss": 0.6973, "step": 3671 }, { "epoch": 0.10720854865551371, "grad_norm": 0.6251213036587094, "learning_rate": 4.960097323600973e-05, "loss": 0.6662, "step": 3672 }, { "epoch": 0.10723774488336107, "grad_norm": 0.5816078532237244, "learning_rate": 4.9599351175993515e-05, "loss": 0.5757, "step": 3673 }, { "epoch": 0.10726694111120844, "grad_norm": 0.6371239229513438, "learning_rate": 4.959772911597729e-05, "loss": 0.6871, "step": 3674 }, { "epoch": 0.1072961373390558, "grad_norm": 0.5878847868520536, "learning_rate": 4.959610705596107e-05, "loss": 0.606, "step": 3675 }, { "epoch": 0.10732533356690316, "grad_norm": 0.6680735729527457, "learning_rate": 4.959448499594485e-05, "loss": 0.6986, "step": 3676 }, { "epoch": 0.10735452979475052, "grad_norm": 0.6891253489388631, "learning_rate": 4.959286293592863e-05, "loss": 0.698, "step": 3677 }, { "epoch": 0.10738372602259788, "grad_norm": 0.6550336342909389, "learning_rate": 4.959124087591241e-05, "loss": 0.7124, "step": 3678 }, { "epoch": 0.10741292225044524, "grad_norm": 0.5847463513250183, "learning_rate": 4.9589618815896185e-05, "loss": 0.6357, "step": 3679 }, { "epoch": 0.1074421184782926, "grad_norm": 0.6663736009034165, "learning_rate": 4.958799675587997e-05, "loss": 0.7634, "step": 3680 }, { "epoch": 0.10747131470613996, "grad_norm": 0.6050579661361042, "learning_rate": 4.9586374695863755e-05, "loss": 0.6361, "step": 3681 }, { "epoch": 0.10750051093398733, "grad_norm": 0.724692240191626, "learning_rate": 4.958475263584753e-05, "loss": 0.7851, "step": 3682 }, { "epoch": 0.10752970716183469, "grad_norm": 0.5826223798744657, "learning_rate": 4.958313057583131e-05, "loss": 0.5903, "step": 3683 }, { "epoch": 0.10755890338968205, "grad_norm": 0.6730373213589358, "learning_rate": 4.958150851581509e-05, "loss": 0.7206, "step": 3684 }, { "epoch": 0.10758809961752941, "grad_norm": 0.5843367204518949, "learning_rate": 4.957988645579887e-05, "loss": 0.5696, "step": 3685 }, { "epoch": 0.10761729584537678, "grad_norm": 0.6415605512307784, "learning_rate": 4.957826439578265e-05, "loss": 0.6457, "step": 3686 }, { "epoch": 0.10764649207322415, "grad_norm": 0.5976590539509503, "learning_rate": 4.9576642335766426e-05, "loss": 0.5625, "step": 3687 }, { "epoch": 0.1076756883010715, "grad_norm": 0.6287909721185294, "learning_rate": 4.957502027575021e-05, "loss": 0.615, "step": 3688 }, { "epoch": 0.10770488452891887, "grad_norm": 0.6876400175262997, "learning_rate": 4.957339821573398e-05, "loss": 0.7446, "step": 3689 }, { "epoch": 0.10773408075676623, "grad_norm": 0.7722915200208684, "learning_rate": 4.9571776155717764e-05, "loss": 0.8289, "step": 3690 }, { "epoch": 0.10776327698461359, "grad_norm": 0.7210082196364538, "learning_rate": 4.9570154095701546e-05, "loss": 0.7704, "step": 3691 }, { "epoch": 0.10779247321246095, "grad_norm": 0.5905060102181526, "learning_rate": 4.956853203568532e-05, "loss": 0.5726, "step": 3692 }, { "epoch": 0.10782166944030831, "grad_norm": 0.6747848754445933, "learning_rate": 4.95669099756691e-05, "loss": 0.7301, "step": 3693 }, { "epoch": 0.10785086566815567, "grad_norm": 0.6627043481800117, "learning_rate": 4.956528791565288e-05, "loss": 0.6523, "step": 3694 }, { "epoch": 0.10788006189600303, "grad_norm": 0.5981988652539179, "learning_rate": 4.956366585563666e-05, "loss": 0.6029, "step": 3695 }, { "epoch": 0.1079092581238504, "grad_norm": 0.7948488261107516, "learning_rate": 4.956204379562044e-05, "loss": 0.8081, "step": 3696 }, { "epoch": 0.10793845435169776, "grad_norm": 0.625725297499219, "learning_rate": 4.9560421735604216e-05, "loss": 0.631, "step": 3697 }, { "epoch": 0.10796765057954512, "grad_norm": 0.6702255995116937, "learning_rate": 4.9558799675588e-05, "loss": 0.6667, "step": 3698 }, { "epoch": 0.10799684680739248, "grad_norm": 0.6334797170956213, "learning_rate": 4.955717761557177e-05, "loss": 0.6398, "step": 3699 }, { "epoch": 0.10802604303523984, "grad_norm": 0.5952897479645425, "learning_rate": 4.955555555555556e-05, "loss": 0.5733, "step": 3700 }, { "epoch": 0.10805523926308722, "grad_norm": 0.6630815151908538, "learning_rate": 4.9553933495539336e-05, "loss": 0.7209, "step": 3701 }, { "epoch": 0.10808443549093458, "grad_norm": 0.6585552115711847, "learning_rate": 4.955231143552312e-05, "loss": 0.6493, "step": 3702 }, { "epoch": 0.10811363171878194, "grad_norm": 0.6946113777291333, "learning_rate": 4.95506893755069e-05, "loss": 0.7376, "step": 3703 }, { "epoch": 0.1081428279466293, "grad_norm": 0.652687925052846, "learning_rate": 4.9549067315490675e-05, "loss": 0.6624, "step": 3704 }, { "epoch": 0.10817202417447666, "grad_norm": 0.6023090819659593, "learning_rate": 4.954744525547446e-05, "loss": 0.6484, "step": 3705 }, { "epoch": 0.10820122040232402, "grad_norm": 0.613233871782238, "learning_rate": 4.954582319545824e-05, "loss": 0.6672, "step": 3706 }, { "epoch": 0.10823041663017138, "grad_norm": 0.9625447440521908, "learning_rate": 4.9544201135442013e-05, "loss": 0.6966, "step": 3707 }, { "epoch": 0.10825961285801874, "grad_norm": 0.6106355973465614, "learning_rate": 4.9542579075425795e-05, "loss": 0.5665, "step": 3708 }, { "epoch": 0.1082888090858661, "grad_norm": 0.6575791908061382, "learning_rate": 4.954095701540957e-05, "loss": 0.7234, "step": 3709 }, { "epoch": 0.10831800531371347, "grad_norm": 0.6244606819870168, "learning_rate": 4.953933495539335e-05, "loss": 0.602, "step": 3710 }, { "epoch": 0.10834720154156083, "grad_norm": 0.6504826840576912, "learning_rate": 4.9537712895377134e-05, "loss": 0.6252, "step": 3711 }, { "epoch": 0.10837639776940819, "grad_norm": 0.6766425695517746, "learning_rate": 4.953609083536091e-05, "loss": 0.7236, "step": 3712 }, { "epoch": 0.10840559399725555, "grad_norm": 0.6552643049672046, "learning_rate": 4.953446877534469e-05, "loss": 0.7401, "step": 3713 }, { "epoch": 0.10843479022510291, "grad_norm": 0.8749655883563697, "learning_rate": 4.9532846715328465e-05, "loss": 0.6943, "step": 3714 }, { "epoch": 0.10846398645295027, "grad_norm": 0.7141337483579019, "learning_rate": 4.953122465531225e-05, "loss": 0.7464, "step": 3715 }, { "epoch": 0.10849318268079763, "grad_norm": 0.7041179287577202, "learning_rate": 4.952960259529603e-05, "loss": 0.6484, "step": 3716 }, { "epoch": 0.10852237890864501, "grad_norm": 0.6512120120630623, "learning_rate": 4.9527980535279804e-05, "loss": 0.6429, "step": 3717 }, { "epoch": 0.10855157513649237, "grad_norm": 0.6564057854738093, "learning_rate": 4.9526358475263586e-05, "loss": 0.6135, "step": 3718 }, { "epoch": 0.10858077136433973, "grad_norm": 0.7186743054995145, "learning_rate": 4.952473641524737e-05, "loss": 0.745, "step": 3719 }, { "epoch": 0.10860996759218709, "grad_norm": 0.6848427083704073, "learning_rate": 4.952311435523115e-05, "loss": 0.7609, "step": 3720 }, { "epoch": 0.10863916382003445, "grad_norm": 0.6944731422374142, "learning_rate": 4.9521492295214924e-05, "loss": 0.7273, "step": 3721 }, { "epoch": 0.10866836004788182, "grad_norm": 0.7934907893815697, "learning_rate": 4.9519870235198706e-05, "loss": 0.7267, "step": 3722 }, { "epoch": 0.10869755627572918, "grad_norm": 0.5805657466777625, "learning_rate": 4.951824817518249e-05, "loss": 0.608, "step": 3723 }, { "epoch": 0.10872675250357654, "grad_norm": 0.6119600374720999, "learning_rate": 4.951662611516626e-05, "loss": 0.6828, "step": 3724 }, { "epoch": 0.1087559487314239, "grad_norm": 0.7114927678440873, "learning_rate": 4.9515004055150045e-05, "loss": 0.7158, "step": 3725 }, { "epoch": 0.10878514495927126, "grad_norm": 0.6376505227896493, "learning_rate": 4.9513381995133826e-05, "loss": 0.6866, "step": 3726 }, { "epoch": 0.10881434118711862, "grad_norm": 0.7312232630812736, "learning_rate": 4.95117599351176e-05, "loss": 0.7038, "step": 3727 }, { "epoch": 0.10884353741496598, "grad_norm": 0.6735935265464341, "learning_rate": 4.951013787510138e-05, "loss": 0.7582, "step": 3728 }, { "epoch": 0.10887273364281334, "grad_norm": 0.6711372818831268, "learning_rate": 4.950851581508516e-05, "loss": 0.6853, "step": 3729 }, { "epoch": 0.1089019298706607, "grad_norm": 0.6564883487029182, "learning_rate": 4.950689375506894e-05, "loss": 0.7243, "step": 3730 }, { "epoch": 0.10893112609850807, "grad_norm": 0.692612089555508, "learning_rate": 4.950527169505272e-05, "loss": 0.7686, "step": 3731 }, { "epoch": 0.10896032232635544, "grad_norm": 0.6158132762390633, "learning_rate": 4.9503649635036497e-05, "loss": 0.6448, "step": 3732 }, { "epoch": 0.1089895185542028, "grad_norm": 0.6197540702620102, "learning_rate": 4.950202757502028e-05, "loss": 0.6364, "step": 3733 }, { "epoch": 0.10901871478205016, "grad_norm": 0.7483428270611308, "learning_rate": 4.950040551500405e-05, "loss": 0.7207, "step": 3734 }, { "epoch": 0.10904791100989752, "grad_norm": 0.7402748391876345, "learning_rate": 4.9498783454987835e-05, "loss": 0.8055, "step": 3735 }, { "epoch": 0.10907710723774489, "grad_norm": 0.6234717547777577, "learning_rate": 4.949716139497162e-05, "loss": 0.6969, "step": 3736 }, { "epoch": 0.10910630346559225, "grad_norm": 0.6927792879782431, "learning_rate": 4.949553933495539e-05, "loss": 0.7524, "step": 3737 }, { "epoch": 0.10913549969343961, "grad_norm": 0.7438401112974996, "learning_rate": 4.949391727493918e-05, "loss": 0.7605, "step": 3738 }, { "epoch": 0.10916469592128697, "grad_norm": 0.6722413115869411, "learning_rate": 4.9492295214922955e-05, "loss": 0.6851, "step": 3739 }, { "epoch": 0.10919389214913433, "grad_norm": 0.5984424966216431, "learning_rate": 4.949067315490674e-05, "loss": 0.5905, "step": 3740 }, { "epoch": 0.10922308837698169, "grad_norm": 0.6995432569578822, "learning_rate": 4.948905109489051e-05, "loss": 0.7427, "step": 3741 }, { "epoch": 0.10925228460482905, "grad_norm": 0.5620626537435565, "learning_rate": 4.9487429034874294e-05, "loss": 0.5705, "step": 3742 }, { "epoch": 0.10928148083267641, "grad_norm": 0.6376544834796959, "learning_rate": 4.9485806974858076e-05, "loss": 0.6449, "step": 3743 }, { "epoch": 0.10931067706052378, "grad_norm": 0.6698661242116931, "learning_rate": 4.948418491484185e-05, "loss": 0.6855, "step": 3744 }, { "epoch": 0.10933987328837114, "grad_norm": 0.7626024082131969, "learning_rate": 4.948256285482563e-05, "loss": 0.8503, "step": 3745 }, { "epoch": 0.1093690695162185, "grad_norm": 0.6380994323252491, "learning_rate": 4.948094079480941e-05, "loss": 0.7693, "step": 3746 }, { "epoch": 0.10939826574406587, "grad_norm": 0.6322623288423322, "learning_rate": 4.947931873479319e-05, "loss": 0.7043, "step": 3747 }, { "epoch": 0.10942746197191323, "grad_norm": 0.7070347778713506, "learning_rate": 4.947769667477697e-05, "loss": 0.8131, "step": 3748 }, { "epoch": 0.1094566581997606, "grad_norm": 0.9700723373787437, "learning_rate": 4.9476074614760746e-05, "loss": 0.7618, "step": 3749 }, { "epoch": 0.10948585442760796, "grad_norm": 0.6324359818175309, "learning_rate": 4.947445255474453e-05, "loss": 0.7177, "step": 3750 }, { "epoch": 0.10951505065545532, "grad_norm": 0.6542006518468914, "learning_rate": 4.947283049472831e-05, "loss": 0.7542, "step": 3751 }, { "epoch": 0.10954424688330268, "grad_norm": 0.7150103467615336, "learning_rate": 4.9471208434712084e-05, "loss": 0.7467, "step": 3752 }, { "epoch": 0.10957344311115004, "grad_norm": 0.736196097588705, "learning_rate": 4.9469586374695866e-05, "loss": 0.7719, "step": 3753 }, { "epoch": 0.1096026393389974, "grad_norm": 0.6438052404012744, "learning_rate": 4.946796431467964e-05, "loss": 0.6636, "step": 3754 }, { "epoch": 0.10963183556684476, "grad_norm": 0.6584244426042205, "learning_rate": 4.946634225466342e-05, "loss": 0.6901, "step": 3755 }, { "epoch": 0.10966103179469212, "grad_norm": 0.6695085218814717, "learning_rate": 4.9464720194647205e-05, "loss": 0.6903, "step": 3756 }, { "epoch": 0.10969022802253948, "grad_norm": 0.5700537480400752, "learning_rate": 4.9463098134630986e-05, "loss": 0.6071, "step": 3757 }, { "epoch": 0.10971942425038685, "grad_norm": 0.7053404369602725, "learning_rate": 4.946147607461477e-05, "loss": 0.7556, "step": 3758 }, { "epoch": 0.10974862047823421, "grad_norm": 0.6713024402001021, "learning_rate": 4.945985401459854e-05, "loss": 0.6521, "step": 3759 }, { "epoch": 0.10977781670608157, "grad_norm": 0.7329517179388569, "learning_rate": 4.9458231954582325e-05, "loss": 0.6419, "step": 3760 }, { "epoch": 0.10980701293392893, "grad_norm": 0.7891848993279583, "learning_rate": 4.94566098945661e-05, "loss": 0.8318, "step": 3761 }, { "epoch": 0.1098362091617763, "grad_norm": 0.6514830929512476, "learning_rate": 4.945498783454988e-05, "loss": 0.7052, "step": 3762 }, { "epoch": 0.10986540538962367, "grad_norm": 0.6649224551675452, "learning_rate": 4.945336577453366e-05, "loss": 0.7202, "step": 3763 }, { "epoch": 0.10989460161747103, "grad_norm": 0.6360351930441883, "learning_rate": 4.945174371451744e-05, "loss": 0.6413, "step": 3764 }, { "epoch": 0.10992379784531839, "grad_norm": 0.7098659652255839, "learning_rate": 4.945012165450122e-05, "loss": 0.6855, "step": 3765 }, { "epoch": 0.10995299407316575, "grad_norm": 0.6717727045977314, "learning_rate": 4.9448499594484995e-05, "loss": 0.6884, "step": 3766 }, { "epoch": 0.10998219030101311, "grad_norm": 0.5986414515618871, "learning_rate": 4.944687753446878e-05, "loss": 0.6247, "step": 3767 }, { "epoch": 0.11001138652886047, "grad_norm": 0.5862649066830873, "learning_rate": 4.944525547445256e-05, "loss": 0.6047, "step": 3768 }, { "epoch": 0.11004058275670783, "grad_norm": 0.6632131024662492, "learning_rate": 4.9443633414436334e-05, "loss": 0.714, "step": 3769 }, { "epoch": 0.1100697789845552, "grad_norm": 0.6846711343881111, "learning_rate": 4.9442011354420115e-05, "loss": 0.7789, "step": 3770 }, { "epoch": 0.11009897521240256, "grad_norm": 0.6271101885990574, "learning_rate": 4.94403892944039e-05, "loss": 0.649, "step": 3771 }, { "epoch": 0.11012817144024992, "grad_norm": 0.6235046717811326, "learning_rate": 4.943876723438767e-05, "loss": 0.6581, "step": 3772 }, { "epoch": 0.11015736766809728, "grad_norm": 0.6855912554757985, "learning_rate": 4.9437145174371454e-05, "loss": 0.7674, "step": 3773 }, { "epoch": 0.11018656389594464, "grad_norm": 0.6027414529516486, "learning_rate": 4.943552311435523e-05, "loss": 0.5869, "step": 3774 }, { "epoch": 0.110215760123792, "grad_norm": 0.6038457374534159, "learning_rate": 4.943390105433901e-05, "loss": 0.6267, "step": 3775 }, { "epoch": 0.11024495635163936, "grad_norm": 0.6531433681779935, "learning_rate": 4.943227899432279e-05, "loss": 0.7273, "step": 3776 }, { "epoch": 0.11027415257948674, "grad_norm": 0.7000627757506477, "learning_rate": 4.9430656934306574e-05, "loss": 0.748, "step": 3777 }, { "epoch": 0.1103033488073341, "grad_norm": 0.666023263958205, "learning_rate": 4.9429034874290356e-05, "loss": 0.6461, "step": 3778 }, { "epoch": 0.11033254503518146, "grad_norm": 0.6400805030915728, "learning_rate": 4.942741281427413e-05, "loss": 0.6746, "step": 3779 }, { "epoch": 0.11036174126302882, "grad_norm": 0.6211649148238142, "learning_rate": 4.942579075425791e-05, "loss": 0.6008, "step": 3780 }, { "epoch": 0.11039093749087618, "grad_norm": 0.736703580095424, "learning_rate": 4.942416869424169e-05, "loss": 0.7588, "step": 3781 }, { "epoch": 0.11042013371872354, "grad_norm": 0.7456479756289983, "learning_rate": 4.942254663422547e-05, "loss": 0.7612, "step": 3782 }, { "epoch": 0.1104493299465709, "grad_norm": 0.6842439150605304, "learning_rate": 4.942092457420925e-05, "loss": 0.7792, "step": 3783 }, { "epoch": 0.11047852617441827, "grad_norm": 0.6183786439221004, "learning_rate": 4.9419302514193026e-05, "loss": 0.6087, "step": 3784 }, { "epoch": 0.11050772240226563, "grad_norm": 0.7489961789867021, "learning_rate": 4.941768045417681e-05, "loss": 0.7703, "step": 3785 }, { "epoch": 0.11053691863011299, "grad_norm": 0.6241298071113858, "learning_rate": 4.941605839416058e-05, "loss": 0.6381, "step": 3786 }, { "epoch": 0.11056611485796035, "grad_norm": 0.63189971532029, "learning_rate": 4.9414436334144365e-05, "loss": 0.6132, "step": 3787 }, { "epoch": 0.11059531108580771, "grad_norm": 0.6631169075509326, "learning_rate": 4.9412814274128146e-05, "loss": 0.6931, "step": 3788 }, { "epoch": 0.11062450731365507, "grad_norm": 0.6459307044792779, "learning_rate": 4.941119221411192e-05, "loss": 0.6494, "step": 3789 }, { "epoch": 0.11065370354150243, "grad_norm": 0.6900623798241148, "learning_rate": 4.94095701540957e-05, "loss": 0.8265, "step": 3790 }, { "epoch": 0.1106828997693498, "grad_norm": 0.6128166416641321, "learning_rate": 4.940794809407948e-05, "loss": 0.6585, "step": 3791 }, { "epoch": 0.11071209599719717, "grad_norm": 0.6264624123119297, "learning_rate": 4.940632603406326e-05, "loss": 0.7003, "step": 3792 }, { "epoch": 0.11074129222504453, "grad_norm": 0.7146734496421097, "learning_rate": 4.940470397404704e-05, "loss": 0.7682, "step": 3793 }, { "epoch": 0.11077048845289189, "grad_norm": 0.6525092086375918, "learning_rate": 4.940308191403082e-05, "loss": 0.7077, "step": 3794 }, { "epoch": 0.11079968468073925, "grad_norm": 0.7300105524738837, "learning_rate": 4.9401459854014605e-05, "loss": 0.7759, "step": 3795 }, { "epoch": 0.11082888090858661, "grad_norm": 0.7290098944997067, "learning_rate": 4.939983779399838e-05, "loss": 0.7052, "step": 3796 }, { "epoch": 0.11085807713643397, "grad_norm": 0.6415991007787208, "learning_rate": 4.939821573398216e-05, "loss": 0.7224, "step": 3797 }, { "epoch": 0.11088727336428134, "grad_norm": 0.6647852172228453, "learning_rate": 4.9396593673965944e-05, "loss": 0.6593, "step": 3798 }, { "epoch": 0.1109164695921287, "grad_norm": 0.7165249623938992, "learning_rate": 4.939497161394972e-05, "loss": 0.7735, "step": 3799 }, { "epoch": 0.11094566581997606, "grad_norm": 0.6476503437711402, "learning_rate": 4.93933495539335e-05, "loss": 0.6402, "step": 3800 }, { "epoch": 0.11097486204782342, "grad_norm": 0.6220366930579394, "learning_rate": 4.9391727493917275e-05, "loss": 0.6383, "step": 3801 }, { "epoch": 0.11100405827567078, "grad_norm": 0.6105932533213249, "learning_rate": 4.939010543390106e-05, "loss": 0.6283, "step": 3802 }, { "epoch": 0.11103325450351814, "grad_norm": 0.5845521888767224, "learning_rate": 4.938848337388484e-05, "loss": 0.5725, "step": 3803 }, { "epoch": 0.1110624507313655, "grad_norm": 0.7086961147616839, "learning_rate": 4.9386861313868614e-05, "loss": 0.7662, "step": 3804 }, { "epoch": 0.11109164695921286, "grad_norm": 0.6993904713197255, "learning_rate": 4.9385239253852396e-05, "loss": 0.7074, "step": 3805 }, { "epoch": 0.11112084318706023, "grad_norm": 0.6388371318630139, "learning_rate": 4.938361719383617e-05, "loss": 0.6553, "step": 3806 }, { "epoch": 0.1111500394149076, "grad_norm": 0.7930463299631548, "learning_rate": 4.938199513381995e-05, "loss": 0.6872, "step": 3807 }, { "epoch": 0.11117923564275496, "grad_norm": 0.6572592913994598, "learning_rate": 4.9380373073803734e-05, "loss": 0.6485, "step": 3808 }, { "epoch": 0.11120843187060232, "grad_norm": 0.7118467327241763, "learning_rate": 4.937875101378751e-05, "loss": 0.7902, "step": 3809 }, { "epoch": 0.11123762809844968, "grad_norm": 0.7089005135283748, "learning_rate": 4.937712895377129e-05, "loss": 0.759, "step": 3810 }, { "epoch": 0.11126682432629705, "grad_norm": 0.6613391253108917, "learning_rate": 4.9375506893755066e-05, "loss": 0.6979, "step": 3811 }, { "epoch": 0.1112960205541444, "grad_norm": 0.7821308420953481, "learning_rate": 4.937388483373885e-05, "loss": 0.7604, "step": 3812 }, { "epoch": 0.11132521678199177, "grad_norm": 0.6376778378249631, "learning_rate": 4.937226277372263e-05, "loss": 0.6679, "step": 3813 }, { "epoch": 0.11135441300983913, "grad_norm": 0.6241714749189988, "learning_rate": 4.937064071370641e-05, "loss": 0.62, "step": 3814 }, { "epoch": 0.11138360923768649, "grad_norm": 0.6695156478377664, "learning_rate": 4.936901865369019e-05, "loss": 0.6871, "step": 3815 }, { "epoch": 0.11141280546553385, "grad_norm": 0.643773911294652, "learning_rate": 4.936739659367397e-05, "loss": 0.662, "step": 3816 }, { "epoch": 0.11144200169338121, "grad_norm": 0.6114611041431335, "learning_rate": 4.936577453365775e-05, "loss": 0.6726, "step": 3817 }, { "epoch": 0.11147119792122857, "grad_norm": 0.6922630617306952, "learning_rate": 4.936415247364153e-05, "loss": 0.7027, "step": 3818 }, { "epoch": 0.11150039414907593, "grad_norm": 0.6228820256074308, "learning_rate": 4.9362530413625307e-05, "loss": 0.6497, "step": 3819 }, { "epoch": 0.1115295903769233, "grad_norm": 0.7376062598921733, "learning_rate": 4.936090835360909e-05, "loss": 0.7742, "step": 3820 }, { "epoch": 0.11155878660477066, "grad_norm": 0.6929721525165656, "learning_rate": 4.935928629359286e-05, "loss": 0.687, "step": 3821 }, { "epoch": 0.11158798283261803, "grad_norm": 0.6042872268072713, "learning_rate": 4.9357664233576645e-05, "loss": 0.6324, "step": 3822 }, { "epoch": 0.1116171790604654, "grad_norm": 0.6725317138763817, "learning_rate": 4.935604217356043e-05, "loss": 0.6993, "step": 3823 }, { "epoch": 0.11164637528831275, "grad_norm": 0.620075553243937, "learning_rate": 4.93544201135442e-05, "loss": 0.5965, "step": 3824 }, { "epoch": 0.11167557151616012, "grad_norm": 0.6908678085244877, "learning_rate": 4.9352798053527984e-05, "loss": 0.7279, "step": 3825 }, { "epoch": 0.11170476774400748, "grad_norm": 0.6326886712648007, "learning_rate": 4.935117599351176e-05, "loss": 0.7071, "step": 3826 }, { "epoch": 0.11173396397185484, "grad_norm": 0.6190643670035909, "learning_rate": 4.934955393349554e-05, "loss": 0.6239, "step": 3827 }, { "epoch": 0.1117631601997022, "grad_norm": 0.6870582657819216, "learning_rate": 4.934793187347932e-05, "loss": 0.7042, "step": 3828 }, { "epoch": 0.11179235642754956, "grad_norm": 0.592459012625655, "learning_rate": 4.93463098134631e-05, "loss": 0.5715, "step": 3829 }, { "epoch": 0.11182155265539692, "grad_norm": 0.6764914885958049, "learning_rate": 4.934468775344688e-05, "loss": 0.6741, "step": 3830 }, { "epoch": 0.11185074888324428, "grad_norm": 0.6917766898552367, "learning_rate": 4.9343065693430654e-05, "loss": 0.7609, "step": 3831 }, { "epoch": 0.11187994511109164, "grad_norm": 0.6600528517799275, "learning_rate": 4.934144363341444e-05, "loss": 0.6535, "step": 3832 }, { "epoch": 0.111909141338939, "grad_norm": 0.7521024456085541, "learning_rate": 4.933982157339822e-05, "loss": 0.6357, "step": 3833 }, { "epoch": 0.11193833756678637, "grad_norm": 0.6321747888165526, "learning_rate": 4.9338199513382e-05, "loss": 0.6817, "step": 3834 }, { "epoch": 0.11196753379463373, "grad_norm": 0.6028634466147452, "learning_rate": 4.933657745336578e-05, "loss": 0.5967, "step": 3835 }, { "epoch": 0.11199673002248109, "grad_norm": 0.6021826278859527, "learning_rate": 4.9334955393349556e-05, "loss": 0.6368, "step": 3836 }, { "epoch": 0.11202592625032846, "grad_norm": 0.6414334692226014, "learning_rate": 4.933333333333334e-05, "loss": 0.5948, "step": 3837 }, { "epoch": 0.11205512247817583, "grad_norm": 0.6692645947058119, "learning_rate": 4.933171127331712e-05, "loss": 0.7088, "step": 3838 }, { "epoch": 0.11208431870602319, "grad_norm": 0.6762354048219947, "learning_rate": 4.9330089213300894e-05, "loss": 0.6856, "step": 3839 }, { "epoch": 0.11211351493387055, "grad_norm": 0.7025432458211551, "learning_rate": 4.9328467153284676e-05, "loss": 0.693, "step": 3840 }, { "epoch": 0.11214271116171791, "grad_norm": 0.7420776853352032, "learning_rate": 4.932684509326845e-05, "loss": 0.7447, "step": 3841 }, { "epoch": 0.11217190738956527, "grad_norm": 0.6799321684425135, "learning_rate": 4.932522303325223e-05, "loss": 0.7449, "step": 3842 }, { "epoch": 0.11220110361741263, "grad_norm": 0.7019889952386845, "learning_rate": 4.9323600973236015e-05, "loss": 0.7251, "step": 3843 }, { "epoch": 0.11223029984525999, "grad_norm": 0.642347396934504, "learning_rate": 4.932197891321979e-05, "loss": 0.5882, "step": 3844 }, { "epoch": 0.11225949607310735, "grad_norm": 0.6724903827798462, "learning_rate": 4.932035685320357e-05, "loss": 0.686, "step": 3845 }, { "epoch": 0.11228869230095471, "grad_norm": 0.6170570767515253, "learning_rate": 4.9318734793187346e-05, "loss": 0.6344, "step": 3846 }, { "epoch": 0.11231788852880208, "grad_norm": 0.6068682760901618, "learning_rate": 4.931711273317113e-05, "loss": 0.597, "step": 3847 }, { "epoch": 0.11234708475664944, "grad_norm": 0.6043741289843174, "learning_rate": 4.931549067315491e-05, "loss": 0.6742, "step": 3848 }, { "epoch": 0.1123762809844968, "grad_norm": 0.630208466164579, "learning_rate": 4.9313868613138685e-05, "loss": 0.6854, "step": 3849 }, { "epoch": 0.11240547721234416, "grad_norm": 0.6487578066288076, "learning_rate": 4.931224655312247e-05, "loss": 0.707, "step": 3850 }, { "epoch": 0.11243467344019152, "grad_norm": 0.6586123566876674, "learning_rate": 4.931062449310625e-05, "loss": 0.7131, "step": 3851 }, { "epoch": 0.1124638696680389, "grad_norm": 0.6757667249423733, "learning_rate": 4.930900243309003e-05, "loss": 0.703, "step": 3852 }, { "epoch": 0.11249306589588626, "grad_norm": 0.6707496541527014, "learning_rate": 4.9307380373073805e-05, "loss": 0.6717, "step": 3853 }, { "epoch": 0.11252226212373362, "grad_norm": 0.6430646588146784, "learning_rate": 4.930575831305759e-05, "loss": 0.66, "step": 3854 }, { "epoch": 0.11255145835158098, "grad_norm": 0.6597159193353439, "learning_rate": 4.930413625304137e-05, "loss": 0.6853, "step": 3855 }, { "epoch": 0.11258065457942834, "grad_norm": 0.6711961365020922, "learning_rate": 4.9302514193025144e-05, "loss": 0.7142, "step": 3856 }, { "epoch": 0.1126098508072757, "grad_norm": 0.6523745768528741, "learning_rate": 4.9300892133008925e-05, "loss": 0.6924, "step": 3857 }, { "epoch": 0.11263904703512306, "grad_norm": 0.6502939296894208, "learning_rate": 4.92992700729927e-05, "loss": 0.6894, "step": 3858 }, { "epoch": 0.11266824326297042, "grad_norm": 0.6381647296695696, "learning_rate": 4.929764801297648e-05, "loss": 0.6479, "step": 3859 }, { "epoch": 0.11269743949081779, "grad_norm": 0.7982002841717536, "learning_rate": 4.9296025952960264e-05, "loss": 0.8155, "step": 3860 }, { "epoch": 0.11272663571866515, "grad_norm": 0.6490030863010668, "learning_rate": 4.929440389294404e-05, "loss": 0.7098, "step": 3861 }, { "epoch": 0.11275583194651251, "grad_norm": 0.7957612183388724, "learning_rate": 4.929278183292782e-05, "loss": 0.7498, "step": 3862 }, { "epoch": 0.11278502817435987, "grad_norm": 0.5872224473374957, "learning_rate": 4.92911597729116e-05, "loss": 0.6338, "step": 3863 }, { "epoch": 0.11281422440220723, "grad_norm": 0.5984780840606506, "learning_rate": 4.928953771289538e-05, "loss": 0.5488, "step": 3864 }, { "epoch": 0.11284342063005459, "grad_norm": 0.6545504636018603, "learning_rate": 4.928791565287916e-05, "loss": 0.6662, "step": 3865 }, { "epoch": 0.11287261685790195, "grad_norm": 0.6608944673704392, "learning_rate": 4.9286293592862934e-05, "loss": 0.7226, "step": 3866 }, { "epoch": 0.11290181308574933, "grad_norm": 0.6235113740168484, "learning_rate": 4.9284671532846716e-05, "loss": 0.6474, "step": 3867 }, { "epoch": 0.11293100931359669, "grad_norm": 0.7575924923858306, "learning_rate": 4.92830494728305e-05, "loss": 0.8683, "step": 3868 }, { "epoch": 0.11296020554144405, "grad_norm": 0.6757266603189857, "learning_rate": 4.928142741281427e-05, "loss": 0.686, "step": 3869 }, { "epoch": 0.11298940176929141, "grad_norm": 0.665420945994238, "learning_rate": 4.927980535279806e-05, "loss": 0.7009, "step": 3870 }, { "epoch": 0.11301859799713877, "grad_norm": 0.5642310341078007, "learning_rate": 4.9278183292781836e-05, "loss": 0.5368, "step": 3871 }, { "epoch": 0.11304779422498613, "grad_norm": 0.6577907020074437, "learning_rate": 4.927656123276562e-05, "loss": 0.7159, "step": 3872 }, { "epoch": 0.1130769904528335, "grad_norm": 0.6443667884720014, "learning_rate": 4.927493917274939e-05, "loss": 0.6822, "step": 3873 }, { "epoch": 0.11310618668068086, "grad_norm": 0.6896774438103358, "learning_rate": 4.9273317112733175e-05, "loss": 0.7121, "step": 3874 }, { "epoch": 0.11313538290852822, "grad_norm": 0.7551160038931776, "learning_rate": 4.9271695052716956e-05, "loss": 0.6752, "step": 3875 }, { "epoch": 0.11316457913637558, "grad_norm": 0.7311241721674449, "learning_rate": 4.927007299270073e-05, "loss": 0.729, "step": 3876 }, { "epoch": 0.11319377536422294, "grad_norm": 0.6838955405886028, "learning_rate": 4.926845093268451e-05, "loss": 0.714, "step": 3877 }, { "epoch": 0.1132229715920703, "grad_norm": 0.5913987124823021, "learning_rate": 4.926682887266829e-05, "loss": 0.5798, "step": 3878 }, { "epoch": 0.11325216781991766, "grad_norm": 0.6036560563397728, "learning_rate": 4.926520681265207e-05, "loss": 0.6803, "step": 3879 }, { "epoch": 0.11328136404776502, "grad_norm": 0.6588292138619903, "learning_rate": 4.926358475263585e-05, "loss": 0.6907, "step": 3880 }, { "epoch": 0.11331056027561238, "grad_norm": 0.6013992370452075, "learning_rate": 4.926196269261963e-05, "loss": 0.6759, "step": 3881 }, { "epoch": 0.11333975650345976, "grad_norm": 0.653757043575489, "learning_rate": 4.926034063260341e-05, "loss": 0.7177, "step": 3882 }, { "epoch": 0.11336895273130712, "grad_norm": 0.6061612917843897, "learning_rate": 4.925871857258719e-05, "loss": 0.6097, "step": 3883 }, { "epoch": 0.11339814895915448, "grad_norm": 0.6302781861877131, "learning_rate": 4.9257096512570965e-05, "loss": 0.6793, "step": 3884 }, { "epoch": 0.11342734518700184, "grad_norm": 0.6423720605974895, "learning_rate": 4.925547445255475e-05, "loss": 0.613, "step": 3885 }, { "epoch": 0.1134565414148492, "grad_norm": 0.6558457578322654, "learning_rate": 4.925385239253852e-05, "loss": 0.7345, "step": 3886 }, { "epoch": 0.11348573764269657, "grad_norm": 0.6889426280053084, "learning_rate": 4.9252230332522304e-05, "loss": 0.724, "step": 3887 }, { "epoch": 0.11351493387054393, "grad_norm": 0.6295432658968545, "learning_rate": 4.9250608272506085e-05, "loss": 0.6486, "step": 3888 }, { "epoch": 0.11354413009839129, "grad_norm": 0.6833773801750487, "learning_rate": 4.924898621248987e-05, "loss": 0.7241, "step": 3889 }, { "epoch": 0.11357332632623865, "grad_norm": 0.6715312411457888, "learning_rate": 4.924736415247365e-05, "loss": 0.7574, "step": 3890 }, { "epoch": 0.11360252255408601, "grad_norm": 0.7066006292160583, "learning_rate": 4.9245742092457424e-05, "loss": 0.7588, "step": 3891 }, { "epoch": 0.11363171878193337, "grad_norm": 0.6444307228142773, "learning_rate": 4.9244120032441206e-05, "loss": 0.6716, "step": 3892 }, { "epoch": 0.11366091500978073, "grad_norm": 0.6291784938744545, "learning_rate": 4.924249797242498e-05, "loss": 0.6729, "step": 3893 }, { "epoch": 0.1136901112376281, "grad_norm": 0.5996769235519347, "learning_rate": 4.924087591240876e-05, "loss": 0.6126, "step": 3894 }, { "epoch": 0.11371930746547546, "grad_norm": 0.6304844442885172, "learning_rate": 4.9239253852392544e-05, "loss": 0.7134, "step": 3895 }, { "epoch": 0.11374850369332282, "grad_norm": 0.6115239769184163, "learning_rate": 4.923763179237632e-05, "loss": 0.6205, "step": 3896 }, { "epoch": 0.11377769992117019, "grad_norm": 0.6814834944870386, "learning_rate": 4.92360097323601e-05, "loss": 0.7488, "step": 3897 }, { "epoch": 0.11380689614901755, "grad_norm": 0.7480077138333282, "learning_rate": 4.9234387672343876e-05, "loss": 0.7135, "step": 3898 }, { "epoch": 0.11383609237686491, "grad_norm": 0.6685223728407091, "learning_rate": 4.923276561232766e-05, "loss": 0.7538, "step": 3899 }, { "epoch": 0.11386528860471228, "grad_norm": 0.6681763902840001, "learning_rate": 4.923114355231144e-05, "loss": 0.6673, "step": 3900 }, { "epoch": 0.11389448483255964, "grad_norm": 0.7288777366008256, "learning_rate": 4.9229521492295215e-05, "loss": 0.8258, "step": 3901 }, { "epoch": 0.113923681060407, "grad_norm": 0.5914328296514214, "learning_rate": 4.9227899432278996e-05, "loss": 0.6398, "step": 3902 }, { "epoch": 0.11395287728825436, "grad_norm": 0.6343829492688148, "learning_rate": 4.922627737226277e-05, "loss": 0.6822, "step": 3903 }, { "epoch": 0.11398207351610172, "grad_norm": 0.602345295607695, "learning_rate": 4.922465531224655e-05, "loss": 0.6522, "step": 3904 }, { "epoch": 0.11401126974394908, "grad_norm": 0.6249347083522391, "learning_rate": 4.9223033252230335e-05, "loss": 0.656, "step": 3905 }, { "epoch": 0.11404046597179644, "grad_norm": 0.7151538310022256, "learning_rate": 4.922141119221411e-05, "loss": 0.69, "step": 3906 }, { "epoch": 0.1140696621996438, "grad_norm": 0.7007069435574425, "learning_rate": 4.921978913219789e-05, "loss": 0.7276, "step": 3907 }, { "epoch": 0.11409885842749116, "grad_norm": 0.7627389667301596, "learning_rate": 4.921816707218167e-05, "loss": 0.8031, "step": 3908 }, { "epoch": 0.11412805465533853, "grad_norm": 0.712524683416839, "learning_rate": 4.9216545012165455e-05, "loss": 0.6847, "step": 3909 }, { "epoch": 0.11415725088318589, "grad_norm": 0.6461640813352114, "learning_rate": 4.921492295214924e-05, "loss": 0.7362, "step": 3910 }, { "epoch": 0.11418644711103325, "grad_norm": 0.7103269241610639, "learning_rate": 4.921330089213301e-05, "loss": 0.7367, "step": 3911 }, { "epoch": 0.11421564333888061, "grad_norm": 0.7133524554237218, "learning_rate": 4.9211678832116794e-05, "loss": 0.7586, "step": 3912 }, { "epoch": 0.11424483956672798, "grad_norm": 0.6996636502283831, "learning_rate": 4.921005677210057e-05, "loss": 0.6963, "step": 3913 }, { "epoch": 0.11427403579457535, "grad_norm": 0.6076405750400516, "learning_rate": 4.920843471208435e-05, "loss": 0.5996, "step": 3914 }, { "epoch": 0.11430323202242271, "grad_norm": 0.6483227305419337, "learning_rate": 4.920681265206813e-05, "loss": 0.6859, "step": 3915 }, { "epoch": 0.11433242825027007, "grad_norm": 0.9969111007239625, "learning_rate": 4.920519059205191e-05, "loss": 0.7537, "step": 3916 }, { "epoch": 0.11436162447811743, "grad_norm": 0.6632236224165771, "learning_rate": 4.920356853203569e-05, "loss": 0.7452, "step": 3917 }, { "epoch": 0.11439082070596479, "grad_norm": 0.674544845760926, "learning_rate": 4.9201946472019464e-05, "loss": 0.7299, "step": 3918 }, { "epoch": 0.11442001693381215, "grad_norm": 0.6227097173393024, "learning_rate": 4.9200324412003246e-05, "loss": 0.6864, "step": 3919 }, { "epoch": 0.11444921316165951, "grad_norm": 0.7021993216263295, "learning_rate": 4.919870235198703e-05, "loss": 0.7696, "step": 3920 }, { "epoch": 0.11447840938950687, "grad_norm": 0.7150555752379828, "learning_rate": 4.91970802919708e-05, "loss": 0.8083, "step": 3921 }, { "epoch": 0.11450760561735424, "grad_norm": 0.6244600882611342, "learning_rate": 4.9195458231954584e-05, "loss": 0.6649, "step": 3922 }, { "epoch": 0.1145368018452016, "grad_norm": 0.6716311896893623, "learning_rate": 4.919383617193836e-05, "loss": 0.7586, "step": 3923 }, { "epoch": 0.11456599807304896, "grad_norm": 0.6836535469392091, "learning_rate": 4.919221411192214e-05, "loss": 0.6999, "step": 3924 }, { "epoch": 0.11459519430089632, "grad_norm": 0.5460985778037591, "learning_rate": 4.919059205190592e-05, "loss": 0.5751, "step": 3925 }, { "epoch": 0.11462439052874368, "grad_norm": 0.652199127239031, "learning_rate": 4.91889699918897e-05, "loss": 0.7185, "step": 3926 }, { "epoch": 0.11465358675659104, "grad_norm": 0.7463853380048089, "learning_rate": 4.9187347931873486e-05, "loss": 0.7425, "step": 3927 }, { "epoch": 0.11468278298443842, "grad_norm": 0.6057493375582162, "learning_rate": 4.918572587185726e-05, "loss": 0.6716, "step": 3928 }, { "epoch": 0.11471197921228578, "grad_norm": 0.6422624348150556, "learning_rate": 4.918410381184104e-05, "loss": 0.7452, "step": 3929 }, { "epoch": 0.11474117544013314, "grad_norm": 0.6943431445030884, "learning_rate": 4.9182481751824825e-05, "loss": 0.7512, "step": 3930 }, { "epoch": 0.1147703716679805, "grad_norm": 0.694864967511875, "learning_rate": 4.91808596918086e-05, "loss": 0.7138, "step": 3931 }, { "epoch": 0.11479956789582786, "grad_norm": 0.6459194113443816, "learning_rate": 4.917923763179238e-05, "loss": 0.6422, "step": 3932 }, { "epoch": 0.11482876412367522, "grad_norm": 0.6628200514143757, "learning_rate": 4.9177615571776156e-05, "loss": 0.7344, "step": 3933 }, { "epoch": 0.11485796035152258, "grad_norm": 0.6593493731993157, "learning_rate": 4.917599351175994e-05, "loss": 0.73, "step": 3934 }, { "epoch": 0.11488715657936995, "grad_norm": 0.6951180082355657, "learning_rate": 4.917437145174372e-05, "loss": 0.8221, "step": 3935 }, { "epoch": 0.1149163528072173, "grad_norm": 0.6223197384063424, "learning_rate": 4.9172749391727495e-05, "loss": 0.6826, "step": 3936 }, { "epoch": 0.11494554903506467, "grad_norm": 0.6271553506823673, "learning_rate": 4.917112733171128e-05, "loss": 0.7107, "step": 3937 }, { "epoch": 0.11497474526291203, "grad_norm": 0.7077571693076774, "learning_rate": 4.916950527169505e-05, "loss": 0.6853, "step": 3938 }, { "epoch": 0.11500394149075939, "grad_norm": 0.6377328781537365, "learning_rate": 4.916788321167883e-05, "loss": 0.6383, "step": 3939 }, { "epoch": 0.11503313771860675, "grad_norm": 0.627514705493944, "learning_rate": 4.9166261151662615e-05, "loss": 0.6981, "step": 3940 }, { "epoch": 0.11506233394645411, "grad_norm": 0.6026059832907483, "learning_rate": 4.916463909164639e-05, "loss": 0.6752, "step": 3941 }, { "epoch": 0.11509153017430147, "grad_norm": 0.6413865739429885, "learning_rate": 4.916301703163017e-05, "loss": 0.6868, "step": 3942 }, { "epoch": 0.11512072640214885, "grad_norm": 0.5871744630483345, "learning_rate": 4.916139497161395e-05, "loss": 0.626, "step": 3943 }, { "epoch": 0.11514992262999621, "grad_norm": 0.6891734500992486, "learning_rate": 4.915977291159773e-05, "loss": 0.7178, "step": 3944 }, { "epoch": 0.11517911885784357, "grad_norm": 0.6360798590639966, "learning_rate": 4.915815085158151e-05, "loss": 0.7147, "step": 3945 }, { "epoch": 0.11520831508569093, "grad_norm": 0.594637648622307, "learning_rate": 4.915652879156529e-05, "loss": 0.5973, "step": 3946 }, { "epoch": 0.1152375113135383, "grad_norm": 0.6477275302970329, "learning_rate": 4.9154906731549074e-05, "loss": 0.6805, "step": 3947 }, { "epoch": 0.11526670754138565, "grad_norm": 0.7176917014485821, "learning_rate": 4.915328467153285e-05, "loss": 0.7486, "step": 3948 }, { "epoch": 0.11529590376923302, "grad_norm": 0.6950928131529887, "learning_rate": 4.915166261151663e-05, "loss": 0.7796, "step": 3949 }, { "epoch": 0.11532509999708038, "grad_norm": 0.6515883156009887, "learning_rate": 4.915004055150041e-05, "loss": 0.6836, "step": 3950 }, { "epoch": 0.11535429622492774, "grad_norm": 0.7142388477998292, "learning_rate": 4.914841849148419e-05, "loss": 0.8424, "step": 3951 }, { "epoch": 0.1153834924527751, "grad_norm": 0.6629350799365669, "learning_rate": 4.914679643146797e-05, "loss": 0.5775, "step": 3952 }, { "epoch": 0.11541268868062246, "grad_norm": 0.6785908460714033, "learning_rate": 4.9145174371451744e-05, "loss": 0.7089, "step": 3953 }, { "epoch": 0.11544188490846982, "grad_norm": 0.6211327000371212, "learning_rate": 4.9143552311435526e-05, "loss": 0.6387, "step": 3954 }, { "epoch": 0.11547108113631718, "grad_norm": 0.6665009764667814, "learning_rate": 4.914193025141931e-05, "loss": 0.7121, "step": 3955 }, { "epoch": 0.11550027736416454, "grad_norm": 0.5888685628941979, "learning_rate": 4.914030819140308e-05, "loss": 0.5907, "step": 3956 }, { "epoch": 0.1155294735920119, "grad_norm": 0.7831964654863247, "learning_rate": 4.9138686131386864e-05, "loss": 0.6756, "step": 3957 }, { "epoch": 0.11555866981985928, "grad_norm": 0.6417511581275425, "learning_rate": 4.913706407137064e-05, "loss": 0.6148, "step": 3958 }, { "epoch": 0.11558786604770664, "grad_norm": 0.6171017988388445, "learning_rate": 4.913544201135442e-05, "loss": 0.6302, "step": 3959 }, { "epoch": 0.115617062275554, "grad_norm": 0.6680607512992206, "learning_rate": 4.91338199513382e-05, "loss": 0.6722, "step": 3960 }, { "epoch": 0.11564625850340136, "grad_norm": 0.6939631471476808, "learning_rate": 4.913219789132198e-05, "loss": 0.7773, "step": 3961 }, { "epoch": 0.11567545473124873, "grad_norm": 0.7085857369891765, "learning_rate": 4.913057583130576e-05, "loss": 0.7916, "step": 3962 }, { "epoch": 0.11570465095909609, "grad_norm": 0.6648460529206587, "learning_rate": 4.9128953771289535e-05, "loss": 0.8179, "step": 3963 }, { "epoch": 0.11573384718694345, "grad_norm": 0.6869280546980111, "learning_rate": 4.912733171127332e-05, "loss": 0.7177, "step": 3964 }, { "epoch": 0.11576304341479081, "grad_norm": 0.7371786971591392, "learning_rate": 4.91257096512571e-05, "loss": 0.6936, "step": 3965 }, { "epoch": 0.11579223964263817, "grad_norm": 0.6832344769967514, "learning_rate": 4.912408759124088e-05, "loss": 0.8002, "step": 3966 }, { "epoch": 0.11582143587048553, "grad_norm": 0.6457039678064462, "learning_rate": 4.912246553122466e-05, "loss": 0.7342, "step": 3967 }, { "epoch": 0.11585063209833289, "grad_norm": 0.6209790875862379, "learning_rate": 4.912084347120844e-05, "loss": 0.6731, "step": 3968 }, { "epoch": 0.11587982832618025, "grad_norm": 0.6065446426608168, "learning_rate": 4.911922141119222e-05, "loss": 0.6898, "step": 3969 }, { "epoch": 0.11590902455402761, "grad_norm": 0.6618696279028934, "learning_rate": 4.9117599351176e-05, "loss": 0.8082, "step": 3970 }, { "epoch": 0.11593822078187498, "grad_norm": 0.6599004822193596, "learning_rate": 4.9115977291159775e-05, "loss": 0.7721, "step": 3971 }, { "epoch": 0.11596741700972234, "grad_norm": 0.6272447872196504, "learning_rate": 4.911435523114356e-05, "loss": 0.6977, "step": 3972 }, { "epoch": 0.11599661323756971, "grad_norm": 0.6570793418429907, "learning_rate": 4.911273317112733e-05, "loss": 0.6695, "step": 3973 }, { "epoch": 0.11602580946541707, "grad_norm": 0.7606318536638177, "learning_rate": 4.9111111111111114e-05, "loss": 0.747, "step": 3974 }, { "epoch": 0.11605500569326443, "grad_norm": 0.6020131448859324, "learning_rate": 4.9109489051094895e-05, "loss": 0.63, "step": 3975 }, { "epoch": 0.1160842019211118, "grad_norm": 0.7418658580244836, "learning_rate": 4.910786699107867e-05, "loss": 0.7849, "step": 3976 }, { "epoch": 0.11611339814895916, "grad_norm": 0.6443315520735994, "learning_rate": 4.910624493106245e-05, "loss": 0.7477, "step": 3977 }, { "epoch": 0.11614259437680652, "grad_norm": 0.6238276217586183, "learning_rate": 4.910462287104623e-05, "loss": 0.6381, "step": 3978 }, { "epoch": 0.11617179060465388, "grad_norm": 0.7063571187290274, "learning_rate": 4.910300081103001e-05, "loss": 0.7956, "step": 3979 }, { "epoch": 0.11620098683250124, "grad_norm": 0.6889012258926164, "learning_rate": 4.910137875101379e-05, "loss": 0.7828, "step": 3980 }, { "epoch": 0.1162301830603486, "grad_norm": 1.0441479623341574, "learning_rate": 4.9099756690997566e-05, "loss": 0.6862, "step": 3981 }, { "epoch": 0.11625937928819596, "grad_norm": 0.7110158487224943, "learning_rate": 4.909813463098135e-05, "loss": 0.7746, "step": 3982 }, { "epoch": 0.11628857551604332, "grad_norm": 0.6483861711887265, "learning_rate": 4.909651257096513e-05, "loss": 0.7266, "step": 3983 }, { "epoch": 0.11631777174389069, "grad_norm": 0.6546323397989711, "learning_rate": 4.909489051094891e-05, "loss": 0.6839, "step": 3984 }, { "epoch": 0.11634696797173805, "grad_norm": 0.7343415377273255, "learning_rate": 4.9093268450932686e-05, "loss": 0.6737, "step": 3985 }, { "epoch": 0.11637616419958541, "grad_norm": 0.659355418800026, "learning_rate": 4.909164639091647e-05, "loss": 0.6905, "step": 3986 }, { "epoch": 0.11640536042743277, "grad_norm": 0.5976303117587438, "learning_rate": 4.909002433090025e-05, "loss": 0.592, "step": 3987 }, { "epoch": 0.11643455665528014, "grad_norm": 0.6074967989313573, "learning_rate": 4.9088402270884025e-05, "loss": 0.6538, "step": 3988 }, { "epoch": 0.1164637528831275, "grad_norm": 0.6419496166287711, "learning_rate": 4.9086780210867806e-05, "loss": 0.7011, "step": 3989 }, { "epoch": 0.11649294911097487, "grad_norm": 0.6148741994280718, "learning_rate": 4.908515815085158e-05, "loss": 0.6525, "step": 3990 }, { "epoch": 0.11652214533882223, "grad_norm": 1.1368939552594486, "learning_rate": 4.908353609083536e-05, "loss": 0.7425, "step": 3991 }, { "epoch": 0.11655134156666959, "grad_norm": 0.5668865545730893, "learning_rate": 4.9081914030819145e-05, "loss": 0.5762, "step": 3992 }, { "epoch": 0.11658053779451695, "grad_norm": 0.7091692969378375, "learning_rate": 4.908029197080292e-05, "loss": 0.6838, "step": 3993 }, { "epoch": 0.11660973402236431, "grad_norm": 0.6329342633671927, "learning_rate": 4.90786699107867e-05, "loss": 0.6012, "step": 3994 }, { "epoch": 0.11663893025021167, "grad_norm": 0.6663094566930938, "learning_rate": 4.907704785077048e-05, "loss": 0.6658, "step": 3995 }, { "epoch": 0.11666812647805903, "grad_norm": 0.6030951487149957, "learning_rate": 4.907542579075426e-05, "loss": 0.6062, "step": 3996 }, { "epoch": 0.1166973227059064, "grad_norm": 0.733841772523598, "learning_rate": 4.907380373073804e-05, "loss": 0.7909, "step": 3997 }, { "epoch": 0.11672651893375376, "grad_norm": 0.6022667431703858, "learning_rate": 4.9072181670721815e-05, "loss": 0.6255, "step": 3998 }, { "epoch": 0.11675571516160112, "grad_norm": 0.643530977395522, "learning_rate": 4.90705596107056e-05, "loss": 0.6494, "step": 3999 }, { "epoch": 0.11678491138944848, "grad_norm": 0.6879764265661273, "learning_rate": 4.906893755068938e-05, "loss": 0.6982, "step": 4000 }, { "epoch": 0.11681410761729584, "grad_norm": 0.6404490638157212, "learning_rate": 4.9067315490673154e-05, "loss": 0.7157, "step": 4001 }, { "epoch": 0.1168433038451432, "grad_norm": 0.6415501855433049, "learning_rate": 4.906569343065694e-05, "loss": 0.7383, "step": 4002 }, { "epoch": 0.11687250007299058, "grad_norm": 0.5838159464167545, "learning_rate": 4.906407137064072e-05, "loss": 0.5525, "step": 4003 }, { "epoch": 0.11690169630083794, "grad_norm": 0.6082503347271128, "learning_rate": 4.90624493106245e-05, "loss": 0.6247, "step": 4004 }, { "epoch": 0.1169308925286853, "grad_norm": 0.662426080431196, "learning_rate": 4.9060827250608274e-05, "loss": 0.6521, "step": 4005 }, { "epoch": 0.11696008875653266, "grad_norm": 0.9541565838777651, "learning_rate": 4.9059205190592056e-05, "loss": 0.7983, "step": 4006 }, { "epoch": 0.11698928498438002, "grad_norm": 0.6057647205392888, "learning_rate": 4.905758313057584e-05, "loss": 0.6877, "step": 4007 }, { "epoch": 0.11701848121222738, "grad_norm": 0.6603613453097449, "learning_rate": 4.905596107055961e-05, "loss": 0.7284, "step": 4008 }, { "epoch": 0.11704767744007474, "grad_norm": 0.6758657034674665, "learning_rate": 4.9054339010543394e-05, "loss": 0.679, "step": 4009 }, { "epoch": 0.1170768736679221, "grad_norm": 0.6047273893983889, "learning_rate": 4.905271695052717e-05, "loss": 0.6405, "step": 4010 }, { "epoch": 0.11710606989576947, "grad_norm": 0.6048233803253406, "learning_rate": 4.905109489051095e-05, "loss": 0.6466, "step": 4011 }, { "epoch": 0.11713526612361683, "grad_norm": 1.4587193428077874, "learning_rate": 4.904947283049473e-05, "loss": 0.6644, "step": 4012 }, { "epoch": 0.11716446235146419, "grad_norm": 0.669688635161106, "learning_rate": 4.904785077047851e-05, "loss": 0.7218, "step": 4013 }, { "epoch": 0.11719365857931155, "grad_norm": 0.6909770585905556, "learning_rate": 4.904622871046229e-05, "loss": 0.7494, "step": 4014 }, { "epoch": 0.11722285480715891, "grad_norm": 0.6256996991565785, "learning_rate": 4.904460665044607e-05, "loss": 0.6851, "step": 4015 }, { "epoch": 0.11725205103500627, "grad_norm": 0.6936998979549832, "learning_rate": 4.9042984590429846e-05, "loss": 0.747, "step": 4016 }, { "epoch": 0.11728124726285363, "grad_norm": 0.6243639935419077, "learning_rate": 4.904136253041363e-05, "loss": 0.6815, "step": 4017 }, { "epoch": 0.11731044349070101, "grad_norm": 0.6299551321761893, "learning_rate": 4.90397404703974e-05, "loss": 0.6235, "step": 4018 }, { "epoch": 0.11733963971854837, "grad_norm": 0.6097990714433263, "learning_rate": 4.9038118410381185e-05, "loss": 0.6749, "step": 4019 }, { "epoch": 0.11736883594639573, "grad_norm": 0.6316470963723835, "learning_rate": 4.9036496350364966e-05, "loss": 0.7091, "step": 4020 }, { "epoch": 0.11739803217424309, "grad_norm": 0.6798196695001392, "learning_rate": 4.903487429034875e-05, "loss": 0.7606, "step": 4021 }, { "epoch": 0.11742722840209045, "grad_norm": 0.6170263409526395, "learning_rate": 4.903325223033253e-05, "loss": 0.6645, "step": 4022 }, { "epoch": 0.11745642462993781, "grad_norm": 0.6563967401756052, "learning_rate": 4.9031630170316305e-05, "loss": 0.7456, "step": 4023 }, { "epoch": 0.11748562085778518, "grad_norm": 0.6428938000180718, "learning_rate": 4.903000811030009e-05, "loss": 0.6808, "step": 4024 }, { "epoch": 0.11751481708563254, "grad_norm": 0.6344943630949506, "learning_rate": 4.902838605028386e-05, "loss": 0.71, "step": 4025 }, { "epoch": 0.1175440133134799, "grad_norm": 0.6781741703811934, "learning_rate": 4.9026763990267643e-05, "loss": 0.6596, "step": 4026 }, { "epoch": 0.11757320954132726, "grad_norm": 1.0096744468387673, "learning_rate": 4.9025141930251425e-05, "loss": 0.7803, "step": 4027 }, { "epoch": 0.11760240576917462, "grad_norm": 0.6794256885456036, "learning_rate": 4.90235198702352e-05, "loss": 0.7056, "step": 4028 }, { "epoch": 0.11763160199702198, "grad_norm": 0.641782684221526, "learning_rate": 4.902189781021898e-05, "loss": 0.6794, "step": 4029 }, { "epoch": 0.11766079822486934, "grad_norm": 0.6382287246415667, "learning_rate": 4.902027575020276e-05, "loss": 0.6338, "step": 4030 }, { "epoch": 0.1176899944527167, "grad_norm": 0.6503679493314563, "learning_rate": 4.901865369018654e-05, "loss": 0.6798, "step": 4031 }, { "epoch": 0.11771919068056406, "grad_norm": 0.6366842753517544, "learning_rate": 4.901703163017032e-05, "loss": 0.6963, "step": 4032 }, { "epoch": 0.11774838690841144, "grad_norm": 0.6048354486838821, "learning_rate": 4.9015409570154095e-05, "loss": 0.634, "step": 4033 }, { "epoch": 0.1177775831362588, "grad_norm": 0.630032227406886, "learning_rate": 4.901378751013788e-05, "loss": 0.7418, "step": 4034 }, { "epoch": 0.11780677936410616, "grad_norm": 0.6744938887243932, "learning_rate": 4.901216545012165e-05, "loss": 0.7361, "step": 4035 }, { "epoch": 0.11783597559195352, "grad_norm": 0.7663738712538423, "learning_rate": 4.9010543390105434e-05, "loss": 0.7104, "step": 4036 }, { "epoch": 0.11786517181980088, "grad_norm": 0.6504429359407997, "learning_rate": 4.9008921330089216e-05, "loss": 0.6543, "step": 4037 }, { "epoch": 0.11789436804764825, "grad_norm": 0.8309382897597607, "learning_rate": 4.900729927007299e-05, "loss": 0.8448, "step": 4038 }, { "epoch": 0.11792356427549561, "grad_norm": 0.6286708821552188, "learning_rate": 4.900567721005677e-05, "loss": 0.6712, "step": 4039 }, { "epoch": 0.11795276050334297, "grad_norm": 0.6619973591711407, "learning_rate": 4.9004055150040554e-05, "loss": 0.8031, "step": 4040 }, { "epoch": 0.11798195673119033, "grad_norm": 0.5925244777515055, "learning_rate": 4.9002433090024336e-05, "loss": 0.667, "step": 4041 }, { "epoch": 0.11801115295903769, "grad_norm": 0.7180202993518975, "learning_rate": 4.900081103000812e-05, "loss": 0.6973, "step": 4042 }, { "epoch": 0.11804034918688505, "grad_norm": 0.6557701271179693, "learning_rate": 4.899918896999189e-05, "loss": 0.7322, "step": 4043 }, { "epoch": 0.11806954541473241, "grad_norm": 0.6312352998043406, "learning_rate": 4.8997566909975674e-05, "loss": 0.6456, "step": 4044 }, { "epoch": 0.11809874164257977, "grad_norm": 1.1129202300329235, "learning_rate": 4.899594484995945e-05, "loss": 0.7329, "step": 4045 }, { "epoch": 0.11812793787042714, "grad_norm": 0.5860163484925865, "learning_rate": 4.899432278994323e-05, "loss": 0.6419, "step": 4046 }, { "epoch": 0.1181571340982745, "grad_norm": 0.5982796120589391, "learning_rate": 4.899270072992701e-05, "loss": 0.6597, "step": 4047 }, { "epoch": 0.11818633032612187, "grad_norm": 0.6449857905099279, "learning_rate": 4.899107866991079e-05, "loss": 0.7033, "step": 4048 }, { "epoch": 0.11821552655396923, "grad_norm": 0.6814272445303369, "learning_rate": 4.898945660989457e-05, "loss": 0.7578, "step": 4049 }, { "epoch": 0.1182447227818166, "grad_norm": 0.6564535641258924, "learning_rate": 4.8987834549878345e-05, "loss": 0.6928, "step": 4050 }, { "epoch": 0.11827391900966396, "grad_norm": 0.6243244931147741, "learning_rate": 4.8986212489862126e-05, "loss": 0.6445, "step": 4051 }, { "epoch": 0.11830311523751132, "grad_norm": 0.7218744307106465, "learning_rate": 4.898459042984591e-05, "loss": 0.7293, "step": 4052 }, { "epoch": 0.11833231146535868, "grad_norm": 0.6219981151434637, "learning_rate": 4.898296836982968e-05, "loss": 0.6534, "step": 4053 }, { "epoch": 0.11836150769320604, "grad_norm": 0.6440941058225373, "learning_rate": 4.8981346309813465e-05, "loss": 0.712, "step": 4054 }, { "epoch": 0.1183907039210534, "grad_norm": 0.655351359397994, "learning_rate": 4.897972424979724e-05, "loss": 0.6707, "step": 4055 }, { "epoch": 0.11841990014890076, "grad_norm": 0.6236717117927835, "learning_rate": 4.897810218978102e-05, "loss": 0.6998, "step": 4056 }, { "epoch": 0.11844909637674812, "grad_norm": 0.6243432161258146, "learning_rate": 4.8976480129764803e-05, "loss": 0.6914, "step": 4057 }, { "epoch": 0.11847829260459548, "grad_norm": 0.8284671816569854, "learning_rate": 4.897485806974858e-05, "loss": 0.7787, "step": 4058 }, { "epoch": 0.11850748883244284, "grad_norm": 0.6440239622159417, "learning_rate": 4.897323600973237e-05, "loss": 0.7455, "step": 4059 }, { "epoch": 0.1185366850602902, "grad_norm": 0.6138514583844135, "learning_rate": 4.897161394971614e-05, "loss": 0.6901, "step": 4060 }, { "epoch": 0.11856588128813757, "grad_norm": 0.6449635751858045, "learning_rate": 4.8969991889699924e-05, "loss": 0.7519, "step": 4061 }, { "epoch": 0.11859507751598493, "grad_norm": 0.6518336719305198, "learning_rate": 4.8968369829683706e-05, "loss": 0.7292, "step": 4062 }, { "epoch": 0.1186242737438323, "grad_norm": 0.5863040223322881, "learning_rate": 4.896674776966748e-05, "loss": 0.5954, "step": 4063 }, { "epoch": 0.11865346997167966, "grad_norm": 0.6173941003023483, "learning_rate": 4.896512570965126e-05, "loss": 0.6711, "step": 4064 }, { "epoch": 0.11868266619952703, "grad_norm": 0.6787245470754256, "learning_rate": 4.896350364963504e-05, "loss": 0.7172, "step": 4065 }, { "epoch": 0.11871186242737439, "grad_norm": 0.6849908025160932, "learning_rate": 4.896188158961882e-05, "loss": 0.7586, "step": 4066 }, { "epoch": 0.11874105865522175, "grad_norm": 0.5926690287461507, "learning_rate": 4.89602595296026e-05, "loss": 0.6553, "step": 4067 }, { "epoch": 0.11877025488306911, "grad_norm": 0.8203858743676977, "learning_rate": 4.8958637469586376e-05, "loss": 0.7265, "step": 4068 }, { "epoch": 0.11879945111091647, "grad_norm": 0.6215464332959282, "learning_rate": 4.895701540957016e-05, "loss": 0.6677, "step": 4069 }, { "epoch": 0.11882864733876383, "grad_norm": 0.7383129702400423, "learning_rate": 4.895539334955393e-05, "loss": 0.6561, "step": 4070 }, { "epoch": 0.1188578435666112, "grad_norm": 0.811141800218298, "learning_rate": 4.8953771289537714e-05, "loss": 0.6809, "step": 4071 }, { "epoch": 0.11888703979445855, "grad_norm": 0.6570333337588244, "learning_rate": 4.8952149229521496e-05, "loss": 0.6853, "step": 4072 }, { "epoch": 0.11891623602230592, "grad_norm": 0.5634234875659995, "learning_rate": 4.895052716950527e-05, "loss": 0.6201, "step": 4073 }, { "epoch": 0.11894543225015328, "grad_norm": 3.5278740029039057, "learning_rate": 4.894890510948905e-05, "loss": 0.807, "step": 4074 }, { "epoch": 0.11897462847800064, "grad_norm": 0.7189026832783373, "learning_rate": 4.894728304947283e-05, "loss": 0.7627, "step": 4075 }, { "epoch": 0.119003824705848, "grad_norm": 0.6938890809899039, "learning_rate": 4.894566098945661e-05, "loss": 0.7391, "step": 4076 }, { "epoch": 0.11903302093369536, "grad_norm": 0.66200637745912, "learning_rate": 4.894403892944039e-05, "loss": 0.6665, "step": 4077 }, { "epoch": 0.11906221716154274, "grad_norm": 0.6033331738220792, "learning_rate": 4.894241686942417e-05, "loss": 0.6591, "step": 4078 }, { "epoch": 0.1190914133893901, "grad_norm": 0.6471601543549892, "learning_rate": 4.8940794809407955e-05, "loss": 0.6562, "step": 4079 }, { "epoch": 0.11912060961723746, "grad_norm": 0.6916349715081059, "learning_rate": 4.893917274939173e-05, "loss": 0.7423, "step": 4080 }, { "epoch": 0.11914980584508482, "grad_norm": 0.9968469526643552, "learning_rate": 4.893755068937551e-05, "loss": 0.6979, "step": 4081 }, { "epoch": 0.11917900207293218, "grad_norm": 0.9319715013104123, "learning_rate": 4.893592862935929e-05, "loss": 0.7207, "step": 4082 }, { "epoch": 0.11920819830077954, "grad_norm": 0.6984573691317524, "learning_rate": 4.893430656934307e-05, "loss": 0.722, "step": 4083 }, { "epoch": 0.1192373945286269, "grad_norm": 0.6638453999884043, "learning_rate": 4.893268450932685e-05, "loss": 0.7365, "step": 4084 }, { "epoch": 0.11926659075647426, "grad_norm": 0.6524428266844995, "learning_rate": 4.8931062449310625e-05, "loss": 0.7367, "step": 4085 }, { "epoch": 0.11929578698432163, "grad_norm": 0.6478351182811951, "learning_rate": 4.892944038929441e-05, "loss": 0.67, "step": 4086 }, { "epoch": 0.11932498321216899, "grad_norm": 0.6070748032018675, "learning_rate": 4.892781832927819e-05, "loss": 0.609, "step": 4087 }, { "epoch": 0.11935417944001635, "grad_norm": 0.7169603646899261, "learning_rate": 4.8926196269261964e-05, "loss": 0.6905, "step": 4088 }, { "epoch": 0.11938337566786371, "grad_norm": 0.6196197612419788, "learning_rate": 4.8924574209245745e-05, "loss": 0.6286, "step": 4089 }, { "epoch": 0.11941257189571107, "grad_norm": 1.0145462503308893, "learning_rate": 4.892295214922952e-05, "loss": 0.7284, "step": 4090 }, { "epoch": 0.11944176812355843, "grad_norm": 0.6206176471533917, "learning_rate": 4.89213300892133e-05, "loss": 0.6907, "step": 4091 }, { "epoch": 0.11947096435140579, "grad_norm": 0.6402087400655729, "learning_rate": 4.8919708029197084e-05, "loss": 0.695, "step": 4092 }, { "epoch": 0.11950016057925317, "grad_norm": 0.6491132755707032, "learning_rate": 4.891808596918086e-05, "loss": 0.757, "step": 4093 }, { "epoch": 0.11952935680710053, "grad_norm": 0.6549173707201075, "learning_rate": 4.891646390916464e-05, "loss": 0.6939, "step": 4094 }, { "epoch": 0.11955855303494789, "grad_norm": 0.6199781223005386, "learning_rate": 4.8914841849148416e-05, "loss": 0.6303, "step": 4095 }, { "epoch": 0.11958774926279525, "grad_norm": 0.5980200696907494, "learning_rate": 4.89132197891322e-05, "loss": 0.6217, "step": 4096 }, { "epoch": 0.11961694549064261, "grad_norm": 0.6347959452999894, "learning_rate": 4.891159772911598e-05, "loss": 0.7265, "step": 4097 }, { "epoch": 0.11964614171848997, "grad_norm": 0.6701546977082504, "learning_rate": 4.890997566909976e-05, "loss": 0.6928, "step": 4098 }, { "epoch": 0.11967533794633733, "grad_norm": 0.6668115038395614, "learning_rate": 4.890835360908354e-05, "loss": 0.7263, "step": 4099 }, { "epoch": 0.1197045341741847, "grad_norm": 0.6187528264044169, "learning_rate": 4.890673154906732e-05, "loss": 0.633, "step": 4100 }, { "epoch": 0.11973373040203206, "grad_norm": 0.5740705153555422, "learning_rate": 4.89051094890511e-05, "loss": 0.6233, "step": 4101 }, { "epoch": 0.11976292662987942, "grad_norm": 0.7399856912184766, "learning_rate": 4.8903487429034874e-05, "loss": 0.7289, "step": 4102 }, { "epoch": 0.11979212285772678, "grad_norm": 0.6425908192671609, "learning_rate": 4.8901865369018656e-05, "loss": 0.8148, "step": 4103 }, { "epoch": 0.11982131908557414, "grad_norm": 0.7035463624784004, "learning_rate": 4.890024330900244e-05, "loss": 0.7572, "step": 4104 }, { "epoch": 0.1198505153134215, "grad_norm": 0.6452766396115069, "learning_rate": 4.889862124898621e-05, "loss": 0.7698, "step": 4105 }, { "epoch": 0.11987971154126886, "grad_norm": 0.6596919256357949, "learning_rate": 4.8896999188969995e-05, "loss": 0.608, "step": 4106 }, { "epoch": 0.11990890776911622, "grad_norm": 0.686030636997492, "learning_rate": 4.8895377128953776e-05, "loss": 0.7314, "step": 4107 }, { "epoch": 0.11993810399696359, "grad_norm": 0.5957296566084439, "learning_rate": 4.889375506893755e-05, "loss": 0.6783, "step": 4108 }, { "epoch": 0.11996730022481096, "grad_norm": 0.6372291543356622, "learning_rate": 4.889213300892133e-05, "loss": 0.7038, "step": 4109 }, { "epoch": 0.11999649645265832, "grad_norm": 0.6755157069778974, "learning_rate": 4.889051094890511e-05, "loss": 0.7861, "step": 4110 }, { "epoch": 0.12002569268050568, "grad_norm": 0.6644757894039881, "learning_rate": 4.888888888888889e-05, "loss": 0.7164, "step": 4111 }, { "epoch": 0.12005488890835304, "grad_norm": 0.6929553393414182, "learning_rate": 4.888726682887267e-05, "loss": 0.7514, "step": 4112 }, { "epoch": 0.1200840851362004, "grad_norm": 0.6044758257076321, "learning_rate": 4.888564476885645e-05, "loss": 0.6602, "step": 4113 }, { "epoch": 0.12011328136404777, "grad_norm": 0.620481966056644, "learning_rate": 4.888402270884023e-05, "loss": 0.6974, "step": 4114 }, { "epoch": 0.12014247759189513, "grad_norm": 0.6784276942765718, "learning_rate": 4.888240064882401e-05, "loss": 0.7382, "step": 4115 }, { "epoch": 0.12017167381974249, "grad_norm": 0.5885668876053296, "learning_rate": 4.888077858880779e-05, "loss": 0.5724, "step": 4116 }, { "epoch": 0.12020087004758985, "grad_norm": 0.6901986186691101, "learning_rate": 4.887915652879157e-05, "loss": 0.7165, "step": 4117 }, { "epoch": 0.12023006627543721, "grad_norm": 0.6986111750084927, "learning_rate": 4.887753446877535e-05, "loss": 0.7289, "step": 4118 }, { "epoch": 0.12025926250328457, "grad_norm": 0.66481418924342, "learning_rate": 4.887591240875913e-05, "loss": 0.7662, "step": 4119 }, { "epoch": 0.12028845873113193, "grad_norm": 0.6519514248541295, "learning_rate": 4.8874290348742905e-05, "loss": 0.7081, "step": 4120 }, { "epoch": 0.1203176549589793, "grad_norm": 0.6463048146912924, "learning_rate": 4.887266828872669e-05, "loss": 0.672, "step": 4121 }, { "epoch": 0.12034685118682666, "grad_norm": 0.6604789293509851, "learning_rate": 4.887104622871046e-05, "loss": 0.6552, "step": 4122 }, { "epoch": 0.12037604741467402, "grad_norm": 0.6290544576592484, "learning_rate": 4.8869424168694244e-05, "loss": 0.6528, "step": 4123 }, { "epoch": 0.12040524364252139, "grad_norm": 0.6300515690711509, "learning_rate": 4.8867802108678026e-05, "loss": 0.7461, "step": 4124 }, { "epoch": 0.12043443987036875, "grad_norm": 0.6714005102642786, "learning_rate": 4.88661800486618e-05, "loss": 0.7502, "step": 4125 }, { "epoch": 0.12046363609821611, "grad_norm": 0.6301439374513187, "learning_rate": 4.886455798864558e-05, "loss": 0.6645, "step": 4126 }, { "epoch": 0.12049283232606348, "grad_norm": 0.6220801050037792, "learning_rate": 4.8862935928629364e-05, "loss": 0.7062, "step": 4127 }, { "epoch": 0.12052202855391084, "grad_norm": 0.6318697010922657, "learning_rate": 4.886131386861314e-05, "loss": 0.6745, "step": 4128 }, { "epoch": 0.1205512247817582, "grad_norm": 0.60022390221739, "learning_rate": 4.885969180859692e-05, "loss": 0.6473, "step": 4129 }, { "epoch": 0.12058042100960556, "grad_norm": 0.6250001111650194, "learning_rate": 4.8858069748580696e-05, "loss": 0.6714, "step": 4130 }, { "epoch": 0.12060961723745292, "grad_norm": 0.6380053068408514, "learning_rate": 4.885644768856448e-05, "loss": 0.7176, "step": 4131 }, { "epoch": 0.12063881346530028, "grad_norm": 0.6519523252637272, "learning_rate": 4.885482562854826e-05, "loss": 0.7026, "step": 4132 }, { "epoch": 0.12066800969314764, "grad_norm": 0.6855820459141624, "learning_rate": 4.8853203568532034e-05, "loss": 0.6753, "step": 4133 }, { "epoch": 0.120697205920995, "grad_norm": 0.5831229826696955, "learning_rate": 4.885158150851582e-05, "loss": 0.6122, "step": 4134 }, { "epoch": 0.12072640214884237, "grad_norm": 0.6480512131387766, "learning_rate": 4.88499594484996e-05, "loss": 0.722, "step": 4135 }, { "epoch": 0.12075559837668973, "grad_norm": 0.6175725681996167, "learning_rate": 4.884833738848338e-05, "loss": 0.6626, "step": 4136 }, { "epoch": 0.12078479460453709, "grad_norm": 0.6577653864071368, "learning_rate": 4.8846715328467155e-05, "loss": 0.696, "step": 4137 }, { "epoch": 0.12081399083238445, "grad_norm": 0.6010623184242132, "learning_rate": 4.8845093268450936e-05, "loss": 0.6038, "step": 4138 }, { "epoch": 0.12084318706023182, "grad_norm": 0.6587224503172061, "learning_rate": 4.884347120843472e-05, "loss": 0.6861, "step": 4139 }, { "epoch": 0.12087238328807919, "grad_norm": 0.6823529214194536, "learning_rate": 4.884184914841849e-05, "loss": 0.8098, "step": 4140 }, { "epoch": 0.12090157951592655, "grad_norm": 0.7014360803478009, "learning_rate": 4.8840227088402275e-05, "loss": 0.7332, "step": 4141 }, { "epoch": 0.12093077574377391, "grad_norm": 0.6508822554314014, "learning_rate": 4.883860502838605e-05, "loss": 0.6475, "step": 4142 }, { "epoch": 0.12095997197162127, "grad_norm": 0.6356967420525738, "learning_rate": 4.883698296836983e-05, "loss": 0.6958, "step": 4143 }, { "epoch": 0.12098916819946863, "grad_norm": 0.5880553054191638, "learning_rate": 4.8835360908353613e-05, "loss": 0.6055, "step": 4144 }, { "epoch": 0.12101836442731599, "grad_norm": 0.6596946081712419, "learning_rate": 4.883373884833739e-05, "loss": 0.6677, "step": 4145 }, { "epoch": 0.12104756065516335, "grad_norm": 0.6923104362819547, "learning_rate": 4.883211678832117e-05, "loss": 0.7179, "step": 4146 }, { "epoch": 0.12107675688301071, "grad_norm": 0.6010465676963963, "learning_rate": 4.8830494728304945e-05, "loss": 0.7055, "step": 4147 }, { "epoch": 0.12110595311085808, "grad_norm": 0.6100242546340888, "learning_rate": 4.882887266828873e-05, "loss": 0.6497, "step": 4148 }, { "epoch": 0.12113514933870544, "grad_norm": 0.6444731130363417, "learning_rate": 4.882725060827251e-05, "loss": 0.7371, "step": 4149 }, { "epoch": 0.1211643455665528, "grad_norm": 0.6285079705118931, "learning_rate": 4.8825628548256284e-05, "loss": 0.6396, "step": 4150 }, { "epoch": 0.12119354179440016, "grad_norm": 0.5706757203016889, "learning_rate": 4.8824006488240065e-05, "loss": 0.5758, "step": 4151 }, { "epoch": 0.12122273802224752, "grad_norm": 0.6629656158932381, "learning_rate": 4.882238442822385e-05, "loss": 0.7167, "step": 4152 }, { "epoch": 0.12125193425009488, "grad_norm": 0.6167470975697451, "learning_rate": 4.882076236820763e-05, "loss": 0.6824, "step": 4153 }, { "epoch": 0.12128113047794226, "grad_norm": 0.5933962935737704, "learning_rate": 4.881914030819141e-05, "loss": 0.6448, "step": 4154 }, { "epoch": 0.12131032670578962, "grad_norm": 0.5978078044835551, "learning_rate": 4.8817518248175186e-05, "loss": 0.6346, "step": 4155 }, { "epoch": 0.12133952293363698, "grad_norm": 0.757769191355763, "learning_rate": 4.881589618815897e-05, "loss": 0.7917, "step": 4156 }, { "epoch": 0.12136871916148434, "grad_norm": 0.6804289195703428, "learning_rate": 4.881427412814274e-05, "loss": 0.7818, "step": 4157 }, { "epoch": 0.1213979153893317, "grad_norm": 0.952148457111128, "learning_rate": 4.8812652068126524e-05, "loss": 0.837, "step": 4158 }, { "epoch": 0.12142711161717906, "grad_norm": 0.6692394780999722, "learning_rate": 4.8811030008110306e-05, "loss": 0.7558, "step": 4159 }, { "epoch": 0.12145630784502642, "grad_norm": 0.6586082497027433, "learning_rate": 4.880940794809408e-05, "loss": 0.7563, "step": 4160 }, { "epoch": 0.12148550407287378, "grad_norm": 0.6707940790838491, "learning_rate": 4.880778588807786e-05, "loss": 0.7813, "step": 4161 }, { "epoch": 0.12151470030072115, "grad_norm": 0.6301081933390021, "learning_rate": 4.880616382806164e-05, "loss": 0.6542, "step": 4162 }, { "epoch": 0.1215438965285685, "grad_norm": 0.6654895558390195, "learning_rate": 4.880454176804542e-05, "loss": 0.6848, "step": 4163 }, { "epoch": 0.12157309275641587, "grad_norm": 0.9944171782255045, "learning_rate": 4.88029197080292e-05, "loss": 0.8523, "step": 4164 }, { "epoch": 0.12160228898426323, "grad_norm": 0.8255488320215278, "learning_rate": 4.8801297648012976e-05, "loss": 0.6711, "step": 4165 }, { "epoch": 0.12163148521211059, "grad_norm": 0.7523270151198077, "learning_rate": 4.879967558799676e-05, "loss": 0.6998, "step": 4166 }, { "epoch": 0.12166068143995795, "grad_norm": 0.8044693622767857, "learning_rate": 4.879805352798053e-05, "loss": 0.7634, "step": 4167 }, { "epoch": 0.12168987766780531, "grad_norm": 0.6453635571345275, "learning_rate": 4.8796431467964315e-05, "loss": 0.6491, "step": 4168 }, { "epoch": 0.12171907389565269, "grad_norm": 0.5976340476581645, "learning_rate": 4.8794809407948097e-05, "loss": 0.642, "step": 4169 }, { "epoch": 0.12174827012350005, "grad_norm": 0.7251128313191257, "learning_rate": 4.879318734793187e-05, "loss": 0.7895, "step": 4170 }, { "epoch": 0.12177746635134741, "grad_norm": 0.5895544920222477, "learning_rate": 4.879156528791565e-05, "loss": 0.6727, "step": 4171 }, { "epoch": 0.12180666257919477, "grad_norm": 0.7198426334810676, "learning_rate": 4.8789943227899435e-05, "loss": 0.7282, "step": 4172 }, { "epoch": 0.12183585880704213, "grad_norm": 0.6452201813269682, "learning_rate": 4.878832116788322e-05, "loss": 0.6982, "step": 4173 }, { "epoch": 0.1218650550348895, "grad_norm": 0.6617543367428775, "learning_rate": 4.8786699107867e-05, "loss": 0.701, "step": 4174 }, { "epoch": 0.12189425126273686, "grad_norm": 0.7087901430047062, "learning_rate": 4.8785077047850774e-05, "loss": 0.742, "step": 4175 }, { "epoch": 0.12192344749058422, "grad_norm": 0.7003635619980435, "learning_rate": 4.8783454987834555e-05, "loss": 0.7694, "step": 4176 }, { "epoch": 0.12195264371843158, "grad_norm": 0.5792184330196603, "learning_rate": 4.878183292781833e-05, "loss": 0.6372, "step": 4177 }, { "epoch": 0.12198183994627894, "grad_norm": 0.6097025744984831, "learning_rate": 4.878021086780211e-05, "loss": 0.6729, "step": 4178 }, { "epoch": 0.1220110361741263, "grad_norm": 0.6465052964903563, "learning_rate": 4.8778588807785894e-05, "loss": 0.682, "step": 4179 }, { "epoch": 0.12204023240197366, "grad_norm": 0.6264619546628023, "learning_rate": 4.877696674776967e-05, "loss": 0.6242, "step": 4180 }, { "epoch": 0.12206942862982102, "grad_norm": 0.6137823700371449, "learning_rate": 4.877534468775345e-05, "loss": 0.654, "step": 4181 }, { "epoch": 0.12209862485766838, "grad_norm": 0.7768145668481236, "learning_rate": 4.8773722627737226e-05, "loss": 0.6957, "step": 4182 }, { "epoch": 0.12212782108551574, "grad_norm": 0.6313751172454832, "learning_rate": 4.877210056772101e-05, "loss": 0.6497, "step": 4183 }, { "epoch": 0.12215701731336312, "grad_norm": 0.6539439171201376, "learning_rate": 4.877047850770479e-05, "loss": 0.6619, "step": 4184 }, { "epoch": 0.12218621354121048, "grad_norm": 0.6081339671670765, "learning_rate": 4.8768856447688564e-05, "loss": 0.6854, "step": 4185 }, { "epoch": 0.12221540976905784, "grad_norm": 0.622610970696453, "learning_rate": 4.8767234387672346e-05, "loss": 0.6465, "step": 4186 }, { "epoch": 0.1222446059969052, "grad_norm": 0.6686775621422768, "learning_rate": 4.876561232765612e-05, "loss": 0.7517, "step": 4187 }, { "epoch": 0.12227380222475256, "grad_norm": 0.661017784179002, "learning_rate": 4.87639902676399e-05, "loss": 0.7959, "step": 4188 }, { "epoch": 0.12230299845259993, "grad_norm": 0.576943031110484, "learning_rate": 4.8762368207623684e-05, "loss": 0.5815, "step": 4189 }, { "epoch": 0.12233219468044729, "grad_norm": 0.5800892079291576, "learning_rate": 4.876074614760746e-05, "loss": 0.6221, "step": 4190 }, { "epoch": 0.12236139090829465, "grad_norm": 0.6449425183328579, "learning_rate": 4.875912408759125e-05, "loss": 0.7662, "step": 4191 }, { "epoch": 0.12239058713614201, "grad_norm": 0.6201200444224306, "learning_rate": 4.875750202757502e-05, "loss": 0.6858, "step": 4192 }, { "epoch": 0.12241978336398937, "grad_norm": 0.6884820246610174, "learning_rate": 4.8755879967558805e-05, "loss": 0.7776, "step": 4193 }, { "epoch": 0.12244897959183673, "grad_norm": 0.6823337338019346, "learning_rate": 4.8754257907542586e-05, "loss": 0.6924, "step": 4194 }, { "epoch": 0.1224781758196841, "grad_norm": 0.6307539572616366, "learning_rate": 4.875263584752636e-05, "loss": 0.7001, "step": 4195 }, { "epoch": 0.12250737204753145, "grad_norm": 0.9334191032360857, "learning_rate": 4.875101378751014e-05, "loss": 0.7591, "step": 4196 }, { "epoch": 0.12253656827537882, "grad_norm": 0.8415700867624364, "learning_rate": 4.874939172749392e-05, "loss": 0.6789, "step": 4197 }, { "epoch": 0.12256576450322618, "grad_norm": 0.6834432154642686, "learning_rate": 4.87477696674777e-05, "loss": 0.7382, "step": 4198 }, { "epoch": 0.12259496073107355, "grad_norm": 0.6320900983822832, "learning_rate": 4.874614760746148e-05, "loss": 0.7242, "step": 4199 }, { "epoch": 0.12262415695892091, "grad_norm": 0.5577977806514922, "learning_rate": 4.874452554744526e-05, "loss": 0.5624, "step": 4200 }, { "epoch": 0.12265335318676827, "grad_norm": 0.636744220768491, "learning_rate": 4.874290348742904e-05, "loss": 0.691, "step": 4201 }, { "epoch": 0.12268254941461564, "grad_norm": 0.6284625378529111, "learning_rate": 4.874128142741281e-05, "loss": 0.6651, "step": 4202 }, { "epoch": 0.122711745642463, "grad_norm": 0.5865006838564253, "learning_rate": 4.8739659367396595e-05, "loss": 0.6158, "step": 4203 }, { "epoch": 0.12274094187031036, "grad_norm": 0.6121313205286955, "learning_rate": 4.873803730738038e-05, "loss": 0.6553, "step": 4204 }, { "epoch": 0.12277013809815772, "grad_norm": 0.7133568216971865, "learning_rate": 4.873641524736415e-05, "loss": 0.7603, "step": 4205 }, { "epoch": 0.12279933432600508, "grad_norm": 0.6987629299768163, "learning_rate": 4.8734793187347934e-05, "loss": 0.7073, "step": 4206 }, { "epoch": 0.12282853055385244, "grad_norm": 7.603508089102242, "learning_rate": 4.873317112733171e-05, "loss": 0.8346, "step": 4207 }, { "epoch": 0.1228577267816998, "grad_norm": 0.6183323071596569, "learning_rate": 4.873154906731549e-05, "loss": 0.6817, "step": 4208 }, { "epoch": 0.12288692300954716, "grad_norm": 0.6519287480889274, "learning_rate": 4.872992700729927e-05, "loss": 0.7223, "step": 4209 }, { "epoch": 0.12291611923739452, "grad_norm": 0.6196069292204989, "learning_rate": 4.8728304947283054e-05, "loss": 0.6657, "step": 4210 }, { "epoch": 0.12294531546524189, "grad_norm": 0.5907392028732242, "learning_rate": 4.8726682887266836e-05, "loss": 0.644, "step": 4211 }, { "epoch": 0.12297451169308925, "grad_norm": 0.6320999295684188, "learning_rate": 4.872506082725061e-05, "loss": 0.6838, "step": 4212 }, { "epoch": 0.12300370792093661, "grad_norm": 0.6253691472118444, "learning_rate": 4.872343876723439e-05, "loss": 0.6739, "step": 4213 }, { "epoch": 0.12303290414878398, "grad_norm": 0.5806031616014843, "learning_rate": 4.8721816707218174e-05, "loss": 0.6642, "step": 4214 }, { "epoch": 0.12306210037663134, "grad_norm": 0.6898641188343202, "learning_rate": 4.872019464720195e-05, "loss": 0.7748, "step": 4215 }, { "epoch": 0.1230912966044787, "grad_norm": 0.6104552515132423, "learning_rate": 4.871857258718573e-05, "loss": 0.6909, "step": 4216 }, { "epoch": 0.12312049283232607, "grad_norm": 0.6323344122980785, "learning_rate": 4.8716950527169506e-05, "loss": 0.699, "step": 4217 }, { "epoch": 0.12314968906017343, "grad_norm": 0.6661942947420971, "learning_rate": 4.871532846715329e-05, "loss": 0.7835, "step": 4218 }, { "epoch": 0.12317888528802079, "grad_norm": 0.6728075420169874, "learning_rate": 4.871370640713707e-05, "loss": 0.7276, "step": 4219 }, { "epoch": 0.12320808151586815, "grad_norm": 0.6896466272211176, "learning_rate": 4.8712084347120844e-05, "loss": 0.6721, "step": 4220 }, { "epoch": 0.12323727774371551, "grad_norm": 0.6236725669406259, "learning_rate": 4.8710462287104626e-05, "loss": 0.6515, "step": 4221 }, { "epoch": 0.12326647397156287, "grad_norm": 0.7569905048271345, "learning_rate": 4.87088402270884e-05, "loss": 0.7277, "step": 4222 }, { "epoch": 0.12329567019941023, "grad_norm": 0.5815365182702703, "learning_rate": 4.870721816707218e-05, "loss": 0.5898, "step": 4223 }, { "epoch": 0.1233248664272576, "grad_norm": 0.6651919729081676, "learning_rate": 4.8705596107055965e-05, "loss": 0.6804, "step": 4224 }, { "epoch": 0.12335406265510496, "grad_norm": 0.6404287118490075, "learning_rate": 4.870397404703974e-05, "loss": 0.7253, "step": 4225 }, { "epoch": 0.12338325888295232, "grad_norm": 0.6528900322082971, "learning_rate": 4.870235198702352e-05, "loss": 0.6673, "step": 4226 }, { "epoch": 0.12341245511079968, "grad_norm": 0.6393126396357648, "learning_rate": 4.8700729927007296e-05, "loss": 0.7018, "step": 4227 }, { "epoch": 0.12344165133864704, "grad_norm": 0.6149375067061973, "learning_rate": 4.869910786699108e-05, "loss": 0.6509, "step": 4228 }, { "epoch": 0.12347084756649442, "grad_norm": 0.6047011337329198, "learning_rate": 4.869748580697486e-05, "loss": 0.642, "step": 4229 }, { "epoch": 0.12350004379434178, "grad_norm": 0.6393484645887333, "learning_rate": 4.869586374695864e-05, "loss": 0.6988, "step": 4230 }, { "epoch": 0.12352924002218914, "grad_norm": 0.6060547601820272, "learning_rate": 4.8694241686942423e-05, "loss": 0.5929, "step": 4231 }, { "epoch": 0.1235584362500365, "grad_norm": 0.6022147658894171, "learning_rate": 4.86926196269262e-05, "loss": 0.6012, "step": 4232 }, { "epoch": 0.12358763247788386, "grad_norm": 0.6652093717857512, "learning_rate": 4.869099756690998e-05, "loss": 0.7559, "step": 4233 }, { "epoch": 0.12361682870573122, "grad_norm": 0.6414278834999727, "learning_rate": 4.8689375506893755e-05, "loss": 0.7217, "step": 4234 }, { "epoch": 0.12364602493357858, "grad_norm": 0.6321436959618083, "learning_rate": 4.868775344687754e-05, "loss": 0.6686, "step": 4235 }, { "epoch": 0.12367522116142594, "grad_norm": 0.6079523113585896, "learning_rate": 4.868613138686132e-05, "loss": 0.6879, "step": 4236 }, { "epoch": 0.1237044173892733, "grad_norm": 0.6068109131819046, "learning_rate": 4.8684509326845094e-05, "loss": 0.6038, "step": 4237 }, { "epoch": 0.12373361361712067, "grad_norm": 0.7025095414374417, "learning_rate": 4.8682887266828876e-05, "loss": 0.7659, "step": 4238 }, { "epoch": 0.12376280984496803, "grad_norm": 0.6158639907962729, "learning_rate": 4.868126520681266e-05, "loss": 0.6962, "step": 4239 }, { "epoch": 0.12379200607281539, "grad_norm": 0.6380266550199327, "learning_rate": 4.867964314679643e-05, "loss": 0.7417, "step": 4240 }, { "epoch": 0.12382120230066275, "grad_norm": 0.5840129803247172, "learning_rate": 4.8678021086780214e-05, "loss": 0.6192, "step": 4241 }, { "epoch": 0.12385039852851011, "grad_norm": 0.6666192158931304, "learning_rate": 4.867639902676399e-05, "loss": 0.7856, "step": 4242 }, { "epoch": 0.12387959475635747, "grad_norm": 0.6171841327354561, "learning_rate": 4.867477696674777e-05, "loss": 0.6932, "step": 4243 }, { "epoch": 0.12390879098420485, "grad_norm": 0.6606152767139908, "learning_rate": 4.867315490673155e-05, "loss": 0.7498, "step": 4244 }, { "epoch": 0.12393798721205221, "grad_norm": 0.6526403211504703, "learning_rate": 4.867153284671533e-05, "loss": 0.6874, "step": 4245 }, { "epoch": 0.12396718343989957, "grad_norm": 0.6324889761687535, "learning_rate": 4.866991078669911e-05, "loss": 0.6928, "step": 4246 }, { "epoch": 0.12399637966774693, "grad_norm": 0.580906905115417, "learning_rate": 4.8668288726682884e-05, "loss": 0.6744, "step": 4247 }, { "epoch": 0.12402557589559429, "grad_norm": 0.5901945137638542, "learning_rate": 4.866666666666667e-05, "loss": 0.6161, "step": 4248 }, { "epoch": 0.12405477212344165, "grad_norm": 0.6591347954145437, "learning_rate": 4.866504460665045e-05, "loss": 0.6674, "step": 4249 }, { "epoch": 0.12408396835128901, "grad_norm": 0.671022081184862, "learning_rate": 4.866342254663423e-05, "loss": 0.6064, "step": 4250 }, { "epoch": 0.12411316457913638, "grad_norm": 0.6067606194001586, "learning_rate": 4.866180048661801e-05, "loss": 0.6596, "step": 4251 }, { "epoch": 0.12414236080698374, "grad_norm": 0.6309651167575787, "learning_rate": 4.8660178426601786e-05, "loss": 0.6987, "step": 4252 }, { "epoch": 0.1241715570348311, "grad_norm": 0.7051963240056736, "learning_rate": 4.865855636658557e-05, "loss": 0.7473, "step": 4253 }, { "epoch": 0.12420075326267846, "grad_norm": 0.6559107664826205, "learning_rate": 4.865693430656934e-05, "loss": 0.698, "step": 4254 }, { "epoch": 0.12422994949052582, "grad_norm": 0.6203295500585296, "learning_rate": 4.8655312246553125e-05, "loss": 0.7079, "step": 4255 }, { "epoch": 0.12425914571837318, "grad_norm": 0.6559354246572864, "learning_rate": 4.8653690186536907e-05, "loss": 0.7246, "step": 4256 }, { "epoch": 0.12428834194622054, "grad_norm": 0.6595925262672286, "learning_rate": 4.865206812652068e-05, "loss": 0.6663, "step": 4257 }, { "epoch": 0.1243175381740679, "grad_norm": 0.6017616920752089, "learning_rate": 4.865044606650446e-05, "loss": 0.6267, "step": 4258 }, { "epoch": 0.12434673440191528, "grad_norm": 0.6556668149479495, "learning_rate": 4.864882400648824e-05, "loss": 0.781, "step": 4259 }, { "epoch": 0.12437593062976264, "grad_norm": 0.7120562214402699, "learning_rate": 4.864720194647202e-05, "loss": 0.8279, "step": 4260 }, { "epoch": 0.12440512685761, "grad_norm": 0.6405691612406861, "learning_rate": 4.86455798864558e-05, "loss": 0.649, "step": 4261 }, { "epoch": 0.12443432308545736, "grad_norm": 0.6953285520958746, "learning_rate": 4.864395782643958e-05, "loss": 0.6593, "step": 4262 }, { "epoch": 0.12446351931330472, "grad_norm": 0.6096868640274463, "learning_rate": 4.864233576642336e-05, "loss": 0.6807, "step": 4263 }, { "epoch": 0.12449271554115209, "grad_norm": 0.5991804948795647, "learning_rate": 4.864071370640714e-05, "loss": 0.69, "step": 4264 }, { "epoch": 0.12452191176899945, "grad_norm": 0.6409282335282942, "learning_rate": 4.8639091646390915e-05, "loss": 0.763, "step": 4265 }, { "epoch": 0.12455110799684681, "grad_norm": 0.623595694558114, "learning_rate": 4.8637469586374704e-05, "loss": 0.5855, "step": 4266 }, { "epoch": 0.12458030422469417, "grad_norm": 0.5618717219318485, "learning_rate": 4.863584752635848e-05, "loss": 0.567, "step": 4267 }, { "epoch": 0.12460950045254153, "grad_norm": 0.5552018789851009, "learning_rate": 4.863422546634226e-05, "loss": 0.5761, "step": 4268 }, { "epoch": 0.12463869668038889, "grad_norm": 0.6306813280063099, "learning_rate": 4.8632603406326036e-05, "loss": 0.6917, "step": 4269 }, { "epoch": 0.12466789290823625, "grad_norm": 0.5957137104112077, "learning_rate": 4.863098134630982e-05, "loss": 0.6269, "step": 4270 }, { "epoch": 0.12469708913608361, "grad_norm": 0.659300709923478, "learning_rate": 4.86293592862936e-05, "loss": 0.7519, "step": 4271 }, { "epoch": 0.12472628536393097, "grad_norm": 0.791341166328972, "learning_rate": 4.8627737226277374e-05, "loss": 0.6662, "step": 4272 }, { "epoch": 0.12475548159177834, "grad_norm": 0.5977982157072617, "learning_rate": 4.8626115166261156e-05, "loss": 0.6119, "step": 4273 }, { "epoch": 0.12478467781962571, "grad_norm": 0.6723067796689675, "learning_rate": 4.862449310624493e-05, "loss": 0.6865, "step": 4274 }, { "epoch": 0.12481387404747307, "grad_norm": 0.6970892505808285, "learning_rate": 4.862287104622871e-05, "loss": 0.6945, "step": 4275 }, { "epoch": 0.12484307027532043, "grad_norm": 0.7042497090263778, "learning_rate": 4.8621248986212494e-05, "loss": 0.852, "step": 4276 }, { "epoch": 0.1248722665031678, "grad_norm": 0.6388034877413473, "learning_rate": 4.861962692619627e-05, "loss": 0.7542, "step": 4277 }, { "epoch": 0.12490146273101516, "grad_norm": 0.6200271641665659, "learning_rate": 4.861800486618005e-05, "loss": 0.6906, "step": 4278 }, { "epoch": 0.12493065895886252, "grad_norm": 0.6047293042339009, "learning_rate": 4.8616382806163826e-05, "loss": 0.6572, "step": 4279 }, { "epoch": 0.12495985518670988, "grad_norm": 0.6529812862321379, "learning_rate": 4.861476074614761e-05, "loss": 0.7235, "step": 4280 }, { "epoch": 0.12498905141455724, "grad_norm": 0.603920829288699, "learning_rate": 4.861313868613139e-05, "loss": 0.6347, "step": 4281 }, { "epoch": 0.1250182476424046, "grad_norm": 0.6946579886729088, "learning_rate": 4.8611516626115165e-05, "loss": 0.7668, "step": 4282 }, { "epoch": 0.12504744387025196, "grad_norm": 0.5938377576869994, "learning_rate": 4.8609894566098946e-05, "loss": 0.6893, "step": 4283 }, { "epoch": 0.12507664009809932, "grad_norm": 0.6312168104971796, "learning_rate": 4.860827250608273e-05, "loss": 0.7068, "step": 4284 }, { "epoch": 0.12510583632594668, "grad_norm": 0.6187691629871594, "learning_rate": 4.860665044606651e-05, "loss": 0.7095, "step": 4285 }, { "epoch": 0.12513503255379405, "grad_norm": 0.5810662949904415, "learning_rate": 4.860502838605029e-05, "loss": 0.6232, "step": 4286 }, { "epoch": 0.1251642287816414, "grad_norm": 0.6133909744856304, "learning_rate": 4.860340632603407e-05, "loss": 0.6521, "step": 4287 }, { "epoch": 0.12519342500948877, "grad_norm": 0.6130089824041881, "learning_rate": 4.860178426601785e-05, "loss": 0.6478, "step": 4288 }, { "epoch": 0.12522262123733613, "grad_norm": 0.6264291079825576, "learning_rate": 4.8600162206001623e-05, "loss": 0.7286, "step": 4289 }, { "epoch": 0.1252518174651835, "grad_norm": 0.6825019145073996, "learning_rate": 4.8598540145985405e-05, "loss": 0.7595, "step": 4290 }, { "epoch": 0.12528101369303085, "grad_norm": 0.6097110925796997, "learning_rate": 4.859691808596919e-05, "loss": 0.6511, "step": 4291 }, { "epoch": 0.1253102099208782, "grad_norm": 0.6448884117195536, "learning_rate": 4.859529602595296e-05, "loss": 0.7033, "step": 4292 }, { "epoch": 0.12533940614872557, "grad_norm": 0.6714352222834484, "learning_rate": 4.8593673965936744e-05, "loss": 0.8054, "step": 4293 }, { "epoch": 0.12536860237657294, "grad_norm": 0.6343197756400164, "learning_rate": 4.859205190592052e-05, "loss": 0.6669, "step": 4294 }, { "epoch": 0.1253977986044203, "grad_norm": 0.6006439888217958, "learning_rate": 4.85904298459043e-05, "loss": 0.5911, "step": 4295 }, { "epoch": 0.12542699483226766, "grad_norm": 0.707731094389808, "learning_rate": 4.858880778588808e-05, "loss": 0.7874, "step": 4296 }, { "epoch": 0.12545619106011505, "grad_norm": 0.5895937447999018, "learning_rate": 4.858718572587186e-05, "loss": 0.6537, "step": 4297 }, { "epoch": 0.1254853872879624, "grad_norm": 0.6697205606101232, "learning_rate": 4.858556366585564e-05, "loss": 0.7074, "step": 4298 }, { "epoch": 0.12551458351580977, "grad_norm": 0.5797279489757029, "learning_rate": 4.8583941605839414e-05, "loss": 0.6388, "step": 4299 }, { "epoch": 0.12554377974365713, "grad_norm": 0.6878618460183027, "learning_rate": 4.8582319545823196e-05, "loss": 0.7737, "step": 4300 }, { "epoch": 0.1255729759715045, "grad_norm": 0.5538642906234731, "learning_rate": 4.858069748580698e-05, "loss": 0.6005, "step": 4301 }, { "epoch": 0.12560217219935185, "grad_norm": 0.6141650732446562, "learning_rate": 4.857907542579075e-05, "loss": 0.7049, "step": 4302 }, { "epoch": 0.1256313684271992, "grad_norm": 0.6947770024619799, "learning_rate": 4.8577453365774534e-05, "loss": 0.6774, "step": 4303 }, { "epoch": 0.12566056465504657, "grad_norm": 0.6544274221975674, "learning_rate": 4.8575831305758316e-05, "loss": 0.6871, "step": 4304 }, { "epoch": 0.12568976088289394, "grad_norm": 0.663397054761009, "learning_rate": 4.85742092457421e-05, "loss": 0.7638, "step": 4305 }, { "epoch": 0.1257189571107413, "grad_norm": 0.7081107366012472, "learning_rate": 4.857258718572588e-05, "loss": 0.847, "step": 4306 }, { "epoch": 0.12574815333858866, "grad_norm": 0.6118052330651125, "learning_rate": 4.8570965125709654e-05, "loss": 0.6826, "step": 4307 }, { "epoch": 0.12577734956643602, "grad_norm": 0.6963555336315161, "learning_rate": 4.8569343065693436e-05, "loss": 0.7089, "step": 4308 }, { "epoch": 0.12580654579428338, "grad_norm": 0.612161867723125, "learning_rate": 4.856772100567721e-05, "loss": 0.665, "step": 4309 }, { "epoch": 0.12583574202213074, "grad_norm": 0.628268849586153, "learning_rate": 4.856609894566099e-05, "loss": 0.7105, "step": 4310 }, { "epoch": 0.1258649382499781, "grad_norm": 0.7498998239850786, "learning_rate": 4.8564476885644775e-05, "loss": 0.6224, "step": 4311 }, { "epoch": 0.12589413447782546, "grad_norm": 0.582595523658195, "learning_rate": 4.856285482562855e-05, "loss": 0.6157, "step": 4312 }, { "epoch": 0.12592333070567283, "grad_norm": 0.7235257705702719, "learning_rate": 4.856123276561233e-05, "loss": 0.703, "step": 4313 }, { "epoch": 0.1259525269335202, "grad_norm": 0.6153419262459165, "learning_rate": 4.8559610705596106e-05, "loss": 0.6847, "step": 4314 }, { "epoch": 0.12598172316136755, "grad_norm": 0.6541826917423723, "learning_rate": 4.855798864557989e-05, "loss": 0.7461, "step": 4315 }, { "epoch": 0.1260109193892149, "grad_norm": 0.6717643108752003, "learning_rate": 4.855636658556367e-05, "loss": 0.6654, "step": 4316 }, { "epoch": 0.12604011561706227, "grad_norm": 0.5679884780506274, "learning_rate": 4.8554744525547445e-05, "loss": 0.6105, "step": 4317 }, { "epoch": 0.12606931184490963, "grad_norm": 0.6119652161898188, "learning_rate": 4.855312246553123e-05, "loss": 0.7045, "step": 4318 }, { "epoch": 0.126098508072757, "grad_norm": 0.6160470944431817, "learning_rate": 4.8551500405515e-05, "loss": 0.6761, "step": 4319 }, { "epoch": 0.12612770430060435, "grad_norm": 0.6788825050350039, "learning_rate": 4.8549878345498783e-05, "loss": 0.7819, "step": 4320 }, { "epoch": 0.12615690052845172, "grad_norm": 0.5719765973725764, "learning_rate": 4.8548256285482565e-05, "loss": 0.5977, "step": 4321 }, { "epoch": 0.12618609675629908, "grad_norm": 0.7180635826428053, "learning_rate": 4.854663422546634e-05, "loss": 0.6885, "step": 4322 }, { "epoch": 0.12621529298414644, "grad_norm": 0.6459441357909826, "learning_rate": 4.854501216545013e-05, "loss": 0.691, "step": 4323 }, { "epoch": 0.1262444892119938, "grad_norm": 0.6156527097427353, "learning_rate": 4.8543390105433904e-05, "loss": 0.6029, "step": 4324 }, { "epoch": 0.12627368543984116, "grad_norm": 0.5937186933459625, "learning_rate": 4.8541768045417686e-05, "loss": 0.6266, "step": 4325 }, { "epoch": 0.12630288166768852, "grad_norm": 0.641313622544928, "learning_rate": 4.854014598540147e-05, "loss": 0.7299, "step": 4326 }, { "epoch": 0.1263320778955359, "grad_norm": 0.5999410263847781, "learning_rate": 4.853852392538524e-05, "loss": 0.6127, "step": 4327 }, { "epoch": 0.12636127412338327, "grad_norm": 0.665260036814714, "learning_rate": 4.8536901865369024e-05, "loss": 0.7525, "step": 4328 }, { "epoch": 0.12639047035123063, "grad_norm": 0.6722570453609156, "learning_rate": 4.85352798053528e-05, "loss": 0.718, "step": 4329 }, { "epoch": 0.126419666579078, "grad_norm": 0.6544188793582753, "learning_rate": 4.853365774533658e-05, "loss": 0.7164, "step": 4330 }, { "epoch": 0.12644886280692536, "grad_norm": 0.608459018721037, "learning_rate": 4.853203568532036e-05, "loss": 0.6215, "step": 4331 }, { "epoch": 0.12647805903477272, "grad_norm": 0.6113270174919899, "learning_rate": 4.853041362530414e-05, "loss": 0.7083, "step": 4332 }, { "epoch": 0.12650725526262008, "grad_norm": 0.6513672364495666, "learning_rate": 4.852879156528792e-05, "loss": 0.6797, "step": 4333 }, { "epoch": 0.12653645149046744, "grad_norm": 0.5750122056064484, "learning_rate": 4.8527169505271694e-05, "loss": 0.6182, "step": 4334 }, { "epoch": 0.1265656477183148, "grad_norm": 0.6113409801399077, "learning_rate": 4.8525547445255476e-05, "loss": 0.6842, "step": 4335 }, { "epoch": 0.12659484394616216, "grad_norm": 0.5878779922417012, "learning_rate": 4.852392538523926e-05, "loss": 0.59, "step": 4336 }, { "epoch": 0.12662404017400952, "grad_norm": 0.6741533230638737, "learning_rate": 4.852230332522303e-05, "loss": 0.7187, "step": 4337 }, { "epoch": 0.12665323640185688, "grad_norm": 0.6328628018903121, "learning_rate": 4.8520681265206815e-05, "loss": 0.7087, "step": 4338 }, { "epoch": 0.12668243262970424, "grad_norm": 0.5830406116820961, "learning_rate": 4.851905920519059e-05, "loss": 0.5872, "step": 4339 }, { "epoch": 0.1267116288575516, "grad_norm": 0.5932977679942302, "learning_rate": 4.851743714517437e-05, "loss": 0.6789, "step": 4340 }, { "epoch": 0.12674082508539897, "grad_norm": 0.7319514137104293, "learning_rate": 4.851581508515815e-05, "loss": 0.7369, "step": 4341 }, { "epoch": 0.12677002131324633, "grad_norm": 0.6573626754908208, "learning_rate": 4.8514193025141935e-05, "loss": 0.7504, "step": 4342 }, { "epoch": 0.1267992175410937, "grad_norm": 0.6858353923832694, "learning_rate": 4.8512570965125717e-05, "loss": 0.6933, "step": 4343 }, { "epoch": 0.12682841376894105, "grad_norm": 0.6299936402369152, "learning_rate": 4.851094890510949e-05, "loss": 0.6745, "step": 4344 }, { "epoch": 0.1268576099967884, "grad_norm": 0.581178552470194, "learning_rate": 4.850932684509327e-05, "loss": 0.5916, "step": 4345 }, { "epoch": 0.12688680622463577, "grad_norm": 0.6379643052511855, "learning_rate": 4.850770478507705e-05, "loss": 0.6936, "step": 4346 }, { "epoch": 0.12691600245248313, "grad_norm": 0.6534532829951246, "learning_rate": 4.850608272506083e-05, "loss": 0.697, "step": 4347 }, { "epoch": 0.1269451986803305, "grad_norm": 0.7937867468177271, "learning_rate": 4.850446066504461e-05, "loss": 0.7325, "step": 4348 }, { "epoch": 0.12697439490817786, "grad_norm": 0.6235188900265033, "learning_rate": 4.850283860502839e-05, "loss": 0.6131, "step": 4349 }, { "epoch": 0.12700359113602522, "grad_norm": 0.6218707986809708, "learning_rate": 4.850121654501217e-05, "loss": 0.6596, "step": 4350 }, { "epoch": 0.12703278736387258, "grad_norm": 0.6361512809044645, "learning_rate": 4.849959448499595e-05, "loss": 0.7438, "step": 4351 }, { "epoch": 0.12706198359171994, "grad_norm": 0.6427506500867064, "learning_rate": 4.8497972424979725e-05, "loss": 0.6493, "step": 4352 }, { "epoch": 0.1270911798195673, "grad_norm": 0.6721518058820122, "learning_rate": 4.849635036496351e-05, "loss": 0.8533, "step": 4353 }, { "epoch": 0.12712037604741466, "grad_norm": 0.6472864301219611, "learning_rate": 4.849472830494728e-05, "loss": 0.6824, "step": 4354 }, { "epoch": 0.12714957227526202, "grad_norm": 0.630887821198275, "learning_rate": 4.8493106244931064e-05, "loss": 0.6511, "step": 4355 }, { "epoch": 0.12717876850310939, "grad_norm": 0.6797802012091057, "learning_rate": 4.8491484184914846e-05, "loss": 0.7126, "step": 4356 }, { "epoch": 0.12720796473095677, "grad_norm": 0.6863877102808436, "learning_rate": 4.848986212489862e-05, "loss": 0.6985, "step": 4357 }, { "epoch": 0.12723716095880414, "grad_norm": 0.8671333509486989, "learning_rate": 4.84882400648824e-05, "loss": 0.7073, "step": 4358 }, { "epoch": 0.1272663571866515, "grad_norm": 0.6781363100445359, "learning_rate": 4.848661800486618e-05, "loss": 0.675, "step": 4359 }, { "epoch": 0.12729555341449886, "grad_norm": 0.6783972101573482, "learning_rate": 4.848499594484996e-05, "loss": 0.7048, "step": 4360 }, { "epoch": 0.12732474964234622, "grad_norm": 0.6419526665093404, "learning_rate": 4.848337388483374e-05, "loss": 0.7331, "step": 4361 }, { "epoch": 0.12735394587019358, "grad_norm": 0.64595698608064, "learning_rate": 4.848175182481752e-05, "loss": 0.6682, "step": 4362 }, { "epoch": 0.12738314209804094, "grad_norm": 0.638235495101104, "learning_rate": 4.8480129764801304e-05, "loss": 0.6722, "step": 4363 }, { "epoch": 0.1274123383258883, "grad_norm": 0.665667655456827, "learning_rate": 4.847850770478508e-05, "loss": 0.6108, "step": 4364 }, { "epoch": 0.12744153455373566, "grad_norm": 0.5884932535334929, "learning_rate": 4.847688564476886e-05, "loss": 0.59, "step": 4365 }, { "epoch": 0.12747073078158302, "grad_norm": 0.6781787572555853, "learning_rate": 4.8475263584752636e-05, "loss": 0.7761, "step": 4366 }, { "epoch": 0.12749992700943039, "grad_norm": 0.6526946470797936, "learning_rate": 4.847364152473642e-05, "loss": 0.7072, "step": 4367 }, { "epoch": 0.12752912323727775, "grad_norm": 0.6200920347331786, "learning_rate": 4.84720194647202e-05, "loss": 0.6963, "step": 4368 }, { "epoch": 0.1275583194651251, "grad_norm": 0.6734945271042958, "learning_rate": 4.8470397404703975e-05, "loss": 0.7525, "step": 4369 }, { "epoch": 0.12758751569297247, "grad_norm": 0.5749513586904819, "learning_rate": 4.8468775344687756e-05, "loss": 0.6185, "step": 4370 }, { "epoch": 0.12761671192081983, "grad_norm": 0.8011244302293168, "learning_rate": 4.846715328467154e-05, "loss": 0.7436, "step": 4371 }, { "epoch": 0.1276459081486672, "grad_norm": 0.697296095964778, "learning_rate": 4.846553122465531e-05, "loss": 0.7755, "step": 4372 }, { "epoch": 0.12767510437651455, "grad_norm": 0.6426716137645776, "learning_rate": 4.8463909164639095e-05, "loss": 0.7108, "step": 4373 }, { "epoch": 0.12770430060436191, "grad_norm": 0.578079575830548, "learning_rate": 4.846228710462287e-05, "loss": 0.6157, "step": 4374 }, { "epoch": 0.12773349683220928, "grad_norm": 0.6104748238133362, "learning_rate": 4.846066504460665e-05, "loss": 0.6814, "step": 4375 }, { "epoch": 0.12776269306005664, "grad_norm": 0.609223752589127, "learning_rate": 4.8459042984590433e-05, "loss": 0.6431, "step": 4376 }, { "epoch": 0.127791889287904, "grad_norm": 0.6043440781571324, "learning_rate": 4.845742092457421e-05, "loss": 0.5976, "step": 4377 }, { "epoch": 0.12782108551575136, "grad_norm": 0.5917948550944045, "learning_rate": 4.845579886455799e-05, "loss": 0.6043, "step": 4378 }, { "epoch": 0.12785028174359872, "grad_norm": 0.5736184319156817, "learning_rate": 4.8454176804541765e-05, "loss": 0.6083, "step": 4379 }, { "epoch": 0.12787947797144608, "grad_norm": 0.6053520764502378, "learning_rate": 4.8452554744525554e-05, "loss": 0.6953, "step": 4380 }, { "epoch": 0.12790867419929344, "grad_norm": 0.6126572344752148, "learning_rate": 4.845093268450933e-05, "loss": 0.581, "step": 4381 }, { "epoch": 0.1279378704271408, "grad_norm": 0.7744937676411395, "learning_rate": 4.844931062449311e-05, "loss": 0.6001, "step": 4382 }, { "epoch": 0.12796706665498817, "grad_norm": 0.6593548864981102, "learning_rate": 4.844768856447689e-05, "loss": 0.6818, "step": 4383 }, { "epoch": 0.12799626288283553, "grad_norm": 0.6316105289955269, "learning_rate": 4.844606650446067e-05, "loss": 0.65, "step": 4384 }, { "epoch": 0.1280254591106829, "grad_norm": 0.639455781750111, "learning_rate": 4.844444444444445e-05, "loss": 0.744, "step": 4385 }, { "epoch": 0.12805465533853025, "grad_norm": 0.6446861890548297, "learning_rate": 4.8442822384428224e-05, "loss": 0.7335, "step": 4386 }, { "epoch": 0.12808385156637764, "grad_norm": 0.7216219785127642, "learning_rate": 4.8441200324412006e-05, "loss": 0.6385, "step": 4387 }, { "epoch": 0.128113047794225, "grad_norm": 0.6026047493055995, "learning_rate": 4.843957826439579e-05, "loss": 0.6019, "step": 4388 }, { "epoch": 0.12814224402207236, "grad_norm": 0.6189574521840744, "learning_rate": 4.843795620437956e-05, "loss": 0.6603, "step": 4389 }, { "epoch": 0.12817144024991972, "grad_norm": 0.6037753210718411, "learning_rate": 4.8436334144363344e-05, "loss": 0.6498, "step": 4390 }, { "epoch": 0.12820063647776708, "grad_norm": 0.6094493168788976, "learning_rate": 4.843471208434712e-05, "loss": 0.6859, "step": 4391 }, { "epoch": 0.12822983270561444, "grad_norm": 0.634754324441202, "learning_rate": 4.84330900243309e-05, "loss": 0.6646, "step": 4392 }, { "epoch": 0.1282590289334618, "grad_norm": 0.6597487248972204, "learning_rate": 4.843146796431468e-05, "loss": 0.7271, "step": 4393 }, { "epoch": 0.12828822516130917, "grad_norm": 0.6373324767624329, "learning_rate": 4.842984590429846e-05, "loss": 0.6211, "step": 4394 }, { "epoch": 0.12831742138915653, "grad_norm": 0.6469066829657851, "learning_rate": 4.842822384428224e-05, "loss": 0.6654, "step": 4395 }, { "epoch": 0.1283466176170039, "grad_norm": 1.3146777223690675, "learning_rate": 4.842660178426602e-05, "loss": 0.7893, "step": 4396 }, { "epoch": 0.12837581384485125, "grad_norm": 0.6293359975561836, "learning_rate": 4.8424979724249796e-05, "loss": 0.7011, "step": 4397 }, { "epoch": 0.1284050100726986, "grad_norm": 0.6518447105703903, "learning_rate": 4.842335766423358e-05, "loss": 0.6428, "step": 4398 }, { "epoch": 0.12843420630054597, "grad_norm": 0.6330998362801523, "learning_rate": 4.842173560421736e-05, "loss": 0.6999, "step": 4399 }, { "epoch": 0.12846340252839333, "grad_norm": 0.5958949533361846, "learning_rate": 4.842011354420114e-05, "loss": 0.6107, "step": 4400 }, { "epoch": 0.1284925987562407, "grad_norm": 0.6230638601055055, "learning_rate": 4.8418491484184916e-05, "loss": 0.6943, "step": 4401 }, { "epoch": 0.12852179498408806, "grad_norm": 0.6636145841623888, "learning_rate": 4.84168694241687e-05, "loss": 0.7244, "step": 4402 }, { "epoch": 0.12855099121193542, "grad_norm": 0.6585532204565605, "learning_rate": 4.841524736415248e-05, "loss": 0.7257, "step": 4403 }, { "epoch": 0.12858018743978278, "grad_norm": 0.6556091587359911, "learning_rate": 4.8413625304136255e-05, "loss": 0.6589, "step": 4404 }, { "epoch": 0.12860938366763014, "grad_norm": 0.6605306943090385, "learning_rate": 4.841200324412004e-05, "loss": 0.7236, "step": 4405 }, { "epoch": 0.1286385798954775, "grad_norm": 0.6552312932541516, "learning_rate": 4.841038118410381e-05, "loss": 0.7443, "step": 4406 }, { "epoch": 0.12866777612332486, "grad_norm": 0.7014257126965033, "learning_rate": 4.8408759124087593e-05, "loss": 0.8129, "step": 4407 }, { "epoch": 0.12869697235117222, "grad_norm": 0.5613528650538688, "learning_rate": 4.8407137064071375e-05, "loss": 0.6122, "step": 4408 }, { "epoch": 0.12872616857901958, "grad_norm": 0.6586327481965976, "learning_rate": 4.840551500405515e-05, "loss": 0.7485, "step": 4409 }, { "epoch": 0.12875536480686695, "grad_norm": 0.6206226193741783, "learning_rate": 4.840389294403893e-05, "loss": 0.6086, "step": 4410 }, { "epoch": 0.1287845610347143, "grad_norm": 0.5980606630315315, "learning_rate": 4.840227088402271e-05, "loss": 0.6486, "step": 4411 }, { "epoch": 0.12881375726256167, "grad_norm": 0.5734534756258733, "learning_rate": 4.840064882400649e-05, "loss": 0.6413, "step": 4412 }, { "epoch": 0.12884295349040903, "grad_norm": 0.6106641046097415, "learning_rate": 4.839902676399027e-05, "loss": 0.7019, "step": 4413 }, { "epoch": 0.1288721497182564, "grad_norm": 0.6528237586466685, "learning_rate": 4.8397404703974045e-05, "loss": 0.6771, "step": 4414 }, { "epoch": 0.12890134594610375, "grad_norm": 0.5371069985641155, "learning_rate": 4.839578264395783e-05, "loss": 0.5645, "step": 4415 }, { "epoch": 0.1289305421739511, "grad_norm": 0.6241583326746633, "learning_rate": 4.839416058394161e-05, "loss": 0.7052, "step": 4416 }, { "epoch": 0.12895973840179847, "grad_norm": 0.6750710329971772, "learning_rate": 4.8392538523925384e-05, "loss": 0.7286, "step": 4417 }, { "epoch": 0.12898893462964586, "grad_norm": 0.6439769544335647, "learning_rate": 4.839091646390917e-05, "loss": 0.6806, "step": 4418 }, { "epoch": 0.12901813085749322, "grad_norm": 0.6249825197606278, "learning_rate": 4.838929440389295e-05, "loss": 0.709, "step": 4419 }, { "epoch": 0.12904732708534059, "grad_norm": 0.5950496596877753, "learning_rate": 4.838767234387673e-05, "loss": 0.6058, "step": 4420 }, { "epoch": 0.12907652331318795, "grad_norm": 0.6279425685442894, "learning_rate": 4.8386050283860504e-05, "loss": 0.699, "step": 4421 }, { "epoch": 0.1291057195410353, "grad_norm": 0.6366982108283236, "learning_rate": 4.8384428223844286e-05, "loss": 0.713, "step": 4422 }, { "epoch": 0.12913491576888267, "grad_norm": 0.5864035799617333, "learning_rate": 4.838280616382807e-05, "loss": 0.6354, "step": 4423 }, { "epoch": 0.12916411199673003, "grad_norm": 0.6258582594525808, "learning_rate": 4.838118410381184e-05, "loss": 0.6856, "step": 4424 }, { "epoch": 0.1291933082245774, "grad_norm": 0.7657449250403761, "learning_rate": 4.8379562043795625e-05, "loss": 0.6912, "step": 4425 }, { "epoch": 0.12922250445242475, "grad_norm": 0.7084657499730249, "learning_rate": 4.83779399837794e-05, "loss": 0.7271, "step": 4426 }, { "epoch": 0.1292517006802721, "grad_norm": 0.6549273656453487, "learning_rate": 4.837631792376318e-05, "loss": 0.7243, "step": 4427 }, { "epoch": 0.12928089690811947, "grad_norm": 0.6140492930770151, "learning_rate": 4.837469586374696e-05, "loss": 0.6615, "step": 4428 }, { "epoch": 0.12931009313596684, "grad_norm": 0.6010316932333747, "learning_rate": 4.837307380373074e-05, "loss": 0.6544, "step": 4429 }, { "epoch": 0.1293392893638142, "grad_norm": 0.6679665997371717, "learning_rate": 4.837145174371452e-05, "loss": 0.6853, "step": 4430 }, { "epoch": 0.12936848559166156, "grad_norm": 0.6377351056157475, "learning_rate": 4.8369829683698295e-05, "loss": 0.769, "step": 4431 }, { "epoch": 0.12939768181950892, "grad_norm": 0.6232158042652702, "learning_rate": 4.8368207623682077e-05, "loss": 0.6527, "step": 4432 }, { "epoch": 0.12942687804735628, "grad_norm": 0.5891666968013435, "learning_rate": 4.836658556366586e-05, "loss": 0.659, "step": 4433 }, { "epoch": 0.12945607427520364, "grad_norm": 0.5999968216415553, "learning_rate": 4.836496350364963e-05, "loss": 0.6295, "step": 4434 }, { "epoch": 0.129485270503051, "grad_norm": 0.6391681798947177, "learning_rate": 4.8363341443633415e-05, "loss": 0.7511, "step": 4435 }, { "epoch": 0.12951446673089836, "grad_norm": 0.6339792653941662, "learning_rate": 4.83617193836172e-05, "loss": 0.7084, "step": 4436 }, { "epoch": 0.12954366295874573, "grad_norm": 0.6145486745458135, "learning_rate": 4.836009732360098e-05, "loss": 0.6643, "step": 4437 }, { "epoch": 0.1295728591865931, "grad_norm": 0.6227848010202296, "learning_rate": 4.835847526358476e-05, "loss": 0.7161, "step": 4438 }, { "epoch": 0.12960205541444045, "grad_norm": 0.5795752476365493, "learning_rate": 4.8356853203568535e-05, "loss": 0.628, "step": 4439 }, { "epoch": 0.1296312516422878, "grad_norm": 0.5900335616174499, "learning_rate": 4.835523114355232e-05, "loss": 0.6448, "step": 4440 }, { "epoch": 0.12966044787013517, "grad_norm": 0.6269150500525477, "learning_rate": 4.835360908353609e-05, "loss": 0.6753, "step": 4441 }, { "epoch": 0.12968964409798253, "grad_norm": 0.6023859013772986, "learning_rate": 4.8351987023519874e-05, "loss": 0.7243, "step": 4442 }, { "epoch": 0.1297188403258299, "grad_norm": 0.8229173434370003, "learning_rate": 4.8350364963503656e-05, "loss": 0.8241, "step": 4443 }, { "epoch": 0.12974803655367725, "grad_norm": 0.627017701062934, "learning_rate": 4.834874290348743e-05, "loss": 0.6341, "step": 4444 }, { "epoch": 0.12977723278152462, "grad_norm": 0.6117431214266694, "learning_rate": 4.834712084347121e-05, "loss": 0.6396, "step": 4445 }, { "epoch": 0.12980642900937198, "grad_norm": 0.7069430813353051, "learning_rate": 4.834549878345499e-05, "loss": 0.771, "step": 4446 }, { "epoch": 0.12983562523721934, "grad_norm": 0.6027286215521435, "learning_rate": 4.834387672343877e-05, "loss": 0.6427, "step": 4447 }, { "epoch": 0.12986482146506673, "grad_norm": 0.6266499049276218, "learning_rate": 4.834225466342255e-05, "loss": 0.6397, "step": 4448 }, { "epoch": 0.1298940176929141, "grad_norm": 0.5867922452775913, "learning_rate": 4.8340632603406326e-05, "loss": 0.6492, "step": 4449 }, { "epoch": 0.12992321392076145, "grad_norm": 0.5580209288158048, "learning_rate": 4.833901054339011e-05, "loss": 0.577, "step": 4450 }, { "epoch": 0.1299524101486088, "grad_norm": 0.6578365877703293, "learning_rate": 4.833738848337388e-05, "loss": 0.7395, "step": 4451 }, { "epoch": 0.12998160637645617, "grad_norm": 0.6285985496232657, "learning_rate": 4.8335766423357664e-05, "loss": 0.6644, "step": 4452 }, { "epoch": 0.13001080260430353, "grad_norm": 0.6106415734525616, "learning_rate": 4.8334144363341446e-05, "loss": 0.6311, "step": 4453 }, { "epoch": 0.1300399988321509, "grad_norm": 0.6174768423960877, "learning_rate": 4.833252230332522e-05, "loss": 0.6634, "step": 4454 }, { "epoch": 0.13006919505999825, "grad_norm": 0.6067867060885913, "learning_rate": 4.833090024330901e-05, "loss": 0.6338, "step": 4455 }, { "epoch": 0.13009839128784562, "grad_norm": 0.5815284084812685, "learning_rate": 4.8329278183292785e-05, "loss": 0.6342, "step": 4456 }, { "epoch": 0.13012758751569298, "grad_norm": 0.5598930313183638, "learning_rate": 4.8327656123276566e-05, "loss": 0.5667, "step": 4457 }, { "epoch": 0.13015678374354034, "grad_norm": 0.6830912329890017, "learning_rate": 4.832603406326035e-05, "loss": 0.7869, "step": 4458 }, { "epoch": 0.1301859799713877, "grad_norm": 0.6249808813088927, "learning_rate": 4.832441200324412e-05, "loss": 0.6556, "step": 4459 }, { "epoch": 0.13021517619923506, "grad_norm": 0.6180430467714406, "learning_rate": 4.8322789943227905e-05, "loss": 0.6864, "step": 4460 }, { "epoch": 0.13024437242708242, "grad_norm": 0.623300552301565, "learning_rate": 4.832116788321168e-05, "loss": 0.6138, "step": 4461 }, { "epoch": 0.13027356865492978, "grad_norm": 0.624839561298944, "learning_rate": 4.831954582319546e-05, "loss": 0.6799, "step": 4462 }, { "epoch": 0.13030276488277714, "grad_norm": 0.7055101864194121, "learning_rate": 4.8317923763179243e-05, "loss": 0.7123, "step": 4463 }, { "epoch": 0.1303319611106245, "grad_norm": 0.6423021572363227, "learning_rate": 4.831630170316302e-05, "loss": 0.7073, "step": 4464 }, { "epoch": 0.13036115733847187, "grad_norm": 0.602932657618116, "learning_rate": 4.83146796431468e-05, "loss": 0.6924, "step": 4465 }, { "epoch": 0.13039035356631923, "grad_norm": 0.6281299152893828, "learning_rate": 4.8313057583130575e-05, "loss": 0.7308, "step": 4466 }, { "epoch": 0.1304195497941666, "grad_norm": 0.646224949074376, "learning_rate": 4.831143552311436e-05, "loss": 0.7285, "step": 4467 }, { "epoch": 0.13044874602201395, "grad_norm": 0.6832662125975801, "learning_rate": 4.830981346309814e-05, "loss": 0.7445, "step": 4468 }, { "epoch": 0.1304779422498613, "grad_norm": 0.6612154659000808, "learning_rate": 4.8308191403081914e-05, "loss": 0.6768, "step": 4469 }, { "epoch": 0.13050713847770867, "grad_norm": 0.6092159014762336, "learning_rate": 4.8306569343065695e-05, "loss": 0.657, "step": 4470 }, { "epoch": 0.13053633470555603, "grad_norm": 0.5988631572185878, "learning_rate": 4.830494728304947e-05, "loss": 0.6857, "step": 4471 }, { "epoch": 0.1305655309334034, "grad_norm": 0.6029956606030195, "learning_rate": 4.830332522303325e-05, "loss": 0.6712, "step": 4472 }, { "epoch": 0.13059472716125076, "grad_norm": 0.7326179146375563, "learning_rate": 4.8301703163017034e-05, "loss": 0.7178, "step": 4473 }, { "epoch": 0.13062392338909812, "grad_norm": 0.5816600582360877, "learning_rate": 4.8300081103000816e-05, "loss": 0.6104, "step": 4474 }, { "epoch": 0.13065311961694548, "grad_norm": 0.5936713349811935, "learning_rate": 4.82984590429846e-05, "loss": 0.641, "step": 4475 }, { "epoch": 0.13068231584479284, "grad_norm": 0.6584626104032792, "learning_rate": 4.829683698296837e-05, "loss": 0.659, "step": 4476 }, { "epoch": 0.1307115120726402, "grad_norm": 0.5834056766165513, "learning_rate": 4.8295214922952154e-05, "loss": 0.6168, "step": 4477 }, { "epoch": 0.1307407083004876, "grad_norm": 0.6485234889303626, "learning_rate": 4.829359286293593e-05, "loss": 0.7364, "step": 4478 }, { "epoch": 0.13076990452833495, "grad_norm": 0.6079025480402221, "learning_rate": 4.829197080291971e-05, "loss": 0.6382, "step": 4479 }, { "epoch": 0.1307991007561823, "grad_norm": 0.7473720128973651, "learning_rate": 4.829034874290349e-05, "loss": 0.7911, "step": 4480 }, { "epoch": 0.13082829698402967, "grad_norm": 0.6384787114068426, "learning_rate": 4.828872668288727e-05, "loss": 0.6245, "step": 4481 }, { "epoch": 0.13085749321187704, "grad_norm": 0.6568517037383979, "learning_rate": 4.828710462287105e-05, "loss": 0.8027, "step": 4482 }, { "epoch": 0.1308866894397244, "grad_norm": 0.6692384826976379, "learning_rate": 4.828548256285483e-05, "loss": 0.6701, "step": 4483 }, { "epoch": 0.13091588566757176, "grad_norm": 0.645183755539454, "learning_rate": 4.8283860502838606e-05, "loss": 0.6925, "step": 4484 }, { "epoch": 0.13094508189541912, "grad_norm": 0.6596768483451105, "learning_rate": 4.828223844282239e-05, "loss": 0.7893, "step": 4485 }, { "epoch": 0.13097427812326648, "grad_norm": 0.6049272439902046, "learning_rate": 4.828061638280616e-05, "loss": 0.699, "step": 4486 }, { "epoch": 0.13100347435111384, "grad_norm": 0.6938033004168296, "learning_rate": 4.8278994322789945e-05, "loss": 0.6786, "step": 4487 }, { "epoch": 0.1310326705789612, "grad_norm": 0.6598515428775695, "learning_rate": 4.8277372262773726e-05, "loss": 0.7289, "step": 4488 }, { "epoch": 0.13106186680680856, "grad_norm": 0.6441427698601828, "learning_rate": 4.82757502027575e-05, "loss": 0.7011, "step": 4489 }, { "epoch": 0.13109106303465592, "grad_norm": 0.6443610322513909, "learning_rate": 4.827412814274128e-05, "loss": 0.7082, "step": 4490 }, { "epoch": 0.13112025926250329, "grad_norm": 0.6211891260034994, "learning_rate": 4.827250608272506e-05, "loss": 0.6958, "step": 4491 }, { "epoch": 0.13114945549035065, "grad_norm": 0.6211091124745971, "learning_rate": 4.827088402270884e-05, "loss": 0.6893, "step": 4492 }, { "epoch": 0.131178651718198, "grad_norm": 0.6336302962861832, "learning_rate": 4.826926196269262e-05, "loss": 0.6713, "step": 4493 }, { "epoch": 0.13120784794604537, "grad_norm": 0.6940516106245044, "learning_rate": 4.8267639902676404e-05, "loss": 0.6999, "step": 4494 }, { "epoch": 0.13123704417389273, "grad_norm": 0.5987309916236974, "learning_rate": 4.8266017842660185e-05, "loss": 0.631, "step": 4495 }, { "epoch": 0.1312662404017401, "grad_norm": 0.8006407397982682, "learning_rate": 4.826439578264396e-05, "loss": 0.8845, "step": 4496 }, { "epoch": 0.13129543662958745, "grad_norm": 0.5754497075324629, "learning_rate": 4.826277372262774e-05, "loss": 0.6196, "step": 4497 }, { "epoch": 0.13132463285743481, "grad_norm": 0.6261922573681434, "learning_rate": 4.826115166261152e-05, "loss": 0.7502, "step": 4498 }, { "epoch": 0.13135382908528218, "grad_norm": 0.5933662913868389, "learning_rate": 4.82595296025953e-05, "loss": 0.6825, "step": 4499 }, { "epoch": 0.13138302531312954, "grad_norm": 0.6184811967816961, "learning_rate": 4.825790754257908e-05, "loss": 0.6684, "step": 4500 }, { "epoch": 0.1314122215409769, "grad_norm": 0.6125748904735776, "learning_rate": 4.8256285482562856e-05, "loss": 0.6876, "step": 4501 }, { "epoch": 0.13144141776882426, "grad_norm": 0.607018690884306, "learning_rate": 4.825466342254664e-05, "loss": 0.5935, "step": 4502 }, { "epoch": 0.13147061399667162, "grad_norm": 0.613813160683396, "learning_rate": 4.825304136253041e-05, "loss": 0.6226, "step": 4503 }, { "epoch": 0.13149981022451898, "grad_norm": 0.6699283996327625, "learning_rate": 4.8251419302514194e-05, "loss": 0.7994, "step": 4504 }, { "epoch": 0.13152900645236634, "grad_norm": 0.605924911330662, "learning_rate": 4.8249797242497976e-05, "loss": 0.6368, "step": 4505 }, { "epoch": 0.1315582026802137, "grad_norm": 0.5659100030810652, "learning_rate": 4.824817518248175e-05, "loss": 0.5994, "step": 4506 }, { "epoch": 0.13158739890806107, "grad_norm": 0.6087725196733466, "learning_rate": 4.824655312246553e-05, "loss": 0.7011, "step": 4507 }, { "epoch": 0.13161659513590845, "grad_norm": 0.619933137346019, "learning_rate": 4.8244931062449314e-05, "loss": 0.5722, "step": 4508 }, { "epoch": 0.13164579136375582, "grad_norm": 0.6411400520066661, "learning_rate": 4.824330900243309e-05, "loss": 0.7445, "step": 4509 }, { "epoch": 0.13167498759160318, "grad_norm": 0.6026244687842075, "learning_rate": 4.824168694241687e-05, "loss": 0.6447, "step": 4510 }, { "epoch": 0.13170418381945054, "grad_norm": 0.6993601355957733, "learning_rate": 4.8240064882400646e-05, "loss": 0.7711, "step": 4511 }, { "epoch": 0.1317333800472979, "grad_norm": 0.5829100383567416, "learning_rate": 4.8238442822384435e-05, "loss": 0.6241, "step": 4512 }, { "epoch": 0.13176257627514526, "grad_norm": 0.6313311452614776, "learning_rate": 4.823682076236821e-05, "loss": 0.6997, "step": 4513 }, { "epoch": 0.13179177250299262, "grad_norm": 0.6748703554104359, "learning_rate": 4.823519870235199e-05, "loss": 0.7422, "step": 4514 }, { "epoch": 0.13182096873083998, "grad_norm": 0.6972698088840944, "learning_rate": 4.823357664233577e-05, "loss": 0.7322, "step": 4515 }, { "epoch": 0.13185016495868734, "grad_norm": 0.6080781688557763, "learning_rate": 4.823195458231955e-05, "loss": 0.646, "step": 4516 }, { "epoch": 0.1318793611865347, "grad_norm": 0.6029963614252295, "learning_rate": 4.823033252230333e-05, "loss": 0.5977, "step": 4517 }, { "epoch": 0.13190855741438207, "grad_norm": 0.7086322576583305, "learning_rate": 4.8228710462287105e-05, "loss": 0.7053, "step": 4518 }, { "epoch": 0.13193775364222943, "grad_norm": 0.6238339129170847, "learning_rate": 4.8227088402270887e-05, "loss": 0.687, "step": 4519 }, { "epoch": 0.1319669498700768, "grad_norm": 0.5835767910308298, "learning_rate": 4.822546634225467e-05, "loss": 0.641, "step": 4520 }, { "epoch": 0.13199614609792415, "grad_norm": 0.6716896310267239, "learning_rate": 4.822384428223844e-05, "loss": 0.695, "step": 4521 }, { "epoch": 0.1320253423257715, "grad_norm": 0.6234220246066126, "learning_rate": 4.8222222222222225e-05, "loss": 0.6881, "step": 4522 }, { "epoch": 0.13205453855361887, "grad_norm": 0.5930917069257323, "learning_rate": 4.8220600162206e-05, "loss": 0.6259, "step": 4523 }, { "epoch": 0.13208373478146623, "grad_norm": 0.6586529881164992, "learning_rate": 4.821897810218978e-05, "loss": 0.74, "step": 4524 }, { "epoch": 0.1321129310093136, "grad_norm": 0.6096065723803393, "learning_rate": 4.8217356042173564e-05, "loss": 0.6659, "step": 4525 }, { "epoch": 0.13214212723716096, "grad_norm": 0.6217551276446756, "learning_rate": 4.821573398215734e-05, "loss": 0.66, "step": 4526 }, { "epoch": 0.13217132346500832, "grad_norm": 0.6066795610706618, "learning_rate": 4.821411192214112e-05, "loss": 0.6428, "step": 4527 }, { "epoch": 0.13220051969285568, "grad_norm": 0.6365455998899747, "learning_rate": 4.82124898621249e-05, "loss": 0.7192, "step": 4528 }, { "epoch": 0.13222971592070304, "grad_norm": 0.6306744106403396, "learning_rate": 4.821086780210868e-05, "loss": 0.7112, "step": 4529 }, { "epoch": 0.1322589121485504, "grad_norm": 0.868809081267534, "learning_rate": 4.820924574209246e-05, "loss": 0.8259, "step": 4530 }, { "epoch": 0.13228810837639776, "grad_norm": 0.6437512797402283, "learning_rate": 4.820762368207624e-05, "loss": 0.7109, "step": 4531 }, { "epoch": 0.13231730460424512, "grad_norm": 0.632363901413271, "learning_rate": 4.820600162206002e-05, "loss": 0.69, "step": 4532 }, { "epoch": 0.13234650083209248, "grad_norm": 0.7076004398299849, "learning_rate": 4.82043795620438e-05, "loss": 0.7666, "step": 4533 }, { "epoch": 0.13237569705993985, "grad_norm": 0.5936813182392223, "learning_rate": 4.820275750202758e-05, "loss": 0.637, "step": 4534 }, { "epoch": 0.1324048932877872, "grad_norm": 0.652490470473705, "learning_rate": 4.820113544201136e-05, "loss": 0.7455, "step": 4535 }, { "epoch": 0.13243408951563457, "grad_norm": 0.6522405050432225, "learning_rate": 4.8199513381995136e-05, "loss": 0.7737, "step": 4536 }, { "epoch": 0.13246328574348193, "grad_norm": 0.6052162010095377, "learning_rate": 4.819789132197892e-05, "loss": 0.6684, "step": 4537 }, { "epoch": 0.13249248197132932, "grad_norm": 0.7129808181528415, "learning_rate": 4.819626926196269e-05, "loss": 0.7445, "step": 4538 }, { "epoch": 0.13252167819917668, "grad_norm": 0.6089394515005144, "learning_rate": 4.8194647201946474e-05, "loss": 0.6407, "step": 4539 }, { "epoch": 0.13255087442702404, "grad_norm": 0.7033272135263179, "learning_rate": 4.8193025141930256e-05, "loss": 0.6558, "step": 4540 }, { "epoch": 0.1325800706548714, "grad_norm": 0.6666056797592784, "learning_rate": 4.819140308191403e-05, "loss": 0.7536, "step": 4541 }, { "epoch": 0.13260926688271876, "grad_norm": 0.6359610821782535, "learning_rate": 4.818978102189781e-05, "loss": 0.664, "step": 4542 }, { "epoch": 0.13263846311056612, "grad_norm": 0.627850403988904, "learning_rate": 4.818815896188159e-05, "loss": 0.6688, "step": 4543 }, { "epoch": 0.13266765933841349, "grad_norm": 0.5812817911732109, "learning_rate": 4.818653690186537e-05, "loss": 0.6428, "step": 4544 }, { "epoch": 0.13269685556626085, "grad_norm": 0.5803083321043866, "learning_rate": 4.818491484184915e-05, "loss": 0.6106, "step": 4545 }, { "epoch": 0.1327260517941082, "grad_norm": 0.5706974031550934, "learning_rate": 4.8183292781832926e-05, "loss": 0.6575, "step": 4546 }, { "epoch": 0.13275524802195557, "grad_norm": 0.6130549291329531, "learning_rate": 4.818167072181671e-05, "loss": 0.6918, "step": 4547 }, { "epoch": 0.13278444424980293, "grad_norm": 0.6582022769207737, "learning_rate": 4.818004866180048e-05, "loss": 0.7008, "step": 4548 }, { "epoch": 0.1328136404776503, "grad_norm": 0.6261146367967926, "learning_rate": 4.8178426601784265e-05, "loss": 0.6784, "step": 4549 }, { "epoch": 0.13284283670549765, "grad_norm": 0.6219000802019878, "learning_rate": 4.8176804541768053e-05, "loss": 0.6713, "step": 4550 }, { "epoch": 0.132872032933345, "grad_norm": 0.5638992866379684, "learning_rate": 4.817518248175183e-05, "loss": 0.5901, "step": 4551 }, { "epoch": 0.13290122916119237, "grad_norm": 0.6051625451303938, "learning_rate": 4.817356042173561e-05, "loss": 0.6649, "step": 4552 }, { "epoch": 0.13293042538903974, "grad_norm": 0.6018851195043835, "learning_rate": 4.8171938361719385e-05, "loss": 0.6227, "step": 4553 }, { "epoch": 0.1329596216168871, "grad_norm": 0.6856720756114366, "learning_rate": 4.817031630170317e-05, "loss": 0.7849, "step": 4554 }, { "epoch": 0.13298881784473446, "grad_norm": 0.6172842035189419, "learning_rate": 4.816869424168695e-05, "loss": 0.6138, "step": 4555 }, { "epoch": 0.13301801407258182, "grad_norm": 0.664791653814445, "learning_rate": 4.8167072181670724e-05, "loss": 0.6534, "step": 4556 }, { "epoch": 0.13304721030042918, "grad_norm": 0.652762168479457, "learning_rate": 4.8165450121654505e-05, "loss": 0.6789, "step": 4557 }, { "epoch": 0.13307640652827654, "grad_norm": 0.6303831390779098, "learning_rate": 4.816382806163828e-05, "loss": 0.623, "step": 4558 }, { "epoch": 0.1331056027561239, "grad_norm": 0.6347743694990099, "learning_rate": 4.816220600162206e-05, "loss": 0.6835, "step": 4559 }, { "epoch": 0.13313479898397126, "grad_norm": 0.5666186331108644, "learning_rate": 4.8160583941605844e-05, "loss": 0.5833, "step": 4560 }, { "epoch": 0.13316399521181863, "grad_norm": 0.5724535914448676, "learning_rate": 4.815896188158962e-05, "loss": 0.6164, "step": 4561 }, { "epoch": 0.133193191439666, "grad_norm": 0.5869659473134429, "learning_rate": 4.81573398215734e-05, "loss": 0.6038, "step": 4562 }, { "epoch": 0.13322238766751335, "grad_norm": 0.6087775216553868, "learning_rate": 4.8155717761557176e-05, "loss": 0.6757, "step": 4563 }, { "epoch": 0.1332515838953607, "grad_norm": 0.6177234377582217, "learning_rate": 4.815409570154096e-05, "loss": 0.6758, "step": 4564 }, { "epoch": 0.13328078012320807, "grad_norm": 0.6452264837207677, "learning_rate": 4.815247364152474e-05, "loss": 0.7085, "step": 4565 }, { "epoch": 0.13330997635105543, "grad_norm": 0.6956044493215511, "learning_rate": 4.8150851581508514e-05, "loss": 0.7527, "step": 4566 }, { "epoch": 0.1333391725789028, "grad_norm": 0.6748230198017411, "learning_rate": 4.8149229521492296e-05, "loss": 0.8112, "step": 4567 }, { "epoch": 0.13336836880675018, "grad_norm": 0.6181867127746054, "learning_rate": 4.814760746147607e-05, "loss": 0.6687, "step": 4568 }, { "epoch": 0.13339756503459754, "grad_norm": 0.6457146851953551, "learning_rate": 4.814598540145986e-05, "loss": 0.6997, "step": 4569 }, { "epoch": 0.1334267612624449, "grad_norm": 0.6554874643292737, "learning_rate": 4.814436334144364e-05, "loss": 0.7018, "step": 4570 }, { "epoch": 0.13345595749029227, "grad_norm": 0.6199704375868872, "learning_rate": 4.8142741281427416e-05, "loss": 0.6951, "step": 4571 }, { "epoch": 0.13348515371813963, "grad_norm": 0.643188775876153, "learning_rate": 4.81411192214112e-05, "loss": 0.7396, "step": 4572 }, { "epoch": 0.133514349945987, "grad_norm": 0.6224506549619732, "learning_rate": 4.813949716139497e-05, "loss": 0.6859, "step": 4573 }, { "epoch": 0.13354354617383435, "grad_norm": 0.5921628578479594, "learning_rate": 4.8137875101378755e-05, "loss": 0.671, "step": 4574 }, { "epoch": 0.1335727424016817, "grad_norm": 0.5687412227369593, "learning_rate": 4.8136253041362536e-05, "loss": 0.6218, "step": 4575 }, { "epoch": 0.13360193862952907, "grad_norm": 0.5925364097525924, "learning_rate": 4.813463098134631e-05, "loss": 0.6173, "step": 4576 }, { "epoch": 0.13363113485737643, "grad_norm": 0.6215344368969179, "learning_rate": 4.813300892133009e-05, "loss": 0.6727, "step": 4577 }, { "epoch": 0.1336603310852238, "grad_norm": 0.621080102363637, "learning_rate": 4.813138686131387e-05, "loss": 0.653, "step": 4578 }, { "epoch": 0.13368952731307115, "grad_norm": 0.6475276790353385, "learning_rate": 4.812976480129765e-05, "loss": 0.7733, "step": 4579 }, { "epoch": 0.13371872354091852, "grad_norm": 0.6333182862366051, "learning_rate": 4.812814274128143e-05, "loss": 0.6941, "step": 4580 }, { "epoch": 0.13374791976876588, "grad_norm": 0.6415245497091807, "learning_rate": 4.812652068126521e-05, "loss": 0.7428, "step": 4581 }, { "epoch": 0.13377711599661324, "grad_norm": 0.6433600729476944, "learning_rate": 4.812489862124899e-05, "loss": 0.7524, "step": 4582 }, { "epoch": 0.1338063122244606, "grad_norm": 0.5866785295594157, "learning_rate": 4.8123276561232763e-05, "loss": 0.6044, "step": 4583 }, { "epoch": 0.13383550845230796, "grad_norm": 0.6395392378486086, "learning_rate": 4.8121654501216545e-05, "loss": 0.7056, "step": 4584 }, { "epoch": 0.13386470468015532, "grad_norm": 0.6598371434358395, "learning_rate": 4.812003244120033e-05, "loss": 0.7204, "step": 4585 }, { "epoch": 0.13389390090800268, "grad_norm": 0.6284123605604175, "learning_rate": 4.81184103811841e-05, "loss": 0.6168, "step": 4586 }, { "epoch": 0.13392309713585004, "grad_norm": 0.703535640501545, "learning_rate": 4.811678832116789e-05, "loss": 0.7276, "step": 4587 }, { "epoch": 0.1339522933636974, "grad_norm": 0.5603226759983458, "learning_rate": 4.8115166261151666e-05, "loss": 0.5729, "step": 4588 }, { "epoch": 0.13398148959154477, "grad_norm": 0.6952596381065598, "learning_rate": 4.811354420113545e-05, "loss": 0.6901, "step": 4589 }, { "epoch": 0.13401068581939213, "grad_norm": 0.801670625216935, "learning_rate": 4.811192214111922e-05, "loss": 0.6986, "step": 4590 }, { "epoch": 0.1340398820472395, "grad_norm": 0.6233875540312306, "learning_rate": 4.8110300081103004e-05, "loss": 0.6822, "step": 4591 }, { "epoch": 0.13406907827508685, "grad_norm": 0.8007414485410422, "learning_rate": 4.8108678021086786e-05, "loss": 0.6266, "step": 4592 }, { "epoch": 0.1340982745029342, "grad_norm": 0.6181578501638587, "learning_rate": 4.810705596107056e-05, "loss": 0.7378, "step": 4593 }, { "epoch": 0.13412747073078157, "grad_norm": 0.6191666168704253, "learning_rate": 4.810543390105434e-05, "loss": 0.7105, "step": 4594 }, { "epoch": 0.13415666695862893, "grad_norm": 0.6581155202812792, "learning_rate": 4.8103811841038124e-05, "loss": 0.7041, "step": 4595 }, { "epoch": 0.1341858631864763, "grad_norm": 0.6036106107445496, "learning_rate": 4.81021897810219e-05, "loss": 0.6729, "step": 4596 }, { "epoch": 0.13421505941432366, "grad_norm": 0.6473917854367925, "learning_rate": 4.810056772100568e-05, "loss": 0.6952, "step": 4597 }, { "epoch": 0.13424425564217105, "grad_norm": 0.6453919320891436, "learning_rate": 4.8098945660989456e-05, "loss": 0.6672, "step": 4598 }, { "epoch": 0.1342734518700184, "grad_norm": 0.6809351827851312, "learning_rate": 4.809732360097324e-05, "loss": 0.7748, "step": 4599 }, { "epoch": 0.13430264809786577, "grad_norm": 0.6343737985622734, "learning_rate": 4.809570154095702e-05, "loss": 0.6058, "step": 4600 }, { "epoch": 0.13433184432571313, "grad_norm": 0.5938215300031789, "learning_rate": 4.8094079480940795e-05, "loss": 0.7062, "step": 4601 }, { "epoch": 0.1343610405535605, "grad_norm": 0.6339016386613477, "learning_rate": 4.8092457420924576e-05, "loss": 0.66, "step": 4602 }, { "epoch": 0.13439023678140785, "grad_norm": 0.6240578336758136, "learning_rate": 4.809083536090835e-05, "loss": 0.714, "step": 4603 }, { "epoch": 0.1344194330092552, "grad_norm": 0.5984428864924847, "learning_rate": 4.808921330089213e-05, "loss": 0.6868, "step": 4604 }, { "epoch": 0.13444862923710257, "grad_norm": 0.6955378567401314, "learning_rate": 4.8087591240875915e-05, "loss": 0.7095, "step": 4605 }, { "epoch": 0.13447782546494993, "grad_norm": 0.6132407915511889, "learning_rate": 4.8085969180859697e-05, "loss": 0.649, "step": 4606 }, { "epoch": 0.1345070216927973, "grad_norm": 0.6014914693711402, "learning_rate": 4.808434712084348e-05, "loss": 0.6303, "step": 4607 }, { "epoch": 0.13453621792064466, "grad_norm": 0.6511711774586006, "learning_rate": 4.808272506082725e-05, "loss": 0.7025, "step": 4608 }, { "epoch": 0.13456541414849202, "grad_norm": 0.5711003128389025, "learning_rate": 4.8081103000811035e-05, "loss": 0.5927, "step": 4609 }, { "epoch": 0.13459461037633938, "grad_norm": 0.558609333781028, "learning_rate": 4.807948094079481e-05, "loss": 0.5766, "step": 4610 }, { "epoch": 0.13462380660418674, "grad_norm": 0.584784311815249, "learning_rate": 4.807785888077859e-05, "loss": 0.6257, "step": 4611 }, { "epoch": 0.1346530028320341, "grad_norm": 0.5954109486833468, "learning_rate": 4.8076236820762374e-05, "loss": 0.6315, "step": 4612 }, { "epoch": 0.13468219905988146, "grad_norm": 0.6428781065665385, "learning_rate": 4.807461476074615e-05, "loss": 0.7272, "step": 4613 }, { "epoch": 0.13471139528772882, "grad_norm": 0.6494061685245132, "learning_rate": 4.807299270072993e-05, "loss": 0.6979, "step": 4614 }, { "epoch": 0.13474059151557619, "grad_norm": 0.6669225935158272, "learning_rate": 4.807137064071371e-05, "loss": 0.7043, "step": 4615 }, { "epoch": 0.13476978774342355, "grad_norm": 0.6359060605254332, "learning_rate": 4.806974858069749e-05, "loss": 0.6784, "step": 4616 }, { "epoch": 0.1347989839712709, "grad_norm": 0.655323328916634, "learning_rate": 4.806812652068127e-05, "loss": 0.6148, "step": 4617 }, { "epoch": 0.13482818019911827, "grad_norm": 0.5988711951197662, "learning_rate": 4.8066504460665044e-05, "loss": 0.6343, "step": 4618 }, { "epoch": 0.13485737642696563, "grad_norm": 0.6497322490682998, "learning_rate": 4.8064882400648826e-05, "loss": 0.7189, "step": 4619 }, { "epoch": 0.134886572654813, "grad_norm": 0.7691412192014184, "learning_rate": 4.806326034063261e-05, "loss": 0.8131, "step": 4620 }, { "epoch": 0.13491576888266035, "grad_norm": 0.7143000558317907, "learning_rate": 4.806163828061638e-05, "loss": 0.7117, "step": 4621 }, { "epoch": 0.13494496511050771, "grad_norm": 0.639333931705021, "learning_rate": 4.8060016220600164e-05, "loss": 0.64, "step": 4622 }, { "epoch": 0.13497416133835508, "grad_norm": 0.6190709258506039, "learning_rate": 4.805839416058394e-05, "loss": 0.7065, "step": 4623 }, { "epoch": 0.13500335756620244, "grad_norm": 0.7503303392472473, "learning_rate": 4.805677210056772e-05, "loss": 0.7631, "step": 4624 }, { "epoch": 0.1350325537940498, "grad_norm": 0.5716902738977205, "learning_rate": 4.80551500405515e-05, "loss": 0.6351, "step": 4625 }, { "epoch": 0.13506175002189716, "grad_norm": 0.6002227980020083, "learning_rate": 4.8053527980535284e-05, "loss": 0.6815, "step": 4626 }, { "epoch": 0.13509094624974452, "grad_norm": 0.6323117118862681, "learning_rate": 4.8051905920519066e-05, "loss": 0.643, "step": 4627 }, { "epoch": 0.13512014247759188, "grad_norm": 0.5753055851565773, "learning_rate": 4.805028386050284e-05, "loss": 0.5333, "step": 4628 }, { "epoch": 0.13514933870543927, "grad_norm": 0.6037630031660188, "learning_rate": 4.804866180048662e-05, "loss": 0.6542, "step": 4629 }, { "epoch": 0.13517853493328663, "grad_norm": 0.6505599136320798, "learning_rate": 4.80470397404704e-05, "loss": 0.695, "step": 4630 }, { "epoch": 0.135207731161134, "grad_norm": 0.6305599613841846, "learning_rate": 4.804541768045418e-05, "loss": 0.644, "step": 4631 }, { "epoch": 0.13523692738898135, "grad_norm": 0.6212310569799702, "learning_rate": 4.804379562043796e-05, "loss": 0.7038, "step": 4632 }, { "epoch": 0.13526612361682872, "grad_norm": 0.6058754283543134, "learning_rate": 4.8042173560421736e-05, "loss": 0.6747, "step": 4633 }, { "epoch": 0.13529531984467608, "grad_norm": 0.799686170701498, "learning_rate": 4.804055150040552e-05, "loss": 0.7172, "step": 4634 }, { "epoch": 0.13532451607252344, "grad_norm": 0.6175921331633148, "learning_rate": 4.803892944038929e-05, "loss": 0.6594, "step": 4635 }, { "epoch": 0.1353537123003708, "grad_norm": 0.6442531446774411, "learning_rate": 4.8037307380373075e-05, "loss": 0.6818, "step": 4636 }, { "epoch": 0.13538290852821816, "grad_norm": 0.7022915916789827, "learning_rate": 4.803568532035686e-05, "loss": 0.7666, "step": 4637 }, { "epoch": 0.13541210475606552, "grad_norm": 0.5922374100900096, "learning_rate": 4.803406326034063e-05, "loss": 0.6224, "step": 4638 }, { "epoch": 0.13544130098391288, "grad_norm": 0.6229534464770348, "learning_rate": 4.8032441200324413e-05, "loss": 0.7121, "step": 4639 }, { "epoch": 0.13547049721176024, "grad_norm": 0.616252054761285, "learning_rate": 4.8030819140308195e-05, "loss": 0.7465, "step": 4640 }, { "epoch": 0.1354996934396076, "grad_norm": 0.6830146540938626, "learning_rate": 4.802919708029197e-05, "loss": 0.7388, "step": 4641 }, { "epoch": 0.13552888966745497, "grad_norm": 0.6736931795130121, "learning_rate": 4.802757502027575e-05, "loss": 0.7802, "step": 4642 }, { "epoch": 0.13555808589530233, "grad_norm": 0.6012853394535129, "learning_rate": 4.802595296025953e-05, "loss": 0.716, "step": 4643 }, { "epoch": 0.1355872821231497, "grad_norm": 0.6464448469985605, "learning_rate": 4.8024330900243315e-05, "loss": 0.7167, "step": 4644 }, { "epoch": 0.13561647835099705, "grad_norm": 0.5990250817267875, "learning_rate": 4.802270884022709e-05, "loss": 0.716, "step": 4645 }, { "epoch": 0.1356456745788444, "grad_norm": 0.6292188667515356, "learning_rate": 4.802108678021087e-05, "loss": 0.7072, "step": 4646 }, { "epoch": 0.13567487080669177, "grad_norm": 0.8082537091153696, "learning_rate": 4.8019464720194654e-05, "loss": 0.8939, "step": 4647 }, { "epoch": 0.13570406703453913, "grad_norm": 0.6095380837063668, "learning_rate": 4.801784266017843e-05, "loss": 0.6956, "step": 4648 }, { "epoch": 0.1357332632623865, "grad_norm": 0.6364477468849415, "learning_rate": 4.801622060016221e-05, "loss": 0.6982, "step": 4649 }, { "epoch": 0.13576245949023386, "grad_norm": 0.6074010851047457, "learning_rate": 4.8014598540145986e-05, "loss": 0.7195, "step": 4650 }, { "epoch": 0.13579165571808122, "grad_norm": 0.6250267659920877, "learning_rate": 4.801297648012977e-05, "loss": 0.7032, "step": 4651 }, { "epoch": 0.13582085194592858, "grad_norm": 0.605194259117859, "learning_rate": 4.801135442011355e-05, "loss": 0.6818, "step": 4652 }, { "epoch": 0.13585004817377594, "grad_norm": 0.6956641682893303, "learning_rate": 4.8009732360097324e-05, "loss": 0.7194, "step": 4653 }, { "epoch": 0.1358792444016233, "grad_norm": 0.6005933473220391, "learning_rate": 4.8008110300081106e-05, "loss": 0.6627, "step": 4654 }, { "epoch": 0.13590844062947066, "grad_norm": 0.6326182460331197, "learning_rate": 4.800648824006488e-05, "loss": 0.6684, "step": 4655 }, { "epoch": 0.13593763685731802, "grad_norm": 0.6615370854397652, "learning_rate": 4.800486618004866e-05, "loss": 0.6782, "step": 4656 }, { "epoch": 0.13596683308516538, "grad_norm": 0.704292889915849, "learning_rate": 4.8003244120032444e-05, "loss": 0.666, "step": 4657 }, { "epoch": 0.13599602931301275, "grad_norm": 0.5900970020407653, "learning_rate": 4.800162206001622e-05, "loss": 0.6322, "step": 4658 }, { "epoch": 0.13602522554086013, "grad_norm": 0.7074400337936457, "learning_rate": 4.8e-05, "loss": 0.8127, "step": 4659 }, { "epoch": 0.1360544217687075, "grad_norm": 0.6425594098864909, "learning_rate": 4.799837793998378e-05, "loss": 0.6818, "step": 4660 }, { "epoch": 0.13608361799655486, "grad_norm": 0.598056891272101, "learning_rate": 4.799675587996756e-05, "loss": 0.6841, "step": 4661 }, { "epoch": 0.13611281422440222, "grad_norm": 0.6082202599843595, "learning_rate": 4.799513381995134e-05, "loss": 0.6923, "step": 4662 }, { "epoch": 0.13614201045224958, "grad_norm": 0.6588372231335545, "learning_rate": 4.799351175993512e-05, "loss": 0.7415, "step": 4663 }, { "epoch": 0.13617120668009694, "grad_norm": 0.6451076194717581, "learning_rate": 4.79918896999189e-05, "loss": 0.7295, "step": 4664 }, { "epoch": 0.1362004029079443, "grad_norm": 0.7958995098145815, "learning_rate": 4.799026763990268e-05, "loss": 0.7534, "step": 4665 }, { "epoch": 0.13622959913579166, "grad_norm": 0.5579855374487642, "learning_rate": 4.798864557988646e-05, "loss": 0.6131, "step": 4666 }, { "epoch": 0.13625879536363902, "grad_norm": 0.5754577687964049, "learning_rate": 4.798702351987024e-05, "loss": 0.6165, "step": 4667 }, { "epoch": 0.13628799159148638, "grad_norm": 0.6462760859731509, "learning_rate": 4.798540145985402e-05, "loss": 0.6913, "step": 4668 }, { "epoch": 0.13631718781933375, "grad_norm": 0.5330962757571172, "learning_rate": 4.79837793998378e-05, "loss": 0.556, "step": 4669 }, { "epoch": 0.1363463840471811, "grad_norm": 0.6760774420090008, "learning_rate": 4.7982157339821573e-05, "loss": 0.7912, "step": 4670 }, { "epoch": 0.13637558027502847, "grad_norm": 0.6036043669767657, "learning_rate": 4.7980535279805355e-05, "loss": 0.6257, "step": 4671 }, { "epoch": 0.13640477650287583, "grad_norm": 0.6688285385237251, "learning_rate": 4.797891321978914e-05, "loss": 0.7643, "step": 4672 }, { "epoch": 0.1364339727307232, "grad_norm": 0.6559200921089838, "learning_rate": 4.797729115977291e-05, "loss": 0.7301, "step": 4673 }, { "epoch": 0.13646316895857055, "grad_norm": 0.5716107890978906, "learning_rate": 4.7975669099756694e-05, "loss": 0.6094, "step": 4674 }, { "epoch": 0.1364923651864179, "grad_norm": 0.6522939041219586, "learning_rate": 4.797404703974047e-05, "loss": 0.7695, "step": 4675 }, { "epoch": 0.13652156141426527, "grad_norm": 0.5866504656625482, "learning_rate": 4.797242497972425e-05, "loss": 0.6632, "step": 4676 }, { "epoch": 0.13655075764211264, "grad_norm": 0.5322237300283329, "learning_rate": 4.797080291970803e-05, "loss": 0.5384, "step": 4677 }, { "epoch": 0.13657995386996, "grad_norm": 0.5692984781799986, "learning_rate": 4.796918085969181e-05, "loss": 0.6197, "step": 4678 }, { "epoch": 0.13660915009780736, "grad_norm": 0.6678593420129475, "learning_rate": 4.796755879967559e-05, "loss": 0.7912, "step": 4679 }, { "epoch": 0.13663834632565472, "grad_norm": 0.5867149180937494, "learning_rate": 4.7965936739659364e-05, "loss": 0.6024, "step": 4680 }, { "epoch": 0.13666754255350208, "grad_norm": 0.6575005313488539, "learning_rate": 4.7964314679643146e-05, "loss": 0.7315, "step": 4681 }, { "epoch": 0.13669673878134944, "grad_norm": 0.6907698214449731, "learning_rate": 4.7962692619626934e-05, "loss": 0.7693, "step": 4682 }, { "epoch": 0.1367259350091968, "grad_norm": 0.6144863766423505, "learning_rate": 4.796107055961071e-05, "loss": 0.6882, "step": 4683 }, { "epoch": 0.13675513123704416, "grad_norm": 0.6206548011976961, "learning_rate": 4.795944849959449e-05, "loss": 0.6291, "step": 4684 }, { "epoch": 0.13678432746489153, "grad_norm": 0.575099755004356, "learning_rate": 4.7957826439578266e-05, "loss": 0.6083, "step": 4685 }, { "epoch": 0.1368135236927389, "grad_norm": 0.5693759430822665, "learning_rate": 4.795620437956205e-05, "loss": 0.6394, "step": 4686 }, { "epoch": 0.13684271992058625, "grad_norm": 0.6290423485542481, "learning_rate": 4.795458231954583e-05, "loss": 0.7306, "step": 4687 }, { "epoch": 0.1368719161484336, "grad_norm": 0.643288719589234, "learning_rate": 4.7952960259529605e-05, "loss": 0.714, "step": 4688 }, { "epoch": 0.136901112376281, "grad_norm": 0.5717705454774227, "learning_rate": 4.7951338199513386e-05, "loss": 0.5679, "step": 4689 }, { "epoch": 0.13693030860412836, "grad_norm": 0.6339952564305762, "learning_rate": 4.794971613949716e-05, "loss": 0.7556, "step": 4690 }, { "epoch": 0.13695950483197572, "grad_norm": 0.5822333647530575, "learning_rate": 4.794809407948094e-05, "loss": 0.6218, "step": 4691 }, { "epoch": 0.13698870105982308, "grad_norm": 0.7495940101001203, "learning_rate": 4.7946472019464725e-05, "loss": 0.7387, "step": 4692 }, { "epoch": 0.13701789728767044, "grad_norm": 0.6086000347691897, "learning_rate": 4.79448499594485e-05, "loss": 0.6703, "step": 4693 }, { "epoch": 0.1370470935155178, "grad_norm": 0.6082171154898366, "learning_rate": 4.794322789943228e-05, "loss": 0.6719, "step": 4694 }, { "epoch": 0.13707628974336517, "grad_norm": 0.6905818002264902, "learning_rate": 4.7941605839416057e-05, "loss": 0.7251, "step": 4695 }, { "epoch": 0.13710548597121253, "grad_norm": 0.6818597505966595, "learning_rate": 4.793998377939984e-05, "loss": 0.7905, "step": 4696 }, { "epoch": 0.1371346821990599, "grad_norm": 0.5979244873901322, "learning_rate": 4.793836171938362e-05, "loss": 0.6923, "step": 4697 }, { "epoch": 0.13716387842690725, "grad_norm": 0.602617877084608, "learning_rate": 4.7936739659367395e-05, "loss": 0.6731, "step": 4698 }, { "epoch": 0.1371930746547546, "grad_norm": 0.5782898722416322, "learning_rate": 4.793511759935118e-05, "loss": 0.6753, "step": 4699 }, { "epoch": 0.13722227088260197, "grad_norm": 0.6865958801095096, "learning_rate": 4.793349553933495e-05, "loss": 0.749, "step": 4700 }, { "epoch": 0.13725146711044933, "grad_norm": 0.6088610225232425, "learning_rate": 4.793187347931874e-05, "loss": 0.6712, "step": 4701 }, { "epoch": 0.1372806633382967, "grad_norm": 0.5944699299083356, "learning_rate": 4.7930251419302515e-05, "loss": 0.6797, "step": 4702 }, { "epoch": 0.13730985956614405, "grad_norm": 0.6640464040534525, "learning_rate": 4.79286293592863e-05, "loss": 0.7238, "step": 4703 }, { "epoch": 0.13733905579399142, "grad_norm": 0.6134221899199561, "learning_rate": 4.792700729927008e-05, "loss": 0.6542, "step": 4704 }, { "epoch": 0.13736825202183878, "grad_norm": 0.6303607964805114, "learning_rate": 4.7925385239253854e-05, "loss": 0.6914, "step": 4705 }, { "epoch": 0.13739744824968614, "grad_norm": 0.6504130278701692, "learning_rate": 4.7923763179237636e-05, "loss": 0.7089, "step": 4706 }, { "epoch": 0.1374266444775335, "grad_norm": 0.6630396599404439, "learning_rate": 4.792214111922142e-05, "loss": 0.7325, "step": 4707 }, { "epoch": 0.13745584070538086, "grad_norm": 0.6264678434668461, "learning_rate": 4.792051905920519e-05, "loss": 0.718, "step": 4708 }, { "epoch": 0.13748503693322822, "grad_norm": 0.5947541194014562, "learning_rate": 4.7918896999188974e-05, "loss": 0.6824, "step": 4709 }, { "epoch": 0.13751423316107558, "grad_norm": 0.7326115553790711, "learning_rate": 4.791727493917275e-05, "loss": 0.7577, "step": 4710 }, { "epoch": 0.13754342938892294, "grad_norm": 0.6187449470411671, "learning_rate": 4.791565287915653e-05, "loss": 0.7309, "step": 4711 }, { "epoch": 0.1375726256167703, "grad_norm": 0.5757212847947665, "learning_rate": 4.791403081914031e-05, "loss": 0.6061, "step": 4712 }, { "epoch": 0.13760182184461767, "grad_norm": 0.7397656924727319, "learning_rate": 4.791240875912409e-05, "loss": 0.7447, "step": 4713 }, { "epoch": 0.13763101807246503, "grad_norm": 0.6086868285544569, "learning_rate": 4.791078669910787e-05, "loss": 0.6935, "step": 4714 }, { "epoch": 0.1376602143003124, "grad_norm": 0.6227688809475279, "learning_rate": 4.7909164639091644e-05, "loss": 0.7183, "step": 4715 }, { "epoch": 0.13768941052815975, "grad_norm": 0.6063310425902074, "learning_rate": 4.7907542579075426e-05, "loss": 0.6791, "step": 4716 }, { "epoch": 0.1377186067560071, "grad_norm": 0.6245997555367214, "learning_rate": 4.790592051905921e-05, "loss": 0.7234, "step": 4717 }, { "epoch": 0.13774780298385447, "grad_norm": 0.636153428347805, "learning_rate": 4.790429845904298e-05, "loss": 0.7447, "step": 4718 }, { "epoch": 0.13777699921170186, "grad_norm": 0.8755457392531653, "learning_rate": 4.7902676399026765e-05, "loss": 0.6257, "step": 4719 }, { "epoch": 0.13780619543954922, "grad_norm": 0.6327999088868412, "learning_rate": 4.7901054339010546e-05, "loss": 0.7551, "step": 4720 }, { "epoch": 0.13783539166739658, "grad_norm": 0.6359914358843104, "learning_rate": 4.789943227899433e-05, "loss": 0.7344, "step": 4721 }, { "epoch": 0.13786458789524395, "grad_norm": 0.603407386151949, "learning_rate": 4.78978102189781e-05, "loss": 0.6295, "step": 4722 }, { "epoch": 0.1378937841230913, "grad_norm": 0.669414363153727, "learning_rate": 4.7896188158961885e-05, "loss": 0.7277, "step": 4723 }, { "epoch": 0.13792298035093867, "grad_norm": 0.6395375325928536, "learning_rate": 4.789456609894567e-05, "loss": 0.7082, "step": 4724 }, { "epoch": 0.13795217657878603, "grad_norm": 0.668111804947228, "learning_rate": 4.789294403892944e-05, "loss": 0.7468, "step": 4725 }, { "epoch": 0.1379813728066334, "grad_norm": 0.6138745667026053, "learning_rate": 4.7891321978913223e-05, "loss": 0.7466, "step": 4726 }, { "epoch": 0.13801056903448075, "grad_norm": 0.6144980370802146, "learning_rate": 4.7889699918897005e-05, "loss": 0.6899, "step": 4727 }, { "epoch": 0.1380397652623281, "grad_norm": 0.5819347708020502, "learning_rate": 4.788807785888078e-05, "loss": 0.5878, "step": 4728 }, { "epoch": 0.13806896149017547, "grad_norm": 0.5656684385114438, "learning_rate": 4.788645579886456e-05, "loss": 0.5581, "step": 4729 }, { "epoch": 0.13809815771802283, "grad_norm": 0.5598540043759828, "learning_rate": 4.788483373884834e-05, "loss": 0.5878, "step": 4730 }, { "epoch": 0.1381273539458702, "grad_norm": 0.5780475173115907, "learning_rate": 4.788321167883212e-05, "loss": 0.6159, "step": 4731 }, { "epoch": 0.13815655017371756, "grad_norm": 0.6109269401200651, "learning_rate": 4.78815896188159e-05, "loss": 0.711, "step": 4732 }, { "epoch": 0.13818574640156492, "grad_norm": 0.5509286446086787, "learning_rate": 4.7879967558799675e-05, "loss": 0.5774, "step": 4733 }, { "epoch": 0.13821494262941228, "grad_norm": 0.6092690155775841, "learning_rate": 4.787834549878346e-05, "loss": 0.634, "step": 4734 }, { "epoch": 0.13824413885725964, "grad_norm": 0.6434652222981825, "learning_rate": 4.787672343876723e-05, "loss": 0.7072, "step": 4735 }, { "epoch": 0.138273335085107, "grad_norm": 0.5805334396017694, "learning_rate": 4.7875101378751014e-05, "loss": 0.6246, "step": 4736 }, { "epoch": 0.13830253131295436, "grad_norm": 0.6270299462554603, "learning_rate": 4.7873479318734796e-05, "loss": 0.6796, "step": 4737 }, { "epoch": 0.13833172754080172, "grad_norm": 0.6171792881558203, "learning_rate": 4.787185725871858e-05, "loss": 0.6682, "step": 4738 }, { "epoch": 0.13836092376864909, "grad_norm": 0.7078697714299068, "learning_rate": 4.787023519870236e-05, "loss": 0.7483, "step": 4739 }, { "epoch": 0.13839011999649645, "grad_norm": 0.6350838611885298, "learning_rate": 4.7868613138686134e-05, "loss": 0.6893, "step": 4740 }, { "epoch": 0.1384193162243438, "grad_norm": 0.6200293600862353, "learning_rate": 4.7866991078669916e-05, "loss": 0.6727, "step": 4741 }, { "epoch": 0.13844851245219117, "grad_norm": 0.5873410759792316, "learning_rate": 4.786536901865369e-05, "loss": 0.5675, "step": 4742 }, { "epoch": 0.13847770868003853, "grad_norm": 0.6419923237349436, "learning_rate": 4.786374695863747e-05, "loss": 0.6942, "step": 4743 }, { "epoch": 0.1385069049078859, "grad_norm": 0.7203454536370719, "learning_rate": 4.7862124898621254e-05, "loss": 0.6938, "step": 4744 }, { "epoch": 0.13853610113573325, "grad_norm": 0.5887813052237585, "learning_rate": 4.786050283860503e-05, "loss": 0.6066, "step": 4745 }, { "epoch": 0.13856529736358061, "grad_norm": 0.6899661543708501, "learning_rate": 4.785888077858881e-05, "loss": 0.7274, "step": 4746 }, { "epoch": 0.13859449359142798, "grad_norm": 0.628026918891395, "learning_rate": 4.7857258718572586e-05, "loss": 0.759, "step": 4747 }, { "epoch": 0.13862368981927534, "grad_norm": 0.6091382594778485, "learning_rate": 4.785563665855637e-05, "loss": 0.6078, "step": 4748 }, { "epoch": 0.13865288604712273, "grad_norm": 0.6082736383027362, "learning_rate": 4.785401459854015e-05, "loss": 0.6437, "step": 4749 }, { "epoch": 0.1386820822749701, "grad_norm": 0.6687311313627115, "learning_rate": 4.7852392538523925e-05, "loss": 0.7736, "step": 4750 }, { "epoch": 0.13871127850281745, "grad_norm": 0.6369582360855932, "learning_rate": 4.7850770478507706e-05, "loss": 0.7617, "step": 4751 }, { "epoch": 0.1387404747306648, "grad_norm": 0.5784768353535217, "learning_rate": 4.784914841849149e-05, "loss": 0.5647, "step": 4752 }, { "epoch": 0.13876967095851217, "grad_norm": 0.5811234750863016, "learning_rate": 4.784752635847526e-05, "loss": 0.6399, "step": 4753 }, { "epoch": 0.13879886718635953, "grad_norm": 0.602520597548305, "learning_rate": 4.7845904298459045e-05, "loss": 0.6392, "step": 4754 }, { "epoch": 0.1388280634142069, "grad_norm": 0.6112886511466853, "learning_rate": 4.784428223844282e-05, "loss": 0.6869, "step": 4755 }, { "epoch": 0.13885725964205425, "grad_norm": 0.5561078983116462, "learning_rate": 4.78426601784266e-05, "loss": 0.5807, "step": 4756 }, { "epoch": 0.13888645586990161, "grad_norm": 0.7684201084499872, "learning_rate": 4.7841038118410384e-05, "loss": 0.7302, "step": 4757 }, { "epoch": 0.13891565209774898, "grad_norm": 0.6955092002928761, "learning_rate": 4.7839416058394165e-05, "loss": 0.8472, "step": 4758 }, { "epoch": 0.13894484832559634, "grad_norm": 0.5855523543353698, "learning_rate": 4.783779399837795e-05, "loss": 0.6505, "step": 4759 }, { "epoch": 0.1389740445534437, "grad_norm": 0.6112189031279609, "learning_rate": 4.783617193836172e-05, "loss": 0.6337, "step": 4760 }, { "epoch": 0.13900324078129106, "grad_norm": 0.6011670095713589, "learning_rate": 4.7834549878345504e-05, "loss": 0.6155, "step": 4761 }, { "epoch": 0.13903243700913842, "grad_norm": 0.6161260875415474, "learning_rate": 4.783292781832928e-05, "loss": 0.6846, "step": 4762 }, { "epoch": 0.13906163323698578, "grad_norm": 0.618287914572716, "learning_rate": 4.783130575831306e-05, "loss": 0.6161, "step": 4763 }, { "epoch": 0.13909082946483314, "grad_norm": 0.6213301532914939, "learning_rate": 4.782968369829684e-05, "loss": 0.6445, "step": 4764 }, { "epoch": 0.1391200256926805, "grad_norm": 0.6084502857107619, "learning_rate": 4.782806163828062e-05, "loss": 0.6261, "step": 4765 }, { "epoch": 0.13914922192052787, "grad_norm": 0.6324928488997538, "learning_rate": 4.78264395782644e-05, "loss": 0.7019, "step": 4766 }, { "epoch": 0.13917841814837523, "grad_norm": 0.6228029686138898, "learning_rate": 4.7824817518248174e-05, "loss": 0.6774, "step": 4767 }, { "epoch": 0.1392076143762226, "grad_norm": 0.5644171584951607, "learning_rate": 4.7823195458231956e-05, "loss": 0.5794, "step": 4768 }, { "epoch": 0.13923681060406995, "grad_norm": 0.6792938949145809, "learning_rate": 4.782157339821574e-05, "loss": 0.7123, "step": 4769 }, { "epoch": 0.1392660068319173, "grad_norm": 0.6187477542528593, "learning_rate": 4.781995133819951e-05, "loss": 0.7663, "step": 4770 }, { "epoch": 0.13929520305976467, "grad_norm": 0.6308811897985862, "learning_rate": 4.7818329278183294e-05, "loss": 0.697, "step": 4771 }, { "epoch": 0.13932439928761203, "grad_norm": 0.6735575337803018, "learning_rate": 4.7816707218167076e-05, "loss": 0.7038, "step": 4772 }, { "epoch": 0.1393535955154594, "grad_norm": 0.5972375666633267, "learning_rate": 4.781508515815085e-05, "loss": 0.669, "step": 4773 }, { "epoch": 0.13938279174330676, "grad_norm": 0.6289404620964626, "learning_rate": 4.781346309813463e-05, "loss": 0.6869, "step": 4774 }, { "epoch": 0.13941198797115412, "grad_norm": 0.6608899021221072, "learning_rate": 4.781184103811841e-05, "loss": 0.7288, "step": 4775 }, { "epoch": 0.13944118419900148, "grad_norm": 0.5959651132529746, "learning_rate": 4.7810218978102196e-05, "loss": 0.6687, "step": 4776 }, { "epoch": 0.13947038042684884, "grad_norm": 0.6374869682405503, "learning_rate": 4.780859691808597e-05, "loss": 0.7494, "step": 4777 }, { "epoch": 0.1394995766546962, "grad_norm": 0.6030350844742911, "learning_rate": 4.780697485806975e-05, "loss": 0.6647, "step": 4778 }, { "epoch": 0.1395287728825436, "grad_norm": 0.5843930383808945, "learning_rate": 4.7805352798053535e-05, "loss": 0.613, "step": 4779 }, { "epoch": 0.13955796911039095, "grad_norm": 0.6405786565459104, "learning_rate": 4.780373073803731e-05, "loss": 0.7342, "step": 4780 }, { "epoch": 0.1395871653382383, "grad_norm": 0.6356899794172437, "learning_rate": 4.780210867802109e-05, "loss": 0.6849, "step": 4781 }, { "epoch": 0.13961636156608567, "grad_norm": 0.6921684106955293, "learning_rate": 4.7800486618004867e-05, "loss": 0.7998, "step": 4782 }, { "epoch": 0.13964555779393303, "grad_norm": 0.6550668633002195, "learning_rate": 4.779886455798865e-05, "loss": 0.695, "step": 4783 }, { "epoch": 0.1396747540217804, "grad_norm": 0.5550889110386559, "learning_rate": 4.779724249797243e-05, "loss": 0.6132, "step": 4784 }, { "epoch": 0.13970395024962776, "grad_norm": 0.6108818098162803, "learning_rate": 4.7795620437956205e-05, "loss": 0.6551, "step": 4785 }, { "epoch": 0.13973314647747512, "grad_norm": 0.5389166953424771, "learning_rate": 4.779399837793999e-05, "loss": 0.5314, "step": 4786 }, { "epoch": 0.13976234270532248, "grad_norm": 0.6117961201202452, "learning_rate": 4.779237631792376e-05, "loss": 0.6607, "step": 4787 }, { "epoch": 0.13979153893316984, "grad_norm": 0.6929007446066482, "learning_rate": 4.7790754257907544e-05, "loss": 0.7599, "step": 4788 }, { "epoch": 0.1398207351610172, "grad_norm": 0.6234468616738529, "learning_rate": 4.7789132197891325e-05, "loss": 0.7224, "step": 4789 }, { "epoch": 0.13984993138886456, "grad_norm": 0.681466368854074, "learning_rate": 4.77875101378751e-05, "loss": 0.6449, "step": 4790 }, { "epoch": 0.13987912761671192, "grad_norm": 0.6321501824279973, "learning_rate": 4.778588807785888e-05, "loss": 0.7354, "step": 4791 }, { "epoch": 0.13990832384455928, "grad_norm": 0.635907592576769, "learning_rate": 4.778426601784266e-05, "loss": 0.6848, "step": 4792 }, { "epoch": 0.13993752007240665, "grad_norm": 0.6307040221421223, "learning_rate": 4.778264395782644e-05, "loss": 0.6679, "step": 4793 }, { "epoch": 0.139966716300254, "grad_norm": 0.6101652922912917, "learning_rate": 4.778102189781022e-05, "loss": 0.6579, "step": 4794 }, { "epoch": 0.13999591252810137, "grad_norm": 0.6173627742994965, "learning_rate": 4.7779399837794e-05, "loss": 0.7066, "step": 4795 }, { "epoch": 0.14002510875594873, "grad_norm": 0.6364752880304564, "learning_rate": 4.7777777777777784e-05, "loss": 0.709, "step": 4796 }, { "epoch": 0.1400543049837961, "grad_norm": 0.6533423723172819, "learning_rate": 4.777615571776156e-05, "loss": 0.6388, "step": 4797 }, { "epoch": 0.14008350121164345, "grad_norm": 0.7835736463388517, "learning_rate": 4.777453365774534e-05, "loss": 0.7559, "step": 4798 }, { "epoch": 0.1401126974394908, "grad_norm": 0.6406511564704438, "learning_rate": 4.777291159772912e-05, "loss": 0.7153, "step": 4799 }, { "epoch": 0.14014189366733817, "grad_norm": 0.5822254342628954, "learning_rate": 4.77712895377129e-05, "loss": 0.6792, "step": 4800 }, { "epoch": 0.14017108989518554, "grad_norm": 0.6503480216886856, "learning_rate": 4.776966747769668e-05, "loss": 0.7637, "step": 4801 }, { "epoch": 0.1402002861230329, "grad_norm": 0.5784013089436791, "learning_rate": 4.7768045417680454e-05, "loss": 0.6455, "step": 4802 }, { "epoch": 0.14022948235088026, "grad_norm": 0.5956818561938152, "learning_rate": 4.7766423357664236e-05, "loss": 0.6616, "step": 4803 }, { "epoch": 0.14025867857872762, "grad_norm": 0.5880166342487154, "learning_rate": 4.776480129764802e-05, "loss": 0.6118, "step": 4804 }, { "epoch": 0.14028787480657498, "grad_norm": 0.5468712227342439, "learning_rate": 4.776317923763179e-05, "loss": 0.6187, "step": 4805 }, { "epoch": 0.14031707103442234, "grad_norm": 0.5878104109872585, "learning_rate": 4.7761557177615575e-05, "loss": 0.6681, "step": 4806 }, { "epoch": 0.1403462672622697, "grad_norm": 0.5829526005892451, "learning_rate": 4.775993511759935e-05, "loss": 0.6652, "step": 4807 }, { "epoch": 0.14037546349011706, "grad_norm": 0.5848028982147636, "learning_rate": 4.775831305758313e-05, "loss": 0.6319, "step": 4808 }, { "epoch": 0.14040465971796443, "grad_norm": 0.6437479165117496, "learning_rate": 4.775669099756691e-05, "loss": 0.6875, "step": 4809 }, { "epoch": 0.14043385594581181, "grad_norm": 0.684164369997831, "learning_rate": 4.775506893755069e-05, "loss": 0.8104, "step": 4810 }, { "epoch": 0.14046305217365918, "grad_norm": 0.6267086208965565, "learning_rate": 4.775344687753447e-05, "loss": 0.7024, "step": 4811 }, { "epoch": 0.14049224840150654, "grad_norm": 0.622347236095617, "learning_rate": 4.7751824817518245e-05, "loss": 0.6408, "step": 4812 }, { "epoch": 0.1405214446293539, "grad_norm": 0.6203366206838851, "learning_rate": 4.775020275750203e-05, "loss": 0.7048, "step": 4813 }, { "epoch": 0.14055064085720126, "grad_norm": 0.556504085594549, "learning_rate": 4.7748580697485815e-05, "loss": 0.586, "step": 4814 }, { "epoch": 0.14057983708504862, "grad_norm": 0.6224265044426183, "learning_rate": 4.774695863746959e-05, "loss": 0.7025, "step": 4815 }, { "epoch": 0.14060903331289598, "grad_norm": 0.6165407940150479, "learning_rate": 4.774533657745337e-05, "loss": 0.6836, "step": 4816 }, { "epoch": 0.14063822954074334, "grad_norm": 0.6449877771189035, "learning_rate": 4.774371451743715e-05, "loss": 0.7367, "step": 4817 }, { "epoch": 0.1406674257685907, "grad_norm": 0.5778800272181823, "learning_rate": 4.774209245742093e-05, "loss": 0.6806, "step": 4818 }, { "epoch": 0.14069662199643806, "grad_norm": 0.7024633986680736, "learning_rate": 4.774047039740471e-05, "loss": 0.7246, "step": 4819 }, { "epoch": 0.14072581822428543, "grad_norm": 0.6824127171239827, "learning_rate": 4.7738848337388485e-05, "loss": 0.8113, "step": 4820 }, { "epoch": 0.1407550144521328, "grad_norm": 0.661657859800006, "learning_rate": 4.773722627737227e-05, "loss": 0.808, "step": 4821 }, { "epoch": 0.14078421067998015, "grad_norm": 0.5705018688633374, "learning_rate": 4.773560421735604e-05, "loss": 0.6233, "step": 4822 }, { "epoch": 0.1408134069078275, "grad_norm": 0.6128828523619468, "learning_rate": 4.7733982157339824e-05, "loss": 0.6321, "step": 4823 }, { "epoch": 0.14084260313567487, "grad_norm": 0.5891697614400384, "learning_rate": 4.7732360097323606e-05, "loss": 0.6649, "step": 4824 }, { "epoch": 0.14087179936352223, "grad_norm": 0.7679076050566033, "learning_rate": 4.773073803730738e-05, "loss": 0.6609, "step": 4825 }, { "epoch": 0.1409009955913696, "grad_norm": 0.5892106589404453, "learning_rate": 4.772911597729116e-05, "loss": 0.6366, "step": 4826 }, { "epoch": 0.14093019181921695, "grad_norm": 0.5809817305055683, "learning_rate": 4.772749391727494e-05, "loss": 0.6014, "step": 4827 }, { "epoch": 0.14095938804706432, "grad_norm": 0.8878504775949152, "learning_rate": 4.772587185725872e-05, "loss": 0.6573, "step": 4828 }, { "epoch": 0.14098858427491168, "grad_norm": 0.6630602079274304, "learning_rate": 4.77242497972425e-05, "loss": 0.6845, "step": 4829 }, { "epoch": 0.14101778050275904, "grad_norm": 0.6293979501221015, "learning_rate": 4.7722627737226276e-05, "loss": 0.6896, "step": 4830 }, { "epoch": 0.1410469767306064, "grad_norm": 0.618837025340333, "learning_rate": 4.772100567721006e-05, "loss": 0.6666, "step": 4831 }, { "epoch": 0.14107617295845376, "grad_norm": 0.6039252520276241, "learning_rate": 4.771938361719383e-05, "loss": 0.6891, "step": 4832 }, { "epoch": 0.14110536918630112, "grad_norm": 0.6388886769891591, "learning_rate": 4.771776155717762e-05, "loss": 0.716, "step": 4833 }, { "epoch": 0.14113456541414848, "grad_norm": 0.5321337004194493, "learning_rate": 4.7716139497161396e-05, "loss": 0.5555, "step": 4834 }, { "epoch": 0.14116376164199584, "grad_norm": 0.6393819178931025, "learning_rate": 4.771451743714518e-05, "loss": 0.7219, "step": 4835 }, { "epoch": 0.1411929578698432, "grad_norm": 0.6760269989187891, "learning_rate": 4.771289537712896e-05, "loss": 0.7574, "step": 4836 }, { "epoch": 0.14122215409769057, "grad_norm": 0.7013705189291058, "learning_rate": 4.7711273317112735e-05, "loss": 0.7334, "step": 4837 }, { "epoch": 0.14125135032553793, "grad_norm": 0.6442671743695744, "learning_rate": 4.7709651257096516e-05, "loss": 0.7549, "step": 4838 }, { "epoch": 0.1412805465533853, "grad_norm": 0.574605897732519, "learning_rate": 4.77080291970803e-05, "loss": 0.5511, "step": 4839 }, { "epoch": 0.14130974278123268, "grad_norm": 0.6359169215258563, "learning_rate": 4.770640713706407e-05, "loss": 0.6876, "step": 4840 }, { "epoch": 0.14133893900908004, "grad_norm": 0.6333421409070595, "learning_rate": 4.7704785077047855e-05, "loss": 0.685, "step": 4841 }, { "epoch": 0.1413681352369274, "grad_norm": 0.6575672847203745, "learning_rate": 4.770316301703163e-05, "loss": 0.7285, "step": 4842 }, { "epoch": 0.14139733146477476, "grad_norm": 0.5953924421164526, "learning_rate": 4.770154095701541e-05, "loss": 0.6437, "step": 4843 }, { "epoch": 0.14142652769262212, "grad_norm": 0.59757808305863, "learning_rate": 4.7699918896999194e-05, "loss": 0.6324, "step": 4844 }, { "epoch": 0.14145572392046948, "grad_norm": 0.687257725270679, "learning_rate": 4.769829683698297e-05, "loss": 0.7335, "step": 4845 }, { "epoch": 0.14148492014831685, "grad_norm": 0.6477879243570488, "learning_rate": 4.769667477696675e-05, "loss": 0.6923, "step": 4846 }, { "epoch": 0.1415141163761642, "grad_norm": 0.7350031929470701, "learning_rate": 4.7695052716950525e-05, "loss": 0.8115, "step": 4847 }, { "epoch": 0.14154331260401157, "grad_norm": 0.654400724565578, "learning_rate": 4.769343065693431e-05, "loss": 0.697, "step": 4848 }, { "epoch": 0.14157250883185893, "grad_norm": 0.6908814278587146, "learning_rate": 4.769180859691809e-05, "loss": 0.7533, "step": 4849 }, { "epoch": 0.1416017050597063, "grad_norm": 0.7115863180658791, "learning_rate": 4.7690186536901864e-05, "loss": 0.7744, "step": 4850 }, { "epoch": 0.14163090128755365, "grad_norm": 0.6355956657046252, "learning_rate": 4.7688564476885646e-05, "loss": 0.7107, "step": 4851 }, { "epoch": 0.141660097515401, "grad_norm": 0.6500487066443417, "learning_rate": 4.768694241686943e-05, "loss": 0.7522, "step": 4852 }, { "epoch": 0.14168929374324837, "grad_norm": 0.6046666790093878, "learning_rate": 4.768532035685321e-05, "loss": 0.6671, "step": 4853 }, { "epoch": 0.14171848997109573, "grad_norm": 0.6268642595748843, "learning_rate": 4.7683698296836984e-05, "loss": 0.6671, "step": 4854 }, { "epoch": 0.1417476861989431, "grad_norm": 0.5908939070879468, "learning_rate": 4.7682076236820766e-05, "loss": 0.639, "step": 4855 }, { "epoch": 0.14177688242679046, "grad_norm": 0.5978730267576732, "learning_rate": 4.768045417680455e-05, "loss": 0.6752, "step": 4856 }, { "epoch": 0.14180607865463782, "grad_norm": 0.6522417276985277, "learning_rate": 4.767883211678832e-05, "loss": 0.6707, "step": 4857 }, { "epoch": 0.14183527488248518, "grad_norm": 0.6301384395157832, "learning_rate": 4.7677210056772104e-05, "loss": 0.6633, "step": 4858 }, { "epoch": 0.14186447111033254, "grad_norm": 0.6403145161495124, "learning_rate": 4.7675587996755886e-05, "loss": 0.6795, "step": 4859 }, { "epoch": 0.1418936673381799, "grad_norm": 0.6309367952969475, "learning_rate": 4.767396593673966e-05, "loss": 0.6834, "step": 4860 }, { "epoch": 0.14192286356602726, "grad_norm": 0.5816561330966171, "learning_rate": 4.767234387672344e-05, "loss": 0.6209, "step": 4861 }, { "epoch": 0.14195205979387462, "grad_norm": 0.6609652219177498, "learning_rate": 4.767072181670722e-05, "loss": 0.7933, "step": 4862 }, { "epoch": 0.14198125602172199, "grad_norm": 0.661709462186492, "learning_rate": 4.7669099756691e-05, "loss": 0.6855, "step": 4863 }, { "epoch": 0.14201045224956935, "grad_norm": 0.5846572667381082, "learning_rate": 4.766747769667478e-05, "loss": 0.6424, "step": 4864 }, { "epoch": 0.1420396484774167, "grad_norm": 0.6240547889384706, "learning_rate": 4.7665855636658556e-05, "loss": 0.676, "step": 4865 }, { "epoch": 0.14206884470526407, "grad_norm": 0.5864838985935442, "learning_rate": 4.766423357664234e-05, "loss": 0.6601, "step": 4866 }, { "epoch": 0.14209804093311143, "grad_norm": 0.6905599451942122, "learning_rate": 4.766261151662611e-05, "loss": 0.6918, "step": 4867 }, { "epoch": 0.1421272371609588, "grad_norm": 0.6196712676913727, "learning_rate": 4.7660989456609895e-05, "loss": 0.6999, "step": 4868 }, { "epoch": 0.14215643338880615, "grad_norm": 0.6189537240243134, "learning_rate": 4.7659367396593677e-05, "loss": 0.716, "step": 4869 }, { "epoch": 0.14218562961665354, "grad_norm": 0.6612614325930796, "learning_rate": 4.765774533657745e-05, "loss": 0.7793, "step": 4870 }, { "epoch": 0.1422148258445009, "grad_norm": 0.6025965092103105, "learning_rate": 4.765612327656124e-05, "loss": 0.6643, "step": 4871 }, { "epoch": 0.14224402207234826, "grad_norm": 0.5635214540741034, "learning_rate": 4.7654501216545015e-05, "loss": 0.6264, "step": 4872 }, { "epoch": 0.14227321830019563, "grad_norm": 0.592330737898328, "learning_rate": 4.76528791565288e-05, "loss": 0.6571, "step": 4873 }, { "epoch": 0.142302414528043, "grad_norm": 0.7633074478857177, "learning_rate": 4.765125709651257e-05, "loss": 0.6622, "step": 4874 }, { "epoch": 0.14233161075589035, "grad_norm": 0.5688286822039558, "learning_rate": 4.7649635036496354e-05, "loss": 0.6222, "step": 4875 }, { "epoch": 0.1423608069837377, "grad_norm": 0.6398865916074568, "learning_rate": 4.7648012976480135e-05, "loss": 0.7611, "step": 4876 }, { "epoch": 0.14239000321158507, "grad_norm": 0.6564257145274921, "learning_rate": 4.764639091646391e-05, "loss": 0.7212, "step": 4877 }, { "epoch": 0.14241919943943243, "grad_norm": 0.6354100709194201, "learning_rate": 4.764476885644769e-05, "loss": 0.6818, "step": 4878 }, { "epoch": 0.1424483956672798, "grad_norm": 0.5905814856735628, "learning_rate": 4.764314679643147e-05, "loss": 0.6871, "step": 4879 }, { "epoch": 0.14247759189512715, "grad_norm": 0.6248638930184491, "learning_rate": 4.764152473641525e-05, "loss": 0.6903, "step": 4880 }, { "epoch": 0.14250678812297451, "grad_norm": 0.5988430181239989, "learning_rate": 4.763990267639903e-05, "loss": 0.6537, "step": 4881 }, { "epoch": 0.14253598435082188, "grad_norm": 0.5972222520545553, "learning_rate": 4.7638280616382806e-05, "loss": 0.6467, "step": 4882 }, { "epoch": 0.14256518057866924, "grad_norm": 0.6749953045855631, "learning_rate": 4.763665855636659e-05, "loss": 0.7789, "step": 4883 }, { "epoch": 0.1425943768065166, "grad_norm": 0.5911535731884187, "learning_rate": 4.763503649635037e-05, "loss": 0.7094, "step": 4884 }, { "epoch": 0.14262357303436396, "grad_norm": 0.6607641381325603, "learning_rate": 4.7633414436334144e-05, "loss": 0.6777, "step": 4885 }, { "epoch": 0.14265276926221132, "grad_norm": 0.6047261068269026, "learning_rate": 4.7631792376317926e-05, "loss": 0.7444, "step": 4886 }, { "epoch": 0.14268196549005868, "grad_norm": 0.6464901919327665, "learning_rate": 4.76301703163017e-05, "loss": 0.697, "step": 4887 }, { "epoch": 0.14271116171790604, "grad_norm": 0.6895541174135064, "learning_rate": 4.762854825628548e-05, "loss": 0.6666, "step": 4888 }, { "epoch": 0.1427403579457534, "grad_norm": 0.7423821348886374, "learning_rate": 4.7626926196269264e-05, "loss": 0.7237, "step": 4889 }, { "epoch": 0.14276955417360077, "grad_norm": 0.5842396500719843, "learning_rate": 4.7625304136253046e-05, "loss": 0.6387, "step": 4890 }, { "epoch": 0.14279875040144813, "grad_norm": 0.6631001480233979, "learning_rate": 4.762368207623683e-05, "loss": 0.7577, "step": 4891 }, { "epoch": 0.1428279466292955, "grad_norm": 0.6553906711166292, "learning_rate": 4.76220600162206e-05, "loss": 0.7149, "step": 4892 }, { "epoch": 0.14285714285714285, "grad_norm": 0.656722645191059, "learning_rate": 4.7620437956204385e-05, "loss": 0.7587, "step": 4893 }, { "epoch": 0.1428863390849902, "grad_norm": 0.676192854496644, "learning_rate": 4.761881589618816e-05, "loss": 0.6803, "step": 4894 }, { "epoch": 0.14291553531283757, "grad_norm": 0.5795761364160216, "learning_rate": 4.761719383617194e-05, "loss": 0.6207, "step": 4895 }, { "epoch": 0.14294473154068493, "grad_norm": 0.5863843892035737, "learning_rate": 4.761557177615572e-05, "loss": 0.6137, "step": 4896 }, { "epoch": 0.1429739277685323, "grad_norm": 0.5482523671212732, "learning_rate": 4.76139497161395e-05, "loss": 0.5586, "step": 4897 }, { "epoch": 0.14300312399637966, "grad_norm": 0.6280190523614811, "learning_rate": 4.761232765612328e-05, "loss": 0.7217, "step": 4898 }, { "epoch": 0.14303232022422702, "grad_norm": 0.6110157652647354, "learning_rate": 4.7610705596107055e-05, "loss": 0.7086, "step": 4899 }, { "epoch": 0.1430615164520744, "grad_norm": 0.6156487996915906, "learning_rate": 4.760908353609084e-05, "loss": 0.6896, "step": 4900 }, { "epoch": 0.14309071267992177, "grad_norm": 0.6497206276962587, "learning_rate": 4.760746147607462e-05, "loss": 0.691, "step": 4901 }, { "epoch": 0.14311990890776913, "grad_norm": 0.5275529192881369, "learning_rate": 4.7605839416058393e-05, "loss": 0.5504, "step": 4902 }, { "epoch": 0.1431491051356165, "grad_norm": 0.7873203991070654, "learning_rate": 4.7604217356042175e-05, "loss": 0.761, "step": 4903 }, { "epoch": 0.14317830136346385, "grad_norm": 0.6276792543736772, "learning_rate": 4.760259529602596e-05, "loss": 0.7157, "step": 4904 }, { "epoch": 0.1432074975913112, "grad_norm": 0.5983998829543489, "learning_rate": 4.760097323600973e-05, "loss": 0.645, "step": 4905 }, { "epoch": 0.14323669381915857, "grad_norm": 0.6202855315572972, "learning_rate": 4.7599351175993514e-05, "loss": 0.5906, "step": 4906 }, { "epoch": 0.14326589004700593, "grad_norm": 0.5947206079019705, "learning_rate": 4.759772911597729e-05, "loss": 0.5743, "step": 4907 }, { "epoch": 0.1432950862748533, "grad_norm": 0.7527616347325914, "learning_rate": 4.759610705596108e-05, "loss": 0.6724, "step": 4908 }, { "epoch": 0.14332428250270066, "grad_norm": 0.5874818742869704, "learning_rate": 4.759448499594485e-05, "loss": 0.6582, "step": 4909 }, { "epoch": 0.14335347873054802, "grad_norm": 0.6117948443789591, "learning_rate": 4.7592862935928634e-05, "loss": 0.6695, "step": 4910 }, { "epoch": 0.14338267495839538, "grad_norm": 0.6066235765165298, "learning_rate": 4.7591240875912416e-05, "loss": 0.6881, "step": 4911 }, { "epoch": 0.14341187118624274, "grad_norm": 0.6378079337056589, "learning_rate": 4.758961881589619e-05, "loss": 0.6705, "step": 4912 }, { "epoch": 0.1434410674140901, "grad_norm": 0.5822312582675322, "learning_rate": 4.758799675587997e-05, "loss": 0.6439, "step": 4913 }, { "epoch": 0.14347026364193746, "grad_norm": 0.8019528436615465, "learning_rate": 4.758637469586375e-05, "loss": 0.806, "step": 4914 }, { "epoch": 0.14349945986978482, "grad_norm": 0.654210917297132, "learning_rate": 4.758475263584753e-05, "loss": 0.6113, "step": 4915 }, { "epoch": 0.14352865609763218, "grad_norm": 0.7264285663692641, "learning_rate": 4.758313057583131e-05, "loss": 0.7594, "step": 4916 }, { "epoch": 0.14355785232547955, "grad_norm": 0.5877770230425586, "learning_rate": 4.7581508515815086e-05, "loss": 0.6613, "step": 4917 }, { "epoch": 0.1435870485533269, "grad_norm": 0.6275963382301903, "learning_rate": 4.757988645579887e-05, "loss": 0.7391, "step": 4918 }, { "epoch": 0.14361624478117427, "grad_norm": 0.652166254154191, "learning_rate": 4.757826439578264e-05, "loss": 0.7718, "step": 4919 }, { "epoch": 0.14364544100902163, "grad_norm": 0.5543484292442882, "learning_rate": 4.7576642335766424e-05, "loss": 0.5505, "step": 4920 }, { "epoch": 0.143674637236869, "grad_norm": 0.6157389292636567, "learning_rate": 4.7575020275750206e-05, "loss": 0.6668, "step": 4921 }, { "epoch": 0.14370383346471635, "grad_norm": 0.6530927111362138, "learning_rate": 4.757339821573398e-05, "loss": 0.7066, "step": 4922 }, { "epoch": 0.1437330296925637, "grad_norm": 0.6141357253692883, "learning_rate": 4.757177615571776e-05, "loss": 0.7006, "step": 4923 }, { "epoch": 0.14376222592041107, "grad_norm": 0.5893559027532039, "learning_rate": 4.757015409570154e-05, "loss": 0.6381, "step": 4924 }, { "epoch": 0.14379142214825844, "grad_norm": 0.6099852567781622, "learning_rate": 4.756853203568532e-05, "loss": 0.7019, "step": 4925 }, { "epoch": 0.1438206183761058, "grad_norm": 0.6360169261030229, "learning_rate": 4.75669099756691e-05, "loss": 0.6874, "step": 4926 }, { "epoch": 0.14384981460395316, "grad_norm": 0.6013012986615848, "learning_rate": 4.756528791565288e-05, "loss": 0.6865, "step": 4927 }, { "epoch": 0.14387901083180052, "grad_norm": 0.6509185583347841, "learning_rate": 4.7563665855636665e-05, "loss": 0.7123, "step": 4928 }, { "epoch": 0.14390820705964788, "grad_norm": 0.6754432124797228, "learning_rate": 4.756204379562044e-05, "loss": 0.6499, "step": 4929 }, { "epoch": 0.14393740328749527, "grad_norm": 0.5684043891714486, "learning_rate": 4.756042173560422e-05, "loss": 0.6091, "step": 4930 }, { "epoch": 0.14396659951534263, "grad_norm": 0.5839492803919915, "learning_rate": 4.7558799675588004e-05, "loss": 0.6476, "step": 4931 }, { "epoch": 0.14399579574319, "grad_norm": 0.587553542289925, "learning_rate": 4.755717761557178e-05, "loss": 0.6595, "step": 4932 }, { "epoch": 0.14402499197103735, "grad_norm": 0.5822435360651355, "learning_rate": 4.755555555555556e-05, "loss": 0.6369, "step": 4933 }, { "epoch": 0.14405418819888471, "grad_norm": 0.5850259068288468, "learning_rate": 4.7553933495539335e-05, "loss": 0.6012, "step": 4934 }, { "epoch": 0.14408338442673208, "grad_norm": 0.6031931539839848, "learning_rate": 4.755231143552312e-05, "loss": 0.6955, "step": 4935 }, { "epoch": 0.14411258065457944, "grad_norm": 0.6340337199999506, "learning_rate": 4.75506893755069e-05, "loss": 0.7069, "step": 4936 }, { "epoch": 0.1441417768824268, "grad_norm": 0.7000646934018429, "learning_rate": 4.7549067315490674e-05, "loss": 0.8072, "step": 4937 }, { "epoch": 0.14417097311027416, "grad_norm": 0.6196768732100771, "learning_rate": 4.7547445255474456e-05, "loss": 0.7293, "step": 4938 }, { "epoch": 0.14420016933812152, "grad_norm": 0.8922719852575614, "learning_rate": 4.754582319545823e-05, "loss": 0.724, "step": 4939 }, { "epoch": 0.14422936556596888, "grad_norm": 0.5675476350407728, "learning_rate": 4.754420113544201e-05, "loss": 0.6309, "step": 4940 }, { "epoch": 0.14425856179381624, "grad_norm": 0.7340926416921223, "learning_rate": 4.7542579075425794e-05, "loss": 0.6861, "step": 4941 }, { "epoch": 0.1442877580216636, "grad_norm": 0.6144504691993894, "learning_rate": 4.754095701540957e-05, "loss": 0.6732, "step": 4942 }, { "epoch": 0.14431695424951096, "grad_norm": 0.5654896503677593, "learning_rate": 4.753933495539335e-05, "loss": 0.652, "step": 4943 }, { "epoch": 0.14434615047735833, "grad_norm": 0.6260619825760396, "learning_rate": 4.7537712895377126e-05, "loss": 0.7069, "step": 4944 }, { "epoch": 0.1443753467052057, "grad_norm": 0.6014217153854333, "learning_rate": 4.753609083536091e-05, "loss": 0.6774, "step": 4945 }, { "epoch": 0.14440454293305305, "grad_norm": 0.6060791617049931, "learning_rate": 4.753446877534469e-05, "loss": 0.6456, "step": 4946 }, { "epoch": 0.1444337391609004, "grad_norm": 1.1707914623304616, "learning_rate": 4.753284671532847e-05, "loss": 0.7347, "step": 4947 }, { "epoch": 0.14446293538874777, "grad_norm": 0.6881658141532025, "learning_rate": 4.753122465531225e-05, "loss": 0.76, "step": 4948 }, { "epoch": 0.14449213161659513, "grad_norm": 0.6799392289088801, "learning_rate": 4.752960259529603e-05, "loss": 0.7615, "step": 4949 }, { "epoch": 0.1445213278444425, "grad_norm": 0.619570529644485, "learning_rate": 4.752798053527981e-05, "loss": 0.7821, "step": 4950 }, { "epoch": 0.14455052407228985, "grad_norm": 1.8145390842136107, "learning_rate": 4.752635847526359e-05, "loss": 0.9833, "step": 4951 }, { "epoch": 0.14457972030013722, "grad_norm": 0.6083390170936225, "learning_rate": 4.7524736415247366e-05, "loss": 0.6701, "step": 4952 }, { "epoch": 0.14460891652798458, "grad_norm": 0.5876547078021296, "learning_rate": 4.752311435523115e-05, "loss": 0.6094, "step": 4953 }, { "epoch": 0.14463811275583194, "grad_norm": 0.633427670644758, "learning_rate": 4.752149229521492e-05, "loss": 0.7214, "step": 4954 }, { "epoch": 0.1446673089836793, "grad_norm": 0.5927117436925762, "learning_rate": 4.7519870235198705e-05, "loss": 0.6607, "step": 4955 }, { "epoch": 0.14469650521152666, "grad_norm": 0.6335975181570814, "learning_rate": 4.7518248175182487e-05, "loss": 0.7145, "step": 4956 }, { "epoch": 0.14472570143937402, "grad_norm": 0.5622734491713096, "learning_rate": 4.751662611516626e-05, "loss": 0.592, "step": 4957 }, { "epoch": 0.14475489766722138, "grad_norm": 0.6863690077591684, "learning_rate": 4.751500405515004e-05, "loss": 0.7026, "step": 4958 }, { "epoch": 0.14478409389506874, "grad_norm": 0.6144200232512151, "learning_rate": 4.751338199513382e-05, "loss": 0.6507, "step": 4959 }, { "epoch": 0.14481329012291613, "grad_norm": 0.6980921935491055, "learning_rate": 4.75117599351176e-05, "loss": 0.749, "step": 4960 }, { "epoch": 0.1448424863507635, "grad_norm": 0.6143033647185803, "learning_rate": 4.751013787510138e-05, "loss": 0.6975, "step": 4961 }, { "epoch": 0.14487168257861086, "grad_norm": 0.6286902934017102, "learning_rate": 4.750851581508516e-05, "loss": 0.7233, "step": 4962 }, { "epoch": 0.14490087880645822, "grad_norm": 0.5882523524074146, "learning_rate": 4.750689375506894e-05, "loss": 0.6279, "step": 4963 }, { "epoch": 0.14493007503430558, "grad_norm": 0.6805197225768486, "learning_rate": 4.7505271695052714e-05, "loss": 0.7217, "step": 4964 }, { "epoch": 0.14495927126215294, "grad_norm": 0.5940111188363968, "learning_rate": 4.75036496350365e-05, "loss": 0.6586, "step": 4965 }, { "epoch": 0.1449884674900003, "grad_norm": 0.9282436456173584, "learning_rate": 4.750202757502028e-05, "loss": 0.7666, "step": 4966 }, { "epoch": 0.14501766371784766, "grad_norm": 0.6670875977454371, "learning_rate": 4.750040551500406e-05, "loss": 0.7041, "step": 4967 }, { "epoch": 0.14504685994569502, "grad_norm": 0.6434509704625303, "learning_rate": 4.749878345498784e-05, "loss": 0.7394, "step": 4968 }, { "epoch": 0.14507605617354238, "grad_norm": 0.6240808783364407, "learning_rate": 4.7497161394971616e-05, "loss": 0.673, "step": 4969 }, { "epoch": 0.14510525240138974, "grad_norm": 0.6143626210559853, "learning_rate": 4.74955393349554e-05, "loss": 0.7081, "step": 4970 }, { "epoch": 0.1451344486292371, "grad_norm": 0.5918276970528386, "learning_rate": 4.749391727493918e-05, "loss": 0.6231, "step": 4971 }, { "epoch": 0.14516364485708447, "grad_norm": 0.5753074986411969, "learning_rate": 4.7492295214922954e-05, "loss": 0.6309, "step": 4972 }, { "epoch": 0.14519284108493183, "grad_norm": 0.6283368269727613, "learning_rate": 4.7490673154906736e-05, "loss": 0.6922, "step": 4973 }, { "epoch": 0.1452220373127792, "grad_norm": 0.5757738923902024, "learning_rate": 4.748905109489051e-05, "loss": 0.6176, "step": 4974 }, { "epoch": 0.14525123354062655, "grad_norm": 0.5589355480437823, "learning_rate": 4.748742903487429e-05, "loss": 0.5475, "step": 4975 }, { "epoch": 0.1452804297684739, "grad_norm": 0.6080878442818862, "learning_rate": 4.7485806974858074e-05, "loss": 0.6859, "step": 4976 }, { "epoch": 0.14530962599632127, "grad_norm": 0.6243282701391176, "learning_rate": 4.748418491484185e-05, "loss": 0.7184, "step": 4977 }, { "epoch": 0.14533882222416863, "grad_norm": 0.6421781648274103, "learning_rate": 4.748256285482563e-05, "loss": 0.7484, "step": 4978 }, { "epoch": 0.145368018452016, "grad_norm": 0.6911474331779308, "learning_rate": 4.7480940794809406e-05, "loss": 0.8428, "step": 4979 }, { "epoch": 0.14539721467986336, "grad_norm": 0.5785442743854303, "learning_rate": 4.747931873479319e-05, "loss": 0.5815, "step": 4980 }, { "epoch": 0.14542641090771072, "grad_norm": 0.6410751989882251, "learning_rate": 4.747769667477697e-05, "loss": 0.7158, "step": 4981 }, { "epoch": 0.14545560713555808, "grad_norm": 0.6626433579244186, "learning_rate": 4.7476074614760745e-05, "loss": 0.7286, "step": 4982 }, { "epoch": 0.14548480336340544, "grad_norm": 0.5853109317062134, "learning_rate": 4.7474452554744526e-05, "loss": 0.6578, "step": 4983 }, { "epoch": 0.1455139995912528, "grad_norm": 0.5998507106764693, "learning_rate": 4.747283049472831e-05, "loss": 0.6636, "step": 4984 }, { "epoch": 0.14554319581910016, "grad_norm": 0.6640816565015678, "learning_rate": 4.747120843471209e-05, "loss": 0.7128, "step": 4985 }, { "epoch": 0.14557239204694752, "grad_norm": 0.6502921691794565, "learning_rate": 4.7469586374695865e-05, "loss": 0.7322, "step": 4986 }, { "epoch": 0.14560158827479489, "grad_norm": 0.615708116311667, "learning_rate": 4.746796431467965e-05, "loss": 0.6896, "step": 4987 }, { "epoch": 0.14563078450264225, "grad_norm": 0.5812530441601295, "learning_rate": 4.746634225466343e-05, "loss": 0.6804, "step": 4988 }, { "epoch": 0.1456599807304896, "grad_norm": 0.6034619258862264, "learning_rate": 4.7464720194647203e-05, "loss": 0.7398, "step": 4989 }, { "epoch": 0.145689176958337, "grad_norm": 0.6220980830840005, "learning_rate": 4.7463098134630985e-05, "loss": 0.7216, "step": 4990 }, { "epoch": 0.14571837318618436, "grad_norm": 0.727832793192458, "learning_rate": 4.746147607461476e-05, "loss": 0.6859, "step": 4991 }, { "epoch": 0.14574756941403172, "grad_norm": 0.6425676714712486, "learning_rate": 4.745985401459854e-05, "loss": 0.772, "step": 4992 }, { "epoch": 0.14577676564187908, "grad_norm": 0.7480991347435321, "learning_rate": 4.7458231954582324e-05, "loss": 0.7718, "step": 4993 }, { "epoch": 0.14580596186972644, "grad_norm": 0.6213881426164324, "learning_rate": 4.74566098945661e-05, "loss": 0.7071, "step": 4994 }, { "epoch": 0.1458351580975738, "grad_norm": 0.6273156012248425, "learning_rate": 4.745498783454988e-05, "loss": 0.7605, "step": 4995 }, { "epoch": 0.14586435432542116, "grad_norm": 0.5849982055795518, "learning_rate": 4.745336577453366e-05, "loss": 0.5634, "step": 4996 }, { "epoch": 0.14589355055326853, "grad_norm": 0.6058050663944015, "learning_rate": 4.745174371451744e-05, "loss": 0.6769, "step": 4997 }, { "epoch": 0.1459227467811159, "grad_norm": 0.6939822182716231, "learning_rate": 4.745012165450122e-05, "loss": 0.8, "step": 4998 }, { "epoch": 0.14595194300896325, "grad_norm": 0.6177641097589661, "learning_rate": 4.7448499594484994e-05, "loss": 0.7211, "step": 4999 }, { "epoch": 0.1459811392368106, "grad_norm": 0.5816582484420175, "learning_rate": 4.7446877534468776e-05, "loss": 0.6647, "step": 5000 }, { "epoch": 0.14601033546465797, "grad_norm": 0.6425846136212449, "learning_rate": 4.744525547445256e-05, "loss": 0.6286, "step": 5001 }, { "epoch": 0.14603953169250533, "grad_norm": 0.6405494734156515, "learning_rate": 4.744363341443633e-05, "loss": 0.6848, "step": 5002 }, { "epoch": 0.1460687279203527, "grad_norm": 0.5611218511844319, "learning_rate": 4.744201135442012e-05, "loss": 0.5587, "step": 5003 }, { "epoch": 0.14609792414820005, "grad_norm": 0.5951393491164545, "learning_rate": 4.7440389294403896e-05, "loss": 0.6867, "step": 5004 }, { "epoch": 0.14612712037604741, "grad_norm": 0.6399177342165235, "learning_rate": 4.743876723438768e-05, "loss": 0.7586, "step": 5005 }, { "epoch": 0.14615631660389478, "grad_norm": 0.5771557036530486, "learning_rate": 4.743714517437145e-05, "loss": 0.64, "step": 5006 }, { "epoch": 0.14618551283174214, "grad_norm": 0.5535656209903089, "learning_rate": 4.7435523114355234e-05, "loss": 0.5934, "step": 5007 }, { "epoch": 0.1462147090595895, "grad_norm": 0.6874364894182562, "learning_rate": 4.7433901054339016e-05, "loss": 0.7495, "step": 5008 }, { "epoch": 0.14624390528743686, "grad_norm": 0.6425012970448396, "learning_rate": 4.743227899432279e-05, "loss": 0.7133, "step": 5009 }, { "epoch": 0.14627310151528422, "grad_norm": 0.6374262407414588, "learning_rate": 4.743065693430657e-05, "loss": 0.7382, "step": 5010 }, { "epoch": 0.14630229774313158, "grad_norm": 0.6563358909538993, "learning_rate": 4.742903487429035e-05, "loss": 0.6835, "step": 5011 }, { "epoch": 0.14633149397097894, "grad_norm": 0.6204473927175227, "learning_rate": 4.742741281427413e-05, "loss": 0.6846, "step": 5012 }, { "epoch": 0.1463606901988263, "grad_norm": 0.654242655031534, "learning_rate": 4.742579075425791e-05, "loss": 0.7493, "step": 5013 }, { "epoch": 0.14638988642667367, "grad_norm": 0.6146044674975704, "learning_rate": 4.7424168694241686e-05, "loss": 0.6822, "step": 5014 }, { "epoch": 0.14641908265452103, "grad_norm": 0.673411655925302, "learning_rate": 4.742254663422547e-05, "loss": 0.8384, "step": 5015 }, { "epoch": 0.1464482788823684, "grad_norm": 0.6696261129753529, "learning_rate": 4.742092457420925e-05, "loss": 0.7044, "step": 5016 }, { "epoch": 0.14647747511021575, "grad_norm": 0.6040594469095436, "learning_rate": 4.7419302514193025e-05, "loss": 0.697, "step": 5017 }, { "epoch": 0.1465066713380631, "grad_norm": 0.5826121508502463, "learning_rate": 4.741768045417681e-05, "loss": 0.6661, "step": 5018 }, { "epoch": 0.14653586756591047, "grad_norm": 0.6286128800043406, "learning_rate": 4.741605839416058e-05, "loss": 0.8096, "step": 5019 }, { "epoch": 0.14656506379375783, "grad_norm": 0.6808656112649095, "learning_rate": 4.7414436334144364e-05, "loss": 0.7607, "step": 5020 }, { "epoch": 0.14659426002160522, "grad_norm": 0.704641068335049, "learning_rate": 4.7412814274128145e-05, "loss": 0.6761, "step": 5021 }, { "epoch": 0.14662345624945258, "grad_norm": 0.5804442701819627, "learning_rate": 4.741119221411193e-05, "loss": 0.6553, "step": 5022 }, { "epoch": 0.14665265247729994, "grad_norm": 0.5832528693772046, "learning_rate": 4.740957015409571e-05, "loss": 0.6561, "step": 5023 }, { "epoch": 0.1466818487051473, "grad_norm": 0.6291469456888562, "learning_rate": 4.7407948094079484e-05, "loss": 0.6925, "step": 5024 }, { "epoch": 0.14671104493299467, "grad_norm": 0.6441404025501423, "learning_rate": 4.7406326034063266e-05, "loss": 0.696, "step": 5025 }, { "epoch": 0.14674024116084203, "grad_norm": 0.6416538939504594, "learning_rate": 4.740470397404704e-05, "loss": 0.7395, "step": 5026 }, { "epoch": 0.1467694373886894, "grad_norm": 0.6580029032587348, "learning_rate": 4.740308191403082e-05, "loss": 0.7555, "step": 5027 }, { "epoch": 0.14679863361653675, "grad_norm": 0.644960232025816, "learning_rate": 4.7401459854014604e-05, "loss": 0.6836, "step": 5028 }, { "epoch": 0.1468278298443841, "grad_norm": 0.7326039601365024, "learning_rate": 4.739983779399838e-05, "loss": 0.7841, "step": 5029 }, { "epoch": 0.14685702607223147, "grad_norm": 0.6140950305734098, "learning_rate": 4.739821573398216e-05, "loss": 0.642, "step": 5030 }, { "epoch": 0.14688622230007883, "grad_norm": 0.6219383240224912, "learning_rate": 4.7396593673965936e-05, "loss": 0.6574, "step": 5031 }, { "epoch": 0.1469154185279262, "grad_norm": 0.628719935474726, "learning_rate": 4.739497161394972e-05, "loss": 0.6954, "step": 5032 }, { "epoch": 0.14694461475577356, "grad_norm": 0.621835055598221, "learning_rate": 4.73933495539335e-05, "loss": 0.7913, "step": 5033 }, { "epoch": 0.14697381098362092, "grad_norm": 0.5783668099034703, "learning_rate": 4.7391727493917274e-05, "loss": 0.6076, "step": 5034 }, { "epoch": 0.14700300721146828, "grad_norm": 0.6631325390527905, "learning_rate": 4.7390105433901056e-05, "loss": 0.678, "step": 5035 }, { "epoch": 0.14703220343931564, "grad_norm": 0.620761051268535, "learning_rate": 4.738848337388483e-05, "loss": 0.6753, "step": 5036 }, { "epoch": 0.147061399667163, "grad_norm": 0.6138822011058459, "learning_rate": 4.738686131386861e-05, "loss": 0.6819, "step": 5037 }, { "epoch": 0.14709059589501036, "grad_norm": 0.5686103042915404, "learning_rate": 4.7385239253852395e-05, "loss": 0.6025, "step": 5038 }, { "epoch": 0.14711979212285772, "grad_norm": 0.6199695113002605, "learning_rate": 4.738361719383617e-05, "loss": 0.6808, "step": 5039 }, { "epoch": 0.14714898835070508, "grad_norm": 0.601598666933618, "learning_rate": 4.738199513381996e-05, "loss": 0.6626, "step": 5040 }, { "epoch": 0.14717818457855245, "grad_norm": 0.6142327385796098, "learning_rate": 4.738037307380373e-05, "loss": 0.6575, "step": 5041 }, { "epoch": 0.1472073808063998, "grad_norm": 0.6125847297156854, "learning_rate": 4.7378751013787515e-05, "loss": 0.6542, "step": 5042 }, { "epoch": 0.14723657703424717, "grad_norm": 0.6277480395831603, "learning_rate": 4.7377128953771297e-05, "loss": 0.7113, "step": 5043 }, { "epoch": 0.14726577326209453, "grad_norm": 0.6620927420052427, "learning_rate": 4.737550689375507e-05, "loss": 0.6975, "step": 5044 }, { "epoch": 0.1472949694899419, "grad_norm": 0.6793064090813112, "learning_rate": 4.737388483373885e-05, "loss": 0.8273, "step": 5045 }, { "epoch": 0.14732416571778925, "grad_norm": 0.6231922812138814, "learning_rate": 4.737226277372263e-05, "loss": 0.686, "step": 5046 }, { "epoch": 0.1473533619456366, "grad_norm": 0.6577079841214103, "learning_rate": 4.737064071370641e-05, "loss": 0.7453, "step": 5047 }, { "epoch": 0.14738255817348397, "grad_norm": 1.1660628178172727, "learning_rate": 4.736901865369019e-05, "loss": 0.6432, "step": 5048 }, { "epoch": 0.14741175440133134, "grad_norm": 0.5905527395320239, "learning_rate": 4.736739659367397e-05, "loss": 0.6649, "step": 5049 }, { "epoch": 0.1474409506291787, "grad_norm": 0.6684895242651665, "learning_rate": 4.736577453365775e-05, "loss": 0.6428, "step": 5050 }, { "epoch": 0.14747014685702609, "grad_norm": 0.6089022120239488, "learning_rate": 4.7364152473641524e-05, "loss": 0.6648, "step": 5051 }, { "epoch": 0.14749934308487345, "grad_norm": 0.6615303610250095, "learning_rate": 4.7362530413625305e-05, "loss": 0.7682, "step": 5052 }, { "epoch": 0.1475285393127208, "grad_norm": 0.5829266754234853, "learning_rate": 4.736090835360909e-05, "loss": 0.6475, "step": 5053 }, { "epoch": 0.14755773554056817, "grad_norm": 0.6393497092816834, "learning_rate": 4.735928629359286e-05, "loss": 0.6535, "step": 5054 }, { "epoch": 0.14758693176841553, "grad_norm": 0.7292137732762471, "learning_rate": 4.7357664233576644e-05, "loss": 0.7625, "step": 5055 }, { "epoch": 0.1476161279962629, "grad_norm": 0.5954200558151835, "learning_rate": 4.735604217356042e-05, "loss": 0.6533, "step": 5056 }, { "epoch": 0.14764532422411025, "grad_norm": 0.5708582940976314, "learning_rate": 4.73544201135442e-05, "loss": 0.6191, "step": 5057 }, { "epoch": 0.1476745204519576, "grad_norm": 0.5460913664722592, "learning_rate": 4.735279805352798e-05, "loss": 0.6124, "step": 5058 }, { "epoch": 0.14770371667980497, "grad_norm": 0.5834701134807666, "learning_rate": 4.7351175993511764e-05, "loss": 0.6721, "step": 5059 }, { "epoch": 0.14773291290765234, "grad_norm": 0.6007222846234764, "learning_rate": 4.7349553933495546e-05, "loss": 0.6488, "step": 5060 }, { "epoch": 0.1477621091354997, "grad_norm": 0.5522528856652588, "learning_rate": 4.734793187347932e-05, "loss": 0.6059, "step": 5061 }, { "epoch": 0.14779130536334706, "grad_norm": 0.5589908369209498, "learning_rate": 4.73463098134631e-05, "loss": 0.6019, "step": 5062 }, { "epoch": 0.14782050159119442, "grad_norm": 0.6440872830166746, "learning_rate": 4.7344687753446884e-05, "loss": 0.7605, "step": 5063 }, { "epoch": 0.14784969781904178, "grad_norm": 0.6408366884534997, "learning_rate": 4.734306569343066e-05, "loss": 0.6898, "step": 5064 }, { "epoch": 0.14787889404688914, "grad_norm": 0.6419730662571465, "learning_rate": 4.734144363341444e-05, "loss": 0.7051, "step": 5065 }, { "epoch": 0.1479080902747365, "grad_norm": 0.6081886723516771, "learning_rate": 4.7339821573398216e-05, "loss": 0.7007, "step": 5066 }, { "epoch": 0.14793728650258386, "grad_norm": 0.5933284150241148, "learning_rate": 4.7338199513382e-05, "loss": 0.6394, "step": 5067 }, { "epoch": 0.14796648273043123, "grad_norm": 0.5993613939184933, "learning_rate": 4.733657745336578e-05, "loss": 0.7011, "step": 5068 }, { "epoch": 0.1479956789582786, "grad_norm": 0.6274514784806459, "learning_rate": 4.7334955393349555e-05, "loss": 0.746, "step": 5069 }, { "epoch": 0.14802487518612595, "grad_norm": 0.5554159045227738, "learning_rate": 4.7333333333333336e-05, "loss": 0.6236, "step": 5070 }, { "epoch": 0.1480540714139733, "grad_norm": 0.6224144311043215, "learning_rate": 4.733171127331711e-05, "loss": 0.7203, "step": 5071 }, { "epoch": 0.14808326764182067, "grad_norm": 0.5965037431006276, "learning_rate": 4.733008921330089e-05, "loss": 0.6415, "step": 5072 }, { "epoch": 0.14811246386966803, "grad_norm": 0.6067256061306694, "learning_rate": 4.7328467153284675e-05, "loss": 0.6655, "step": 5073 }, { "epoch": 0.1481416600975154, "grad_norm": 0.5830761130694027, "learning_rate": 4.732684509326845e-05, "loss": 0.6562, "step": 5074 }, { "epoch": 0.14817085632536275, "grad_norm": 0.5689139011077429, "learning_rate": 4.732522303325223e-05, "loss": 0.6387, "step": 5075 }, { "epoch": 0.14820005255321012, "grad_norm": 0.6111213258266777, "learning_rate": 4.732360097323601e-05, "loss": 0.7142, "step": 5076 }, { "epoch": 0.14822924878105748, "grad_norm": 0.560753282093698, "learning_rate": 4.732197891321979e-05, "loss": 0.6203, "step": 5077 }, { "epoch": 0.14825844500890484, "grad_norm": 0.5864738974667735, "learning_rate": 4.732035685320357e-05, "loss": 0.6515, "step": 5078 }, { "epoch": 0.1482876412367522, "grad_norm": 0.621371737093386, "learning_rate": 4.731873479318735e-05, "loss": 0.6472, "step": 5079 }, { "epoch": 0.14831683746459956, "grad_norm": 0.5536106068744308, "learning_rate": 4.7317112733171134e-05, "loss": 0.5848, "step": 5080 }, { "epoch": 0.14834603369244695, "grad_norm": 0.6110861772338194, "learning_rate": 4.731549067315491e-05, "loss": 0.7155, "step": 5081 }, { "epoch": 0.1483752299202943, "grad_norm": 0.6076228590865727, "learning_rate": 4.731386861313869e-05, "loss": 0.6667, "step": 5082 }, { "epoch": 0.14840442614814167, "grad_norm": 0.6082476017107287, "learning_rate": 4.731224655312247e-05, "loss": 0.6259, "step": 5083 }, { "epoch": 0.14843362237598903, "grad_norm": 0.7338353159177644, "learning_rate": 4.731062449310625e-05, "loss": 0.7064, "step": 5084 }, { "epoch": 0.1484628186038364, "grad_norm": 0.675646815243249, "learning_rate": 4.730900243309003e-05, "loss": 0.756, "step": 5085 }, { "epoch": 0.14849201483168376, "grad_norm": 0.6030278472780135, "learning_rate": 4.7307380373073804e-05, "loss": 0.7057, "step": 5086 }, { "epoch": 0.14852121105953112, "grad_norm": 0.7632341451659017, "learning_rate": 4.7305758313057586e-05, "loss": 0.6622, "step": 5087 }, { "epoch": 0.14855040728737848, "grad_norm": 0.6786535491730281, "learning_rate": 4.730413625304137e-05, "loss": 0.6543, "step": 5088 }, { "epoch": 0.14857960351522584, "grad_norm": 0.627587139360604, "learning_rate": 4.730251419302514e-05, "loss": 0.6976, "step": 5089 }, { "epoch": 0.1486087997430732, "grad_norm": 0.6319269519774212, "learning_rate": 4.7300892133008924e-05, "loss": 0.7614, "step": 5090 }, { "epoch": 0.14863799597092056, "grad_norm": 0.5803437797700736, "learning_rate": 4.72992700729927e-05, "loss": 0.6364, "step": 5091 }, { "epoch": 0.14866719219876792, "grad_norm": 0.5539544276992591, "learning_rate": 4.729764801297648e-05, "loss": 0.5741, "step": 5092 }, { "epoch": 0.14869638842661528, "grad_norm": 0.6140113489822933, "learning_rate": 4.729602595296026e-05, "loss": 0.7114, "step": 5093 }, { "epoch": 0.14872558465446264, "grad_norm": 0.6115502640233774, "learning_rate": 4.729440389294404e-05, "loss": 0.6238, "step": 5094 }, { "epoch": 0.14875478088231, "grad_norm": 0.6035560920742092, "learning_rate": 4.729278183292782e-05, "loss": 0.6313, "step": 5095 }, { "epoch": 0.14878397711015737, "grad_norm": 0.5883029986478449, "learning_rate": 4.7291159772911594e-05, "loss": 0.6373, "step": 5096 }, { "epoch": 0.14881317333800473, "grad_norm": 0.593245666527291, "learning_rate": 4.728953771289538e-05, "loss": 0.6718, "step": 5097 }, { "epoch": 0.1488423695658521, "grad_norm": 0.6617979849363496, "learning_rate": 4.728791565287916e-05, "loss": 0.7306, "step": 5098 }, { "epoch": 0.14887156579369945, "grad_norm": 0.587688626942593, "learning_rate": 4.728629359286294e-05, "loss": 0.6592, "step": 5099 }, { "epoch": 0.1489007620215468, "grad_norm": 0.5883689474298001, "learning_rate": 4.728467153284672e-05, "loss": 0.6546, "step": 5100 }, { "epoch": 0.14892995824939417, "grad_norm": 0.6128147056124451, "learning_rate": 4.7283049472830496e-05, "loss": 0.661, "step": 5101 }, { "epoch": 0.14895915447724153, "grad_norm": 0.5917959017613617, "learning_rate": 4.728142741281428e-05, "loss": 0.6383, "step": 5102 }, { "epoch": 0.1489883507050889, "grad_norm": 0.5653374900893166, "learning_rate": 4.727980535279806e-05, "loss": 0.6969, "step": 5103 }, { "epoch": 0.14901754693293626, "grad_norm": 0.607047650114388, "learning_rate": 4.7278183292781835e-05, "loss": 0.6729, "step": 5104 }, { "epoch": 0.14904674316078362, "grad_norm": 0.5679482020006397, "learning_rate": 4.727656123276562e-05, "loss": 0.6159, "step": 5105 }, { "epoch": 0.14907593938863098, "grad_norm": 0.5638144494502215, "learning_rate": 4.727493917274939e-05, "loss": 0.6174, "step": 5106 }, { "epoch": 0.14910513561647834, "grad_norm": 0.5990514132245234, "learning_rate": 4.7273317112733174e-05, "loss": 0.6673, "step": 5107 }, { "epoch": 0.1491343318443257, "grad_norm": 0.6422607172510065, "learning_rate": 4.7271695052716955e-05, "loss": 0.7181, "step": 5108 }, { "epoch": 0.14916352807217306, "grad_norm": 0.6373417598467689, "learning_rate": 4.727007299270073e-05, "loss": 0.6478, "step": 5109 }, { "epoch": 0.14919272430002042, "grad_norm": 0.7480176892981777, "learning_rate": 4.726845093268451e-05, "loss": 0.8321, "step": 5110 }, { "epoch": 0.1492219205278678, "grad_norm": 0.6917093245487669, "learning_rate": 4.726682887266829e-05, "loss": 0.7289, "step": 5111 }, { "epoch": 0.14925111675571517, "grad_norm": 0.6376015946095444, "learning_rate": 4.726520681265207e-05, "loss": 0.733, "step": 5112 }, { "epoch": 0.14928031298356254, "grad_norm": 0.5780452537055917, "learning_rate": 4.726358475263585e-05, "loss": 0.5359, "step": 5113 }, { "epoch": 0.1493095092114099, "grad_norm": 0.6654674503534606, "learning_rate": 4.7261962692619626e-05, "loss": 0.7853, "step": 5114 }, { "epoch": 0.14933870543925726, "grad_norm": 0.5824802662454177, "learning_rate": 4.726034063260341e-05, "loss": 0.6202, "step": 5115 }, { "epoch": 0.14936790166710462, "grad_norm": 0.551823345592761, "learning_rate": 4.725871857258719e-05, "loss": 0.5703, "step": 5116 }, { "epoch": 0.14939709789495198, "grad_norm": 0.6781593844172475, "learning_rate": 4.725709651257097e-05, "loss": 0.7724, "step": 5117 }, { "epoch": 0.14942629412279934, "grad_norm": 0.656288745175964, "learning_rate": 4.7255474452554746e-05, "loss": 0.6749, "step": 5118 }, { "epoch": 0.1494554903506467, "grad_norm": 0.6124584725921074, "learning_rate": 4.725385239253853e-05, "loss": 0.6414, "step": 5119 }, { "epoch": 0.14948468657849406, "grad_norm": 0.6020114836598933, "learning_rate": 4.725223033252231e-05, "loss": 0.6211, "step": 5120 }, { "epoch": 0.14951388280634142, "grad_norm": 0.5826939821950791, "learning_rate": 4.7250608272506084e-05, "loss": 0.6614, "step": 5121 }, { "epoch": 0.14954307903418879, "grad_norm": 0.67171877193245, "learning_rate": 4.7248986212489866e-05, "loss": 0.7417, "step": 5122 }, { "epoch": 0.14957227526203615, "grad_norm": 0.6388020770964137, "learning_rate": 4.724736415247364e-05, "loss": 0.6577, "step": 5123 }, { "epoch": 0.1496014714898835, "grad_norm": 0.6838189856081626, "learning_rate": 4.724574209245742e-05, "loss": 0.6889, "step": 5124 }, { "epoch": 0.14963066771773087, "grad_norm": 0.5903930713199482, "learning_rate": 4.7244120032441205e-05, "loss": 0.5703, "step": 5125 }, { "epoch": 0.14965986394557823, "grad_norm": 0.719173770448067, "learning_rate": 4.724249797242498e-05, "loss": 0.7803, "step": 5126 }, { "epoch": 0.1496890601734256, "grad_norm": 0.7424842320990256, "learning_rate": 4.724087591240876e-05, "loss": 0.6546, "step": 5127 }, { "epoch": 0.14971825640127295, "grad_norm": 0.6173461974951566, "learning_rate": 4.723925385239254e-05, "loss": 0.7021, "step": 5128 }, { "epoch": 0.14974745262912031, "grad_norm": 0.6919459881979614, "learning_rate": 4.723763179237632e-05, "loss": 0.6668, "step": 5129 }, { "epoch": 0.14977664885696768, "grad_norm": 0.6218626653114547, "learning_rate": 4.72360097323601e-05, "loss": 0.6069, "step": 5130 }, { "epoch": 0.14980584508481504, "grad_norm": 0.6511816607519783, "learning_rate": 4.7234387672343875e-05, "loss": 0.7545, "step": 5131 }, { "epoch": 0.1498350413126624, "grad_norm": 0.6618194675751459, "learning_rate": 4.7232765612327657e-05, "loss": 0.6896, "step": 5132 }, { "epoch": 0.14986423754050976, "grad_norm": 0.6145848593038608, "learning_rate": 4.723114355231144e-05, "loss": 0.6526, "step": 5133 }, { "epoch": 0.14989343376835712, "grad_norm": 0.601357906151109, "learning_rate": 4.722952149229521e-05, "loss": 0.7083, "step": 5134 }, { "epoch": 0.14992262999620448, "grad_norm": 0.6229396939791049, "learning_rate": 4.7227899432279e-05, "loss": 0.655, "step": 5135 }, { "epoch": 0.14995182622405184, "grad_norm": 0.5948728491351696, "learning_rate": 4.722627737226278e-05, "loss": 0.6356, "step": 5136 }, { "epoch": 0.1499810224518992, "grad_norm": 0.6064813311775319, "learning_rate": 4.722465531224656e-05, "loss": 0.6569, "step": 5137 }, { "epoch": 0.15001021867974657, "grad_norm": 0.618755111635077, "learning_rate": 4.7223033252230334e-05, "loss": 0.659, "step": 5138 }, { "epoch": 0.15003941490759393, "grad_norm": 0.6904979985672876, "learning_rate": 4.7221411192214115e-05, "loss": 0.6874, "step": 5139 }, { "epoch": 0.1500686111354413, "grad_norm": 0.5761332571590533, "learning_rate": 4.72197891321979e-05, "loss": 0.6148, "step": 5140 }, { "epoch": 0.15009780736328868, "grad_norm": 0.6394880138122625, "learning_rate": 4.721816707218167e-05, "loss": 0.6396, "step": 5141 }, { "epoch": 0.15012700359113604, "grad_norm": 0.6092707366147022, "learning_rate": 4.7216545012165454e-05, "loss": 0.6503, "step": 5142 }, { "epoch": 0.1501561998189834, "grad_norm": 0.6840872327822562, "learning_rate": 4.721492295214923e-05, "loss": 0.6281, "step": 5143 }, { "epoch": 0.15018539604683076, "grad_norm": 0.6063098132055246, "learning_rate": 4.721330089213301e-05, "loss": 0.621, "step": 5144 }, { "epoch": 0.15021459227467812, "grad_norm": 0.6241727499741426, "learning_rate": 4.721167883211679e-05, "loss": 0.6744, "step": 5145 }, { "epoch": 0.15024378850252548, "grad_norm": 0.6370944180113683, "learning_rate": 4.721005677210057e-05, "loss": 0.6965, "step": 5146 }, { "epoch": 0.15027298473037284, "grad_norm": 0.6848447771994132, "learning_rate": 4.720843471208435e-05, "loss": 0.7279, "step": 5147 }, { "epoch": 0.1503021809582202, "grad_norm": 0.5878596706333011, "learning_rate": 4.720681265206813e-05, "loss": 0.6252, "step": 5148 }, { "epoch": 0.15033137718606757, "grad_norm": 0.5901093775515334, "learning_rate": 4.7205190592051906e-05, "loss": 0.6431, "step": 5149 }, { "epoch": 0.15036057341391493, "grad_norm": 0.6615288887331666, "learning_rate": 4.720356853203569e-05, "loss": 0.659, "step": 5150 }, { "epoch": 0.1503897696417623, "grad_norm": 0.5542582260338346, "learning_rate": 4.720194647201946e-05, "loss": 0.6155, "step": 5151 }, { "epoch": 0.15041896586960965, "grad_norm": 0.6550707545010821, "learning_rate": 4.7200324412003244e-05, "loss": 0.7112, "step": 5152 }, { "epoch": 0.150448162097457, "grad_norm": 0.6642655115056247, "learning_rate": 4.7198702351987026e-05, "loss": 0.7609, "step": 5153 }, { "epoch": 0.15047735832530437, "grad_norm": 0.6019393696149564, "learning_rate": 4.719708029197081e-05, "loss": 0.7052, "step": 5154 }, { "epoch": 0.15050655455315173, "grad_norm": 0.6317068352578817, "learning_rate": 4.719545823195459e-05, "loss": 0.7278, "step": 5155 }, { "epoch": 0.1505357507809991, "grad_norm": 0.5874306074026661, "learning_rate": 4.7193836171938365e-05, "loss": 0.6965, "step": 5156 }, { "epoch": 0.15056494700884646, "grad_norm": 0.5692433180449057, "learning_rate": 4.7192214111922146e-05, "loss": 0.6356, "step": 5157 }, { "epoch": 0.15059414323669382, "grad_norm": 0.635458273511324, "learning_rate": 4.719059205190592e-05, "loss": 0.6922, "step": 5158 }, { "epoch": 0.15062333946454118, "grad_norm": 0.6067011255014167, "learning_rate": 4.71889699918897e-05, "loss": 0.6725, "step": 5159 }, { "epoch": 0.15065253569238854, "grad_norm": 0.5801231299086357, "learning_rate": 4.7187347931873485e-05, "loss": 0.6478, "step": 5160 }, { "epoch": 0.1506817319202359, "grad_norm": 0.6204790353633446, "learning_rate": 4.718572587185726e-05, "loss": 0.7269, "step": 5161 }, { "epoch": 0.15071092814808326, "grad_norm": 0.5980545127345641, "learning_rate": 4.718410381184104e-05, "loss": 0.6752, "step": 5162 }, { "epoch": 0.15074012437593062, "grad_norm": 0.6129152727500135, "learning_rate": 4.718248175182482e-05, "loss": 0.679, "step": 5163 }, { "epoch": 0.15076932060377798, "grad_norm": 0.7536596071580702, "learning_rate": 4.71808596918086e-05, "loss": 0.7948, "step": 5164 }, { "epoch": 0.15079851683162535, "grad_norm": 0.615555968309488, "learning_rate": 4.717923763179238e-05, "loss": 0.7313, "step": 5165 }, { "epoch": 0.1508277130594727, "grad_norm": 0.5927893667883324, "learning_rate": 4.7177615571776155e-05, "loss": 0.6399, "step": 5166 }, { "epoch": 0.15085690928732007, "grad_norm": 0.719421923874871, "learning_rate": 4.717599351175994e-05, "loss": 0.6766, "step": 5167 }, { "epoch": 0.15088610551516743, "grad_norm": 0.548237251938272, "learning_rate": 4.717437145174371e-05, "loss": 0.5681, "step": 5168 }, { "epoch": 0.1509153017430148, "grad_norm": 0.5992205844485298, "learning_rate": 4.7172749391727494e-05, "loss": 0.65, "step": 5169 }, { "epoch": 0.15094449797086215, "grad_norm": 0.6695861993700541, "learning_rate": 4.7171127331711275e-05, "loss": 0.7065, "step": 5170 }, { "epoch": 0.15097369419870954, "grad_norm": 0.6616706221049846, "learning_rate": 4.716950527169505e-05, "loss": 0.6534, "step": 5171 }, { "epoch": 0.1510028904265569, "grad_norm": 0.6694629540024223, "learning_rate": 4.716788321167883e-05, "loss": 0.7698, "step": 5172 }, { "epoch": 0.15103208665440426, "grad_norm": 0.5287620970696191, "learning_rate": 4.7166261151662614e-05, "loss": 0.5399, "step": 5173 }, { "epoch": 0.15106128288225162, "grad_norm": 0.6990616449842231, "learning_rate": 4.7164639091646396e-05, "loss": 0.7672, "step": 5174 }, { "epoch": 0.15109047911009899, "grad_norm": 0.8001121790477665, "learning_rate": 4.716301703163018e-05, "loss": 0.7158, "step": 5175 }, { "epoch": 0.15111967533794635, "grad_norm": 0.6721595192315959, "learning_rate": 4.716139497161395e-05, "loss": 0.7341, "step": 5176 }, { "epoch": 0.1511488715657937, "grad_norm": 0.5374039790100114, "learning_rate": 4.7159772911597734e-05, "loss": 0.599, "step": 5177 }, { "epoch": 0.15117806779364107, "grad_norm": 0.6434166730325309, "learning_rate": 4.715815085158151e-05, "loss": 0.7137, "step": 5178 }, { "epoch": 0.15120726402148843, "grad_norm": 0.6518797290945108, "learning_rate": 4.715652879156529e-05, "loss": 0.7592, "step": 5179 }, { "epoch": 0.1512364602493358, "grad_norm": 0.5549162768320869, "learning_rate": 4.715490673154907e-05, "loss": 0.6111, "step": 5180 }, { "epoch": 0.15126565647718315, "grad_norm": 0.6264426320552698, "learning_rate": 4.715328467153285e-05, "loss": 0.6776, "step": 5181 }, { "epoch": 0.1512948527050305, "grad_norm": 0.6802370957380179, "learning_rate": 4.715166261151663e-05, "loss": 0.6708, "step": 5182 }, { "epoch": 0.15132404893287787, "grad_norm": 0.6585335750194453, "learning_rate": 4.7150040551500404e-05, "loss": 0.6669, "step": 5183 }, { "epoch": 0.15135324516072524, "grad_norm": 0.6408414752948227, "learning_rate": 4.7148418491484186e-05, "loss": 0.7309, "step": 5184 }, { "epoch": 0.1513824413885726, "grad_norm": 0.5473675044702843, "learning_rate": 4.714679643146797e-05, "loss": 0.5816, "step": 5185 }, { "epoch": 0.15141163761641996, "grad_norm": 0.63079110192004, "learning_rate": 4.714517437145174e-05, "loss": 0.7382, "step": 5186 }, { "epoch": 0.15144083384426732, "grad_norm": 0.6247445872490905, "learning_rate": 4.7143552311435525e-05, "loss": 0.6824, "step": 5187 }, { "epoch": 0.15147003007211468, "grad_norm": 0.5932171904581588, "learning_rate": 4.71419302514193e-05, "loss": 0.7222, "step": 5188 }, { "epoch": 0.15149922629996204, "grad_norm": 0.6744030257416083, "learning_rate": 4.714030819140308e-05, "loss": 0.7862, "step": 5189 }, { "epoch": 0.1515284225278094, "grad_norm": 0.6154539236273457, "learning_rate": 4.713868613138686e-05, "loss": 0.7166, "step": 5190 }, { "epoch": 0.15155761875565676, "grad_norm": 0.58770676677236, "learning_rate": 4.7137064071370645e-05, "loss": 0.6366, "step": 5191 }, { "epoch": 0.15158681498350413, "grad_norm": 1.284158859619402, "learning_rate": 4.713544201135443e-05, "loss": 0.6817, "step": 5192 }, { "epoch": 0.1516160112113515, "grad_norm": 0.6196961543074796, "learning_rate": 4.71338199513382e-05, "loss": 0.7337, "step": 5193 }, { "epoch": 0.15164520743919885, "grad_norm": 0.6261401768864612, "learning_rate": 4.7132197891321984e-05, "loss": 0.7001, "step": 5194 }, { "epoch": 0.1516744036670462, "grad_norm": 0.6135828672740978, "learning_rate": 4.7130575831305765e-05, "loss": 0.6851, "step": 5195 }, { "epoch": 0.15170359989489357, "grad_norm": 0.7148657013268439, "learning_rate": 4.712895377128954e-05, "loss": 0.7785, "step": 5196 }, { "epoch": 0.15173279612274093, "grad_norm": 0.6057992836967749, "learning_rate": 4.712733171127332e-05, "loss": 0.6658, "step": 5197 }, { "epoch": 0.1517619923505883, "grad_norm": 0.6177808661127917, "learning_rate": 4.71257096512571e-05, "loss": 0.7324, "step": 5198 }, { "epoch": 0.15179118857843565, "grad_norm": 0.5647564377814588, "learning_rate": 4.712408759124088e-05, "loss": 0.5901, "step": 5199 }, { "epoch": 0.15182038480628302, "grad_norm": 0.6242699786591148, "learning_rate": 4.712246553122466e-05, "loss": 0.7169, "step": 5200 }, { "epoch": 0.1518495810341304, "grad_norm": 0.6304319396071244, "learning_rate": 4.7120843471208436e-05, "loss": 0.6444, "step": 5201 }, { "epoch": 0.15187877726197777, "grad_norm": 0.6302488948441999, "learning_rate": 4.711922141119222e-05, "loss": 0.6933, "step": 5202 }, { "epoch": 0.15190797348982513, "grad_norm": 0.5464138111159285, "learning_rate": 4.711759935117599e-05, "loss": 0.6098, "step": 5203 }, { "epoch": 0.1519371697176725, "grad_norm": 0.6082479878540664, "learning_rate": 4.7115977291159774e-05, "loss": 0.6858, "step": 5204 }, { "epoch": 0.15196636594551985, "grad_norm": 0.6348494949419884, "learning_rate": 4.7114355231143556e-05, "loss": 0.731, "step": 5205 }, { "epoch": 0.1519955621733672, "grad_norm": 0.6007059777606657, "learning_rate": 4.711273317112733e-05, "loss": 0.6369, "step": 5206 }, { "epoch": 0.15202475840121457, "grad_norm": 0.5830728279227828, "learning_rate": 4.711111111111111e-05, "loss": 0.6305, "step": 5207 }, { "epoch": 0.15205395462906193, "grad_norm": 0.6503986598528051, "learning_rate": 4.710948905109489e-05, "loss": 0.7806, "step": 5208 }, { "epoch": 0.1520831508569093, "grad_norm": 0.5836574210827694, "learning_rate": 4.710786699107867e-05, "loss": 0.6546, "step": 5209 }, { "epoch": 0.15211234708475666, "grad_norm": 0.6003368125314236, "learning_rate": 4.710624493106245e-05, "loss": 0.7044, "step": 5210 }, { "epoch": 0.15214154331260402, "grad_norm": 0.6389347073571682, "learning_rate": 4.710462287104623e-05, "loss": 0.7563, "step": 5211 }, { "epoch": 0.15217073954045138, "grad_norm": 0.5981953207376111, "learning_rate": 4.7103000811030015e-05, "loss": 0.6987, "step": 5212 }, { "epoch": 0.15219993576829874, "grad_norm": 0.5828952445812406, "learning_rate": 4.710137875101379e-05, "loss": 0.6622, "step": 5213 }, { "epoch": 0.1522291319961461, "grad_norm": 0.6190699630610746, "learning_rate": 4.709975669099757e-05, "loss": 0.6807, "step": 5214 }, { "epoch": 0.15225832822399346, "grad_norm": 0.5557610256701022, "learning_rate": 4.709813463098135e-05, "loss": 0.5806, "step": 5215 }, { "epoch": 0.15228752445184082, "grad_norm": 0.5623025093419213, "learning_rate": 4.709651257096513e-05, "loss": 0.6278, "step": 5216 }, { "epoch": 0.15231672067968818, "grad_norm": 0.6164232887307517, "learning_rate": 4.709489051094891e-05, "loss": 0.6751, "step": 5217 }, { "epoch": 0.15234591690753554, "grad_norm": 0.6028345115281964, "learning_rate": 4.7093268450932685e-05, "loss": 0.6968, "step": 5218 }, { "epoch": 0.1523751131353829, "grad_norm": 0.5656183837246419, "learning_rate": 4.7091646390916467e-05, "loss": 0.6366, "step": 5219 }, { "epoch": 0.15240430936323027, "grad_norm": 0.6477396168237564, "learning_rate": 4.709002433090025e-05, "loss": 0.698, "step": 5220 }, { "epoch": 0.15243350559107763, "grad_norm": 0.56876802467744, "learning_rate": 4.708840227088402e-05, "loss": 0.641, "step": 5221 }, { "epoch": 0.152462701818925, "grad_norm": 0.5817243586133996, "learning_rate": 4.7086780210867805e-05, "loss": 0.666, "step": 5222 }, { "epoch": 0.15249189804677235, "grad_norm": 0.642590686169234, "learning_rate": 4.708515815085158e-05, "loss": 0.7582, "step": 5223 }, { "epoch": 0.1525210942746197, "grad_norm": 0.6519495413355898, "learning_rate": 4.708353609083536e-05, "loss": 0.6992, "step": 5224 }, { "epoch": 0.15255029050246707, "grad_norm": 0.5641163309790352, "learning_rate": 4.7081914030819144e-05, "loss": 0.5933, "step": 5225 }, { "epoch": 0.15257948673031443, "grad_norm": 0.5970608235495098, "learning_rate": 4.708029197080292e-05, "loss": 0.6309, "step": 5226 }, { "epoch": 0.1526086829581618, "grad_norm": 0.6001571459318181, "learning_rate": 4.70786699107867e-05, "loss": 0.6416, "step": 5227 }, { "epoch": 0.15263787918600916, "grad_norm": 0.6186637202210848, "learning_rate": 4.7077047850770475e-05, "loss": 0.7097, "step": 5228 }, { "epoch": 0.15266707541385652, "grad_norm": 0.6128526413044237, "learning_rate": 4.7075425790754264e-05, "loss": 0.6639, "step": 5229 }, { "epoch": 0.15269627164170388, "grad_norm": 0.5933829093632593, "learning_rate": 4.707380373073804e-05, "loss": 0.6686, "step": 5230 }, { "epoch": 0.15272546786955124, "grad_norm": 0.5740193163110273, "learning_rate": 4.707218167072182e-05, "loss": 0.6278, "step": 5231 }, { "epoch": 0.15275466409739863, "grad_norm": 0.6090332296524537, "learning_rate": 4.70705596107056e-05, "loss": 0.6826, "step": 5232 }, { "epoch": 0.152783860325246, "grad_norm": 0.566515427872497, "learning_rate": 4.706893755068938e-05, "loss": 0.6112, "step": 5233 }, { "epoch": 0.15281305655309335, "grad_norm": 0.5920606779454158, "learning_rate": 4.706731549067316e-05, "loss": 0.6692, "step": 5234 }, { "epoch": 0.1528422527809407, "grad_norm": 0.5833130939105741, "learning_rate": 4.7065693430656934e-05, "loss": 0.5896, "step": 5235 }, { "epoch": 0.15287144900878807, "grad_norm": 0.6209948503950468, "learning_rate": 4.7064071370640716e-05, "loss": 0.7113, "step": 5236 }, { "epoch": 0.15290064523663544, "grad_norm": 0.7263054875883889, "learning_rate": 4.70624493106245e-05, "loss": 0.7593, "step": 5237 }, { "epoch": 0.1529298414644828, "grad_norm": 0.6174043864352657, "learning_rate": 4.706082725060827e-05, "loss": 0.6432, "step": 5238 }, { "epoch": 0.15295903769233016, "grad_norm": 0.6467053453028544, "learning_rate": 4.7059205190592054e-05, "loss": 0.7114, "step": 5239 }, { "epoch": 0.15298823392017752, "grad_norm": 0.5592155572714856, "learning_rate": 4.7057583130575836e-05, "loss": 0.5973, "step": 5240 }, { "epoch": 0.15301743014802488, "grad_norm": 0.6434638925587142, "learning_rate": 4.705596107055961e-05, "loss": 0.7737, "step": 5241 }, { "epoch": 0.15304662637587224, "grad_norm": 0.5814065838990105, "learning_rate": 4.705433901054339e-05, "loss": 0.6356, "step": 5242 }, { "epoch": 0.1530758226037196, "grad_norm": 0.599417597094588, "learning_rate": 4.705271695052717e-05, "loss": 0.6424, "step": 5243 }, { "epoch": 0.15310501883156696, "grad_norm": 0.5430481986873432, "learning_rate": 4.705109489051095e-05, "loss": 0.5928, "step": 5244 }, { "epoch": 0.15313421505941432, "grad_norm": 0.7174878871323006, "learning_rate": 4.704947283049473e-05, "loss": 0.8205, "step": 5245 }, { "epoch": 0.15316341128726169, "grad_norm": 0.5893055653521247, "learning_rate": 4.7047850770478506e-05, "loss": 0.7014, "step": 5246 }, { "epoch": 0.15319260751510905, "grad_norm": 0.6001670021387732, "learning_rate": 4.704622871046229e-05, "loss": 0.6621, "step": 5247 }, { "epoch": 0.1532218037429564, "grad_norm": 0.6885991074296304, "learning_rate": 4.704460665044607e-05, "loss": 0.7795, "step": 5248 }, { "epoch": 0.15325099997080377, "grad_norm": 0.6269156144882906, "learning_rate": 4.704298459042985e-05, "loss": 0.6523, "step": 5249 }, { "epoch": 0.15328019619865113, "grad_norm": 0.5570818370186642, "learning_rate": 4.704136253041363e-05, "loss": 0.6571, "step": 5250 }, { "epoch": 0.1533093924264985, "grad_norm": 0.5433919125024076, "learning_rate": 4.703974047039741e-05, "loss": 0.5771, "step": 5251 }, { "epoch": 0.15333858865434585, "grad_norm": 0.6989518243117376, "learning_rate": 4.703811841038119e-05, "loss": 0.8705, "step": 5252 }, { "epoch": 0.15336778488219321, "grad_norm": 0.6298147855564632, "learning_rate": 4.7036496350364965e-05, "loss": 0.6855, "step": 5253 }, { "epoch": 0.15339698111004058, "grad_norm": 0.6230484000636757, "learning_rate": 4.703487429034875e-05, "loss": 0.7273, "step": 5254 }, { "epoch": 0.15342617733788794, "grad_norm": 0.6576401350767809, "learning_rate": 4.703325223033252e-05, "loss": 0.7262, "step": 5255 }, { "epoch": 0.1534553735657353, "grad_norm": 0.5934072839080545, "learning_rate": 4.7031630170316304e-05, "loss": 0.6454, "step": 5256 }, { "epoch": 0.15348456979358266, "grad_norm": 0.5366379067778101, "learning_rate": 4.7030008110300085e-05, "loss": 0.576, "step": 5257 }, { "epoch": 0.15351376602143002, "grad_norm": 0.5986603603643403, "learning_rate": 4.702838605028386e-05, "loss": 0.5936, "step": 5258 }, { "epoch": 0.15354296224927738, "grad_norm": 0.5853220817265269, "learning_rate": 4.702676399026764e-05, "loss": 0.6677, "step": 5259 }, { "epoch": 0.15357215847712474, "grad_norm": 0.6843085443545582, "learning_rate": 4.7025141930251424e-05, "loss": 0.7063, "step": 5260 }, { "epoch": 0.1536013547049721, "grad_norm": 0.6552473066981692, "learning_rate": 4.70235198702352e-05, "loss": 0.7828, "step": 5261 }, { "epoch": 0.1536305509328195, "grad_norm": 0.6963673072522711, "learning_rate": 4.702189781021898e-05, "loss": 0.7253, "step": 5262 }, { "epoch": 0.15365974716066685, "grad_norm": 0.652727683228244, "learning_rate": 4.7020275750202756e-05, "loss": 0.7554, "step": 5263 }, { "epoch": 0.15368894338851422, "grad_norm": 0.6688384249012456, "learning_rate": 4.701865369018654e-05, "loss": 0.7564, "step": 5264 }, { "epoch": 0.15371813961636158, "grad_norm": 0.6835797753511716, "learning_rate": 4.701703163017032e-05, "loss": 0.8275, "step": 5265 }, { "epoch": 0.15374733584420894, "grad_norm": 0.6265704549644662, "learning_rate": 4.7015409570154094e-05, "loss": 0.6876, "step": 5266 }, { "epoch": 0.1537765320720563, "grad_norm": 0.5541285785907963, "learning_rate": 4.701378751013788e-05, "loss": 0.5863, "step": 5267 }, { "epoch": 0.15380572829990366, "grad_norm": 0.7372186241768647, "learning_rate": 4.701216545012166e-05, "loss": 0.8102, "step": 5268 }, { "epoch": 0.15383492452775102, "grad_norm": 0.6106296344317651, "learning_rate": 4.701054339010544e-05, "loss": 0.6698, "step": 5269 }, { "epoch": 0.15386412075559838, "grad_norm": 0.5631034328927126, "learning_rate": 4.7008921330089214e-05, "loss": 0.5751, "step": 5270 }, { "epoch": 0.15389331698344574, "grad_norm": 0.5895873543426899, "learning_rate": 4.7007299270072996e-05, "loss": 0.657, "step": 5271 }, { "epoch": 0.1539225132112931, "grad_norm": 1.029044867765411, "learning_rate": 4.700567721005678e-05, "loss": 0.6799, "step": 5272 }, { "epoch": 0.15395170943914047, "grad_norm": 0.5702137176087824, "learning_rate": 4.700405515004055e-05, "loss": 0.6465, "step": 5273 }, { "epoch": 0.15398090566698783, "grad_norm": 0.5387691523705642, "learning_rate": 4.7002433090024335e-05, "loss": 0.5896, "step": 5274 }, { "epoch": 0.1540101018948352, "grad_norm": 0.549873143800792, "learning_rate": 4.700081103000811e-05, "loss": 0.6076, "step": 5275 }, { "epoch": 0.15403929812268255, "grad_norm": 0.6071608072654242, "learning_rate": 4.699918896999189e-05, "loss": 0.741, "step": 5276 }, { "epoch": 0.1540684943505299, "grad_norm": 0.5890643354181679, "learning_rate": 4.699756690997567e-05, "loss": 0.6024, "step": 5277 }, { "epoch": 0.15409769057837727, "grad_norm": 0.5986735727721503, "learning_rate": 4.699594484995945e-05, "loss": 0.6112, "step": 5278 }, { "epoch": 0.15412688680622463, "grad_norm": 0.6709411474158679, "learning_rate": 4.699432278994323e-05, "loss": 0.7032, "step": 5279 }, { "epoch": 0.154156083034072, "grad_norm": 0.6773046155681497, "learning_rate": 4.6992700729927005e-05, "loss": 0.7129, "step": 5280 }, { "epoch": 0.15418527926191936, "grad_norm": 0.7090576275135836, "learning_rate": 4.699107866991079e-05, "loss": 0.7741, "step": 5281 }, { "epoch": 0.15421447548976672, "grad_norm": 0.6221942798968185, "learning_rate": 4.698945660989457e-05, "loss": 0.7225, "step": 5282 }, { "epoch": 0.15424367171761408, "grad_norm": 0.6543867297661384, "learning_rate": 4.6987834549878344e-05, "loss": 0.6634, "step": 5283 }, { "epoch": 0.15427286794546144, "grad_norm": 0.5791886027042397, "learning_rate": 4.6986212489862125e-05, "loss": 0.6419, "step": 5284 }, { "epoch": 0.1543020641733088, "grad_norm": 0.648809926582916, "learning_rate": 4.698459042984591e-05, "loss": 0.7113, "step": 5285 }, { "epoch": 0.15433126040115616, "grad_norm": 0.6436186553169123, "learning_rate": 4.698296836982969e-05, "loss": 0.7123, "step": 5286 }, { "epoch": 0.15436045662900352, "grad_norm": 0.629517566551997, "learning_rate": 4.698134630981347e-05, "loss": 0.7081, "step": 5287 }, { "epoch": 0.15438965285685088, "grad_norm": 0.6012749962186011, "learning_rate": 4.6979724249797246e-05, "loss": 0.6627, "step": 5288 }, { "epoch": 0.15441884908469825, "grad_norm": 0.6164601271790633, "learning_rate": 4.697810218978103e-05, "loss": 0.6766, "step": 5289 }, { "epoch": 0.1544480453125456, "grad_norm": 0.5741683591342234, "learning_rate": 4.69764801297648e-05, "loss": 0.6313, "step": 5290 }, { "epoch": 0.15447724154039297, "grad_norm": 0.5691190671039741, "learning_rate": 4.6974858069748584e-05, "loss": 0.6419, "step": 5291 }, { "epoch": 0.15450643776824036, "grad_norm": 0.6413835292617137, "learning_rate": 4.6973236009732366e-05, "loss": 0.7424, "step": 5292 }, { "epoch": 0.15453563399608772, "grad_norm": 0.5538979068727671, "learning_rate": 4.697161394971614e-05, "loss": 0.6208, "step": 5293 }, { "epoch": 0.15456483022393508, "grad_norm": 0.6353851029849492, "learning_rate": 4.696999188969992e-05, "loss": 0.7398, "step": 5294 }, { "epoch": 0.15459402645178244, "grad_norm": 0.6001051087623525, "learning_rate": 4.69683698296837e-05, "loss": 0.6202, "step": 5295 }, { "epoch": 0.1546232226796298, "grad_norm": 0.5679432000983533, "learning_rate": 4.696674776966748e-05, "loss": 0.628, "step": 5296 }, { "epoch": 0.15465241890747716, "grad_norm": 0.5876473028659586, "learning_rate": 4.696512570965126e-05, "loss": 0.6064, "step": 5297 }, { "epoch": 0.15468161513532452, "grad_norm": 0.5299192579714852, "learning_rate": 4.6963503649635036e-05, "loss": 0.5856, "step": 5298 }, { "epoch": 0.15471081136317189, "grad_norm": 0.5823490573996962, "learning_rate": 4.696188158961882e-05, "loss": 0.7025, "step": 5299 }, { "epoch": 0.15474000759101925, "grad_norm": 0.6040311050099959, "learning_rate": 4.696025952960259e-05, "loss": 0.7204, "step": 5300 }, { "epoch": 0.1547692038188666, "grad_norm": 0.6372260089869481, "learning_rate": 4.6958637469586375e-05, "loss": 0.7567, "step": 5301 }, { "epoch": 0.15479840004671397, "grad_norm": 0.5341899233205996, "learning_rate": 4.6957015409570156e-05, "loss": 0.5897, "step": 5302 }, { "epoch": 0.15482759627456133, "grad_norm": 0.591067839385858, "learning_rate": 4.695539334955393e-05, "loss": 0.7525, "step": 5303 }, { "epoch": 0.1548567925024087, "grad_norm": 0.8444454410193654, "learning_rate": 4.695377128953771e-05, "loss": 0.6783, "step": 5304 }, { "epoch": 0.15488598873025605, "grad_norm": 0.5718238835378155, "learning_rate": 4.6952149229521495e-05, "loss": 0.6665, "step": 5305 }, { "epoch": 0.1549151849581034, "grad_norm": 0.5334668050942776, "learning_rate": 4.6950527169505277e-05, "loss": 0.5496, "step": 5306 }, { "epoch": 0.15494438118595077, "grad_norm": 0.6293774072820136, "learning_rate": 4.694890510948906e-05, "loss": 0.7048, "step": 5307 }, { "epoch": 0.15497357741379814, "grad_norm": 0.6195837060461925, "learning_rate": 4.694728304947283e-05, "loss": 0.6811, "step": 5308 }, { "epoch": 0.1550027736416455, "grad_norm": 0.6114834984100382, "learning_rate": 4.6945660989456615e-05, "loss": 0.7257, "step": 5309 }, { "epoch": 0.15503196986949286, "grad_norm": 0.5967834548078021, "learning_rate": 4.694403892944039e-05, "loss": 0.6636, "step": 5310 }, { "epoch": 0.15506116609734022, "grad_norm": 0.7207240029916954, "learning_rate": 4.694241686942417e-05, "loss": 0.785, "step": 5311 }, { "epoch": 0.15509036232518758, "grad_norm": 0.6655003228098428, "learning_rate": 4.6940794809407954e-05, "loss": 0.6848, "step": 5312 }, { "epoch": 0.15511955855303494, "grad_norm": 0.5784053351367019, "learning_rate": 4.693917274939173e-05, "loss": 0.6389, "step": 5313 }, { "epoch": 0.1551487547808823, "grad_norm": 0.5925742678143691, "learning_rate": 4.693755068937551e-05, "loss": 0.6031, "step": 5314 }, { "epoch": 0.15517795100872966, "grad_norm": 0.5724118384107179, "learning_rate": 4.6935928629359285e-05, "loss": 0.6148, "step": 5315 }, { "epoch": 0.15520714723657703, "grad_norm": 0.5731887818923672, "learning_rate": 4.693430656934307e-05, "loss": 0.6578, "step": 5316 }, { "epoch": 0.1552363434644244, "grad_norm": 0.6627159884211123, "learning_rate": 4.693268450932685e-05, "loss": 0.7628, "step": 5317 }, { "epoch": 0.15526553969227175, "grad_norm": 0.5865841578510183, "learning_rate": 4.6931062449310624e-05, "loss": 0.6937, "step": 5318 }, { "epoch": 0.1552947359201191, "grad_norm": 0.6408502835311334, "learning_rate": 4.6929440389294406e-05, "loss": 0.6418, "step": 5319 }, { "epoch": 0.15532393214796647, "grad_norm": 0.5525046782463279, "learning_rate": 4.692781832927818e-05, "loss": 0.5788, "step": 5320 }, { "epoch": 0.15535312837581383, "grad_norm": 0.5616074479996235, "learning_rate": 4.692619626926196e-05, "loss": 0.6086, "step": 5321 }, { "epoch": 0.15538232460366122, "grad_norm": 0.5908574276071487, "learning_rate": 4.6924574209245744e-05, "loss": 0.6667, "step": 5322 }, { "epoch": 0.15541152083150858, "grad_norm": 0.6034660945192097, "learning_rate": 4.692295214922952e-05, "loss": 0.6587, "step": 5323 }, { "epoch": 0.15544071705935594, "grad_norm": 0.5974222024976575, "learning_rate": 4.692133008921331e-05, "loss": 0.6618, "step": 5324 }, { "epoch": 0.1554699132872033, "grad_norm": 0.5829813914200288, "learning_rate": 4.691970802919708e-05, "loss": 0.6708, "step": 5325 }, { "epoch": 0.15549910951505067, "grad_norm": 0.806439720979547, "learning_rate": 4.6918085969180864e-05, "loss": 0.771, "step": 5326 }, { "epoch": 0.15552830574289803, "grad_norm": 0.6807447160158421, "learning_rate": 4.6916463909164646e-05, "loss": 0.8451, "step": 5327 }, { "epoch": 0.1555575019707454, "grad_norm": 0.6166018357776141, "learning_rate": 4.691484184914842e-05, "loss": 0.6972, "step": 5328 }, { "epoch": 0.15558669819859275, "grad_norm": 0.6222304457432499, "learning_rate": 4.69132197891322e-05, "loss": 0.7003, "step": 5329 }, { "epoch": 0.1556158944264401, "grad_norm": 0.5988907647328858, "learning_rate": 4.691159772911598e-05, "loss": 0.6819, "step": 5330 }, { "epoch": 0.15564509065428747, "grad_norm": 0.6586187583640755, "learning_rate": 4.690997566909976e-05, "loss": 0.7824, "step": 5331 }, { "epoch": 0.15567428688213483, "grad_norm": 0.6027466997955518, "learning_rate": 4.690835360908354e-05, "loss": 0.7347, "step": 5332 }, { "epoch": 0.1557034831099822, "grad_norm": 0.6328237485823849, "learning_rate": 4.6906731549067316e-05, "loss": 0.7092, "step": 5333 }, { "epoch": 0.15573267933782955, "grad_norm": 0.6486110824035733, "learning_rate": 4.69051094890511e-05, "loss": 0.7012, "step": 5334 }, { "epoch": 0.15576187556567692, "grad_norm": 0.6904043671693352, "learning_rate": 4.690348742903487e-05, "loss": 0.7205, "step": 5335 }, { "epoch": 0.15579107179352428, "grad_norm": 0.6404475729576211, "learning_rate": 4.6901865369018655e-05, "loss": 0.7183, "step": 5336 }, { "epoch": 0.15582026802137164, "grad_norm": 0.5509963857799496, "learning_rate": 4.690024330900244e-05, "loss": 0.6403, "step": 5337 }, { "epoch": 0.155849464249219, "grad_norm": 0.605102954649756, "learning_rate": 4.689862124898621e-05, "loss": 0.7173, "step": 5338 }, { "epoch": 0.15587866047706636, "grad_norm": 0.5347423316007847, "learning_rate": 4.6896999188969993e-05, "loss": 0.5917, "step": 5339 }, { "epoch": 0.15590785670491372, "grad_norm": 0.614486645223078, "learning_rate": 4.689537712895377e-05, "loss": 0.5846, "step": 5340 }, { "epoch": 0.15593705293276108, "grad_norm": 0.6197494638812119, "learning_rate": 4.689375506893755e-05, "loss": 0.6694, "step": 5341 }, { "epoch": 0.15596624916060844, "grad_norm": 0.6168342228148422, "learning_rate": 4.689213300892133e-05, "loss": 0.7116, "step": 5342 }, { "epoch": 0.1559954453884558, "grad_norm": 0.6875075265667877, "learning_rate": 4.6890510948905114e-05, "loss": 0.7522, "step": 5343 }, { "epoch": 0.15602464161630317, "grad_norm": 0.6309093337927582, "learning_rate": 4.6888888888888895e-05, "loss": 0.7251, "step": 5344 }, { "epoch": 0.15605383784415053, "grad_norm": 0.6065745668502649, "learning_rate": 4.688726682887267e-05, "loss": 0.6572, "step": 5345 }, { "epoch": 0.1560830340719979, "grad_norm": 0.6347733258940725, "learning_rate": 4.688564476885645e-05, "loss": 0.6968, "step": 5346 }, { "epoch": 0.15611223029984525, "grad_norm": 0.6621903421423966, "learning_rate": 4.6884022708840234e-05, "loss": 0.7448, "step": 5347 }, { "epoch": 0.1561414265276926, "grad_norm": 0.5644862258791687, "learning_rate": 4.688240064882401e-05, "loss": 0.5836, "step": 5348 }, { "epoch": 0.15617062275553997, "grad_norm": 0.6468536678515686, "learning_rate": 4.688077858880779e-05, "loss": 0.5912, "step": 5349 }, { "epoch": 0.15619981898338733, "grad_norm": 0.5867481619410567, "learning_rate": 4.6879156528791566e-05, "loss": 0.691, "step": 5350 }, { "epoch": 0.1562290152112347, "grad_norm": 0.619625270910638, "learning_rate": 4.687753446877535e-05, "loss": 0.6777, "step": 5351 }, { "epoch": 0.15625821143908208, "grad_norm": 0.6903317954269916, "learning_rate": 4.687591240875913e-05, "loss": 0.7257, "step": 5352 }, { "epoch": 0.15628740766692945, "grad_norm": 0.5808690179883579, "learning_rate": 4.6874290348742904e-05, "loss": 0.657, "step": 5353 }, { "epoch": 0.1563166038947768, "grad_norm": 0.5486443265825992, "learning_rate": 4.6872668288726686e-05, "loss": 0.5903, "step": 5354 }, { "epoch": 0.15634580012262417, "grad_norm": 0.6065826389584926, "learning_rate": 4.687104622871046e-05, "loss": 0.6239, "step": 5355 }, { "epoch": 0.15637499635047153, "grad_norm": 0.6094875261505547, "learning_rate": 4.686942416869424e-05, "loss": 0.6742, "step": 5356 }, { "epoch": 0.1564041925783189, "grad_norm": 0.5972517830942101, "learning_rate": 4.6867802108678024e-05, "loss": 0.6281, "step": 5357 }, { "epoch": 0.15643338880616625, "grad_norm": 0.6788261251128078, "learning_rate": 4.68661800486618e-05, "loss": 0.7593, "step": 5358 }, { "epoch": 0.1564625850340136, "grad_norm": 0.6042884201341584, "learning_rate": 4.686455798864558e-05, "loss": 0.658, "step": 5359 }, { "epoch": 0.15649178126186097, "grad_norm": 0.609482863758786, "learning_rate": 4.6862935928629356e-05, "loss": 0.6789, "step": 5360 }, { "epoch": 0.15652097748970834, "grad_norm": 0.6478574312342898, "learning_rate": 4.6861313868613145e-05, "loss": 0.6981, "step": 5361 }, { "epoch": 0.1565501737175557, "grad_norm": 0.6135041078437105, "learning_rate": 4.685969180859692e-05, "loss": 0.6553, "step": 5362 }, { "epoch": 0.15657936994540306, "grad_norm": 0.6331560998956925, "learning_rate": 4.68580697485807e-05, "loss": 0.6922, "step": 5363 }, { "epoch": 0.15660856617325042, "grad_norm": 0.6038773781465593, "learning_rate": 4.685644768856448e-05, "loss": 0.6858, "step": 5364 }, { "epoch": 0.15663776240109778, "grad_norm": 0.5551359793623178, "learning_rate": 4.685482562854826e-05, "loss": 0.5836, "step": 5365 }, { "epoch": 0.15666695862894514, "grad_norm": 0.5802521448441862, "learning_rate": 4.685320356853204e-05, "loss": 0.5971, "step": 5366 }, { "epoch": 0.1566961548567925, "grad_norm": 0.6071295645893362, "learning_rate": 4.6851581508515815e-05, "loss": 0.7248, "step": 5367 }, { "epoch": 0.15672535108463986, "grad_norm": 0.5264825683707223, "learning_rate": 4.68499594484996e-05, "loss": 0.5537, "step": 5368 }, { "epoch": 0.15675454731248722, "grad_norm": 0.6646522926618329, "learning_rate": 4.684833738848338e-05, "loss": 0.8349, "step": 5369 }, { "epoch": 0.15678374354033459, "grad_norm": 0.6433162187105861, "learning_rate": 4.6846715328467154e-05, "loss": 0.6867, "step": 5370 }, { "epoch": 0.15681293976818195, "grad_norm": 0.597426375403461, "learning_rate": 4.6845093268450935e-05, "loss": 0.6423, "step": 5371 }, { "epoch": 0.1568421359960293, "grad_norm": 0.5310392875106819, "learning_rate": 4.684347120843472e-05, "loss": 0.5432, "step": 5372 }, { "epoch": 0.15687133222387667, "grad_norm": 0.592089766581167, "learning_rate": 4.684184914841849e-05, "loss": 0.697, "step": 5373 }, { "epoch": 0.15690052845172403, "grad_norm": 0.5794403300869723, "learning_rate": 4.6840227088402274e-05, "loss": 0.6426, "step": 5374 }, { "epoch": 0.1569297246795714, "grad_norm": 0.6542704869900536, "learning_rate": 4.683860502838605e-05, "loss": 0.7443, "step": 5375 }, { "epoch": 0.15695892090741875, "grad_norm": 0.5735626726849259, "learning_rate": 4.683698296836983e-05, "loss": 0.5755, "step": 5376 }, { "epoch": 0.15698811713526611, "grad_norm": 0.5894915479931118, "learning_rate": 4.683536090835361e-05, "loss": 0.6744, "step": 5377 }, { "epoch": 0.15701731336311348, "grad_norm": 0.581741876818192, "learning_rate": 4.683373884833739e-05, "loss": 0.6574, "step": 5378 }, { "epoch": 0.15704650959096084, "grad_norm": 0.5853062419477991, "learning_rate": 4.683211678832117e-05, "loss": 0.6471, "step": 5379 }, { "epoch": 0.1570757058188082, "grad_norm": 0.6577639723187716, "learning_rate": 4.683049472830495e-05, "loss": 0.7257, "step": 5380 }, { "epoch": 0.15710490204665556, "grad_norm": 0.5925714673020039, "learning_rate": 4.682887266828873e-05, "loss": 0.6405, "step": 5381 }, { "epoch": 0.15713409827450295, "grad_norm": 0.6275551774693642, "learning_rate": 4.682725060827251e-05, "loss": 0.689, "step": 5382 }, { "epoch": 0.1571632945023503, "grad_norm": 0.665726444854428, "learning_rate": 4.682562854825629e-05, "loss": 0.7088, "step": 5383 }, { "epoch": 0.15719249073019767, "grad_norm": 0.6360729277826241, "learning_rate": 4.682400648824007e-05, "loss": 0.6548, "step": 5384 }, { "epoch": 0.15722168695804503, "grad_norm": 0.5583488710114056, "learning_rate": 4.6822384428223846e-05, "loss": 0.598, "step": 5385 }, { "epoch": 0.1572508831858924, "grad_norm": 0.6199228383983177, "learning_rate": 4.682076236820763e-05, "loss": 0.6743, "step": 5386 }, { "epoch": 0.15728007941373975, "grad_norm": 0.5949115213481385, "learning_rate": 4.68191403081914e-05, "loss": 0.6656, "step": 5387 }, { "epoch": 0.15730927564158712, "grad_norm": 0.7447214303616185, "learning_rate": 4.6817518248175185e-05, "loss": 0.692, "step": 5388 }, { "epoch": 0.15733847186943448, "grad_norm": 0.6226537572396788, "learning_rate": 4.6815896188158966e-05, "loss": 0.6887, "step": 5389 }, { "epoch": 0.15736766809728184, "grad_norm": 0.5887226068534723, "learning_rate": 4.681427412814274e-05, "loss": 0.6756, "step": 5390 }, { "epoch": 0.1573968643251292, "grad_norm": 0.5790313219455899, "learning_rate": 4.681265206812652e-05, "loss": 0.6342, "step": 5391 }, { "epoch": 0.15742606055297656, "grad_norm": 0.610710289544534, "learning_rate": 4.6811030008110305e-05, "loss": 0.6589, "step": 5392 }, { "epoch": 0.15745525678082392, "grad_norm": 0.6895838209995474, "learning_rate": 4.680940794809408e-05, "loss": 0.7041, "step": 5393 }, { "epoch": 0.15748445300867128, "grad_norm": 0.6231462943465584, "learning_rate": 4.680778588807786e-05, "loss": 0.7148, "step": 5394 }, { "epoch": 0.15751364923651864, "grad_norm": 0.5578163946818742, "learning_rate": 4.6806163828061637e-05, "loss": 0.5382, "step": 5395 }, { "epoch": 0.157542845464366, "grad_norm": 0.6641261965709058, "learning_rate": 4.680454176804542e-05, "loss": 0.7073, "step": 5396 }, { "epoch": 0.15757204169221337, "grad_norm": 0.5918445049222109, "learning_rate": 4.68029197080292e-05, "loss": 0.687, "step": 5397 }, { "epoch": 0.15760123792006073, "grad_norm": 0.5783658073478201, "learning_rate": 4.6801297648012975e-05, "loss": 0.6502, "step": 5398 }, { "epoch": 0.1576304341479081, "grad_norm": 0.6215684088468347, "learning_rate": 4.6799675587996764e-05, "loss": 0.7261, "step": 5399 }, { "epoch": 0.15765963037575545, "grad_norm": 0.594583157952789, "learning_rate": 4.679805352798054e-05, "loss": 0.6807, "step": 5400 }, { "epoch": 0.1576888266036028, "grad_norm": 0.6496495382927344, "learning_rate": 4.679643146796432e-05, "loss": 0.7941, "step": 5401 }, { "epoch": 0.15771802283145017, "grad_norm": 0.5713658785523622, "learning_rate": 4.6794809407948095e-05, "loss": 0.6397, "step": 5402 }, { "epoch": 0.15774721905929753, "grad_norm": 0.5818047220757004, "learning_rate": 4.679318734793188e-05, "loss": 0.6343, "step": 5403 }, { "epoch": 0.1577764152871449, "grad_norm": 0.6023174014764003, "learning_rate": 4.679156528791566e-05, "loss": 0.5991, "step": 5404 }, { "epoch": 0.15780561151499226, "grad_norm": 0.5748063594385023, "learning_rate": 4.6789943227899434e-05, "loss": 0.6378, "step": 5405 }, { "epoch": 0.15783480774283962, "grad_norm": 0.6425282419336151, "learning_rate": 4.6788321167883216e-05, "loss": 0.7592, "step": 5406 }, { "epoch": 0.15786400397068698, "grad_norm": 0.7544071951655482, "learning_rate": 4.678669910786699e-05, "loss": 0.7597, "step": 5407 }, { "epoch": 0.15789320019853434, "grad_norm": 0.6211515830482428, "learning_rate": 4.678507704785077e-05, "loss": 0.7029, "step": 5408 }, { "epoch": 0.1579223964263817, "grad_norm": 0.6935265845235958, "learning_rate": 4.6783454987834554e-05, "loss": 0.7552, "step": 5409 }, { "epoch": 0.15795159265422906, "grad_norm": 0.5796413272062985, "learning_rate": 4.678183292781833e-05, "loss": 0.5435, "step": 5410 }, { "epoch": 0.15798078888207642, "grad_norm": 0.5822063531282191, "learning_rate": 4.678021086780211e-05, "loss": 0.6639, "step": 5411 }, { "epoch": 0.15800998510992378, "grad_norm": 0.5815144134505501, "learning_rate": 4.6778588807785886e-05, "loss": 0.6545, "step": 5412 }, { "epoch": 0.15803918133777117, "grad_norm": 0.6454064574584908, "learning_rate": 4.677696674776967e-05, "loss": 0.6965, "step": 5413 }, { "epoch": 0.15806837756561853, "grad_norm": 0.5935730674871028, "learning_rate": 4.677534468775345e-05, "loss": 0.6221, "step": 5414 }, { "epoch": 0.1580975737934659, "grad_norm": 0.6173517761442181, "learning_rate": 4.6773722627737224e-05, "loss": 0.709, "step": 5415 }, { "epoch": 0.15812677002131326, "grad_norm": 0.6078918483369918, "learning_rate": 4.6772100567721006e-05, "loss": 0.6865, "step": 5416 }, { "epoch": 0.15815596624916062, "grad_norm": 0.5998498507305909, "learning_rate": 4.677047850770479e-05, "loss": 0.6384, "step": 5417 }, { "epoch": 0.15818516247700798, "grad_norm": 0.5700853936600714, "learning_rate": 4.676885644768857e-05, "loss": 0.6589, "step": 5418 }, { "epoch": 0.15821435870485534, "grad_norm": 0.648864211105732, "learning_rate": 4.676723438767235e-05, "loss": 0.7386, "step": 5419 }, { "epoch": 0.1582435549327027, "grad_norm": 0.5938803356630895, "learning_rate": 4.6765612327656126e-05, "loss": 0.6611, "step": 5420 }, { "epoch": 0.15827275116055006, "grad_norm": 0.6177236902924167, "learning_rate": 4.676399026763991e-05, "loss": 0.6751, "step": 5421 }, { "epoch": 0.15830194738839742, "grad_norm": 0.6073503769293439, "learning_rate": 4.676236820762368e-05, "loss": 0.7314, "step": 5422 }, { "epoch": 0.15833114361624478, "grad_norm": 0.6111904541898763, "learning_rate": 4.6760746147607465e-05, "loss": 0.7566, "step": 5423 }, { "epoch": 0.15836033984409215, "grad_norm": 0.6675201518607625, "learning_rate": 4.675912408759125e-05, "loss": 0.742, "step": 5424 }, { "epoch": 0.1583895360719395, "grad_norm": 0.5755345728427156, "learning_rate": 4.675750202757502e-05, "loss": 0.6688, "step": 5425 }, { "epoch": 0.15841873229978687, "grad_norm": 0.5780434229017726, "learning_rate": 4.6755879967558803e-05, "loss": 0.6499, "step": 5426 }, { "epoch": 0.15844792852763423, "grad_norm": 0.5806499260864765, "learning_rate": 4.675425790754258e-05, "loss": 0.682, "step": 5427 }, { "epoch": 0.1584771247554816, "grad_norm": 0.5710551702093438, "learning_rate": 4.675263584752636e-05, "loss": 0.6068, "step": 5428 }, { "epoch": 0.15850632098332895, "grad_norm": 0.625707188167231, "learning_rate": 4.675101378751014e-05, "loss": 0.6374, "step": 5429 }, { "epoch": 0.1585355172111763, "grad_norm": 0.5583887156334251, "learning_rate": 4.674939172749392e-05, "loss": 0.621, "step": 5430 }, { "epoch": 0.15856471343902367, "grad_norm": 0.5663710175119454, "learning_rate": 4.67477696674777e-05, "loss": 0.6305, "step": 5431 }, { "epoch": 0.15859390966687104, "grad_norm": 0.6208542907246143, "learning_rate": 4.6746147607461474e-05, "loss": 0.6706, "step": 5432 }, { "epoch": 0.1586231058947184, "grad_norm": 0.6414152764513426, "learning_rate": 4.6744525547445255e-05, "loss": 0.7729, "step": 5433 }, { "epoch": 0.15865230212256576, "grad_norm": 0.5709840191154794, "learning_rate": 4.674290348742904e-05, "loss": 0.6106, "step": 5434 }, { "epoch": 0.15868149835041312, "grad_norm": 0.5937455370205044, "learning_rate": 4.674128142741281e-05, "loss": 0.7176, "step": 5435 }, { "epoch": 0.15871069457826048, "grad_norm": 0.5685771042766087, "learning_rate": 4.6739659367396594e-05, "loss": 0.6237, "step": 5436 }, { "epoch": 0.15873989080610784, "grad_norm": 0.6174572470574197, "learning_rate": 4.6738037307380376e-05, "loss": 0.7152, "step": 5437 }, { "epoch": 0.1587690870339552, "grad_norm": 0.6571452390752949, "learning_rate": 4.673641524736416e-05, "loss": 0.6527, "step": 5438 }, { "epoch": 0.15879828326180256, "grad_norm": 0.6436394713167589, "learning_rate": 4.673479318734794e-05, "loss": 0.741, "step": 5439 }, { "epoch": 0.15882747948964993, "grad_norm": 0.5998117777629368, "learning_rate": 4.6733171127331714e-05, "loss": 0.6759, "step": 5440 }, { "epoch": 0.1588566757174973, "grad_norm": 0.6204148215408517, "learning_rate": 4.6731549067315496e-05, "loss": 0.6705, "step": 5441 }, { "epoch": 0.15888587194534465, "grad_norm": 0.5763915664720155, "learning_rate": 4.672992700729927e-05, "loss": 0.583, "step": 5442 }, { "epoch": 0.15891506817319204, "grad_norm": 0.5752104132688023, "learning_rate": 4.672830494728305e-05, "loss": 0.6488, "step": 5443 }, { "epoch": 0.1589442644010394, "grad_norm": 0.6159398037559928, "learning_rate": 4.6726682887266835e-05, "loss": 0.6822, "step": 5444 }, { "epoch": 0.15897346062888676, "grad_norm": 0.6171586811957749, "learning_rate": 4.672506082725061e-05, "loss": 0.6981, "step": 5445 }, { "epoch": 0.15900265685673412, "grad_norm": 0.6899972425194351, "learning_rate": 4.672343876723439e-05, "loss": 0.6416, "step": 5446 }, { "epoch": 0.15903185308458148, "grad_norm": 0.6057179981461988, "learning_rate": 4.6721816707218166e-05, "loss": 0.6472, "step": 5447 }, { "epoch": 0.15906104931242884, "grad_norm": 0.670866161814345, "learning_rate": 4.672019464720195e-05, "loss": 0.8072, "step": 5448 }, { "epoch": 0.1590902455402762, "grad_norm": 0.6508495865755918, "learning_rate": 4.671857258718573e-05, "loss": 0.7662, "step": 5449 }, { "epoch": 0.15911944176812357, "grad_norm": 0.6051261799259544, "learning_rate": 4.6716950527169505e-05, "loss": 0.63, "step": 5450 }, { "epoch": 0.15914863799597093, "grad_norm": 0.585395788001652, "learning_rate": 4.6715328467153287e-05, "loss": 0.6422, "step": 5451 }, { "epoch": 0.1591778342238183, "grad_norm": 0.6304330317000946, "learning_rate": 4.671370640713706e-05, "loss": 0.6754, "step": 5452 }, { "epoch": 0.15920703045166565, "grad_norm": 0.5709197127581958, "learning_rate": 4.671208434712084e-05, "loss": 0.5702, "step": 5453 }, { "epoch": 0.159236226679513, "grad_norm": 0.5780139876311469, "learning_rate": 4.6710462287104625e-05, "loss": 0.6528, "step": 5454 }, { "epoch": 0.15926542290736037, "grad_norm": 0.639403214691306, "learning_rate": 4.67088402270884e-05, "loss": 0.7404, "step": 5455 }, { "epoch": 0.15929461913520773, "grad_norm": 0.5784333891708228, "learning_rate": 4.670721816707219e-05, "loss": 0.6271, "step": 5456 }, { "epoch": 0.1593238153630551, "grad_norm": 0.6301369920768645, "learning_rate": 4.6705596107055964e-05, "loss": 0.7165, "step": 5457 }, { "epoch": 0.15935301159090245, "grad_norm": 0.6017056120122702, "learning_rate": 4.6703974047039745e-05, "loss": 0.6717, "step": 5458 }, { "epoch": 0.15938220781874982, "grad_norm": 0.5937607308953413, "learning_rate": 4.670235198702353e-05, "loss": 0.6649, "step": 5459 }, { "epoch": 0.15941140404659718, "grad_norm": 0.661253459120673, "learning_rate": 4.67007299270073e-05, "loss": 0.8138, "step": 5460 }, { "epoch": 0.15944060027444454, "grad_norm": 0.6142891737775694, "learning_rate": 4.6699107866991084e-05, "loss": 0.7416, "step": 5461 }, { "epoch": 0.1594697965022919, "grad_norm": 0.6168018306183055, "learning_rate": 4.669748580697486e-05, "loss": 0.6912, "step": 5462 }, { "epoch": 0.15949899273013926, "grad_norm": 0.5654540187673546, "learning_rate": 4.669586374695864e-05, "loss": 0.6351, "step": 5463 }, { "epoch": 0.15952818895798662, "grad_norm": 0.609739723959573, "learning_rate": 4.669424168694242e-05, "loss": 0.7213, "step": 5464 }, { "epoch": 0.15955738518583398, "grad_norm": 0.6753323048428892, "learning_rate": 4.66926196269262e-05, "loss": 0.7371, "step": 5465 }, { "epoch": 0.15958658141368134, "grad_norm": 0.5653334147356553, "learning_rate": 4.669099756690998e-05, "loss": 0.6421, "step": 5466 }, { "epoch": 0.1596157776415287, "grad_norm": 0.5941476225394229, "learning_rate": 4.6689375506893754e-05, "loss": 0.6949, "step": 5467 }, { "epoch": 0.15964497386937607, "grad_norm": 0.5683993370127791, "learning_rate": 4.6687753446877536e-05, "loss": 0.6595, "step": 5468 }, { "epoch": 0.15967417009722343, "grad_norm": 0.6628784357749035, "learning_rate": 4.668613138686132e-05, "loss": 0.8016, "step": 5469 }, { "epoch": 0.1597033663250708, "grad_norm": 0.5825817652539905, "learning_rate": 4.668450932684509e-05, "loss": 0.6912, "step": 5470 }, { "epoch": 0.15973256255291815, "grad_norm": 0.6386531168398357, "learning_rate": 4.6682887266828874e-05, "loss": 0.8095, "step": 5471 }, { "epoch": 0.1597617587807655, "grad_norm": 0.6266766373971017, "learning_rate": 4.668126520681265e-05, "loss": 0.6867, "step": 5472 }, { "epoch": 0.1597909550086129, "grad_norm": 0.5614685571454269, "learning_rate": 4.667964314679643e-05, "loss": 0.6312, "step": 5473 }, { "epoch": 0.15982015123646026, "grad_norm": 0.5728985350999148, "learning_rate": 4.667802108678021e-05, "loss": 0.6119, "step": 5474 }, { "epoch": 0.15984934746430762, "grad_norm": 0.6177722509893717, "learning_rate": 4.6676399026763995e-05, "loss": 0.7613, "step": 5475 }, { "epoch": 0.15987854369215498, "grad_norm": 0.6241403004324615, "learning_rate": 4.6674776966747776e-05, "loss": 0.6831, "step": 5476 }, { "epoch": 0.15990773992000235, "grad_norm": 0.625207116441346, "learning_rate": 4.667315490673155e-05, "loss": 0.7296, "step": 5477 }, { "epoch": 0.1599369361478497, "grad_norm": 0.6613456769010998, "learning_rate": 4.667153284671533e-05, "loss": 0.6819, "step": 5478 }, { "epoch": 0.15996613237569707, "grad_norm": 0.6394594076958516, "learning_rate": 4.666991078669911e-05, "loss": 0.7078, "step": 5479 }, { "epoch": 0.15999532860354443, "grad_norm": 0.5905078216967252, "learning_rate": 4.666828872668289e-05, "loss": 0.6601, "step": 5480 }, { "epoch": 0.1600245248313918, "grad_norm": 0.5840493990104964, "learning_rate": 4.666666666666667e-05, "loss": 0.6908, "step": 5481 }, { "epoch": 0.16005372105923915, "grad_norm": 0.6124597087170753, "learning_rate": 4.6665044606650447e-05, "loss": 0.7266, "step": 5482 }, { "epoch": 0.1600829172870865, "grad_norm": 0.6098834479216052, "learning_rate": 4.666342254663423e-05, "loss": 0.6684, "step": 5483 }, { "epoch": 0.16011211351493387, "grad_norm": 0.6874085353992071, "learning_rate": 4.666180048661801e-05, "loss": 0.8025, "step": 5484 }, { "epoch": 0.16014130974278123, "grad_norm": 0.5690412905831433, "learning_rate": 4.6660178426601785e-05, "loss": 0.5965, "step": 5485 }, { "epoch": 0.1601705059706286, "grad_norm": 0.9656254539070505, "learning_rate": 4.665855636658557e-05, "loss": 0.6708, "step": 5486 }, { "epoch": 0.16019970219847596, "grad_norm": 0.5780316565244127, "learning_rate": 4.665693430656934e-05, "loss": 0.5883, "step": 5487 }, { "epoch": 0.16022889842632332, "grad_norm": 0.5981926757037118, "learning_rate": 4.6655312246553124e-05, "loss": 0.6534, "step": 5488 }, { "epoch": 0.16025809465417068, "grad_norm": 0.6281981271299384, "learning_rate": 4.6653690186536905e-05, "loss": 0.7317, "step": 5489 }, { "epoch": 0.16028729088201804, "grad_norm": 0.6465430494217502, "learning_rate": 4.665206812652068e-05, "loss": 0.7061, "step": 5490 }, { "epoch": 0.1603164871098654, "grad_norm": 0.6357649246791481, "learning_rate": 4.665044606650446e-05, "loss": 0.7048, "step": 5491 }, { "epoch": 0.16034568333771276, "grad_norm": 0.6821498525100491, "learning_rate": 4.664882400648824e-05, "loss": 0.8368, "step": 5492 }, { "epoch": 0.16037487956556012, "grad_norm": 0.6741587964058805, "learning_rate": 4.6647201946472026e-05, "loss": 0.7057, "step": 5493 }, { "epoch": 0.16040407579340749, "grad_norm": 0.6425381226939489, "learning_rate": 4.66455798864558e-05, "loss": 0.7236, "step": 5494 }, { "epoch": 0.16043327202125485, "grad_norm": 0.7691673294035205, "learning_rate": 4.664395782643958e-05, "loss": 0.7181, "step": 5495 }, { "epoch": 0.1604624682491022, "grad_norm": 0.5769433069837774, "learning_rate": 4.6642335766423364e-05, "loss": 0.6052, "step": 5496 }, { "epoch": 0.16049166447694957, "grad_norm": 0.6185614109814537, "learning_rate": 4.664071370640714e-05, "loss": 0.7765, "step": 5497 }, { "epoch": 0.16052086070479693, "grad_norm": 0.6152089121425431, "learning_rate": 4.663909164639092e-05, "loss": 0.71, "step": 5498 }, { "epoch": 0.1605500569326443, "grad_norm": 0.561280729559498, "learning_rate": 4.6637469586374696e-05, "loss": 0.5955, "step": 5499 }, { "epoch": 0.16057925316049165, "grad_norm": 0.6049495551415797, "learning_rate": 4.663584752635848e-05, "loss": 0.6939, "step": 5500 }, { "epoch": 0.16060844938833901, "grad_norm": 0.6121129683597034, "learning_rate": 4.663422546634226e-05, "loss": 0.7288, "step": 5501 }, { "epoch": 0.16063764561618638, "grad_norm": 0.6415688226678026, "learning_rate": 4.6632603406326034e-05, "loss": 0.7246, "step": 5502 }, { "epoch": 0.16066684184403376, "grad_norm": 0.66444082200194, "learning_rate": 4.6630981346309816e-05, "loss": 0.7216, "step": 5503 }, { "epoch": 0.16069603807188113, "grad_norm": 0.5090085650773274, "learning_rate": 4.66293592862936e-05, "loss": 0.5334, "step": 5504 }, { "epoch": 0.1607252342997285, "grad_norm": 0.5480219454329067, "learning_rate": 4.662773722627737e-05, "loss": 0.5995, "step": 5505 }, { "epoch": 0.16075443052757585, "grad_norm": 0.5790651163236985, "learning_rate": 4.6626115166261155e-05, "loss": 0.6515, "step": 5506 }, { "epoch": 0.1607836267554232, "grad_norm": 0.5913705025378888, "learning_rate": 4.662449310624493e-05, "loss": 0.6857, "step": 5507 }, { "epoch": 0.16081282298327057, "grad_norm": 0.5754978608020919, "learning_rate": 4.662287104622871e-05, "loss": 0.6304, "step": 5508 }, { "epoch": 0.16084201921111793, "grad_norm": 0.6877872357354052, "learning_rate": 4.662124898621249e-05, "loss": 0.6662, "step": 5509 }, { "epoch": 0.1608712154389653, "grad_norm": 0.617452410245416, "learning_rate": 4.661962692619627e-05, "loss": 0.6802, "step": 5510 }, { "epoch": 0.16090041166681265, "grad_norm": 0.7056264300288236, "learning_rate": 4.661800486618005e-05, "loss": 0.8281, "step": 5511 }, { "epoch": 0.16092960789466002, "grad_norm": 0.5409155733989454, "learning_rate": 4.661638280616383e-05, "loss": 0.6074, "step": 5512 }, { "epoch": 0.16095880412250738, "grad_norm": 0.632263277387314, "learning_rate": 4.6614760746147613e-05, "loss": 0.7147, "step": 5513 }, { "epoch": 0.16098800035035474, "grad_norm": 0.6019923585568674, "learning_rate": 4.661313868613139e-05, "loss": 0.6272, "step": 5514 }, { "epoch": 0.1610171965782021, "grad_norm": 0.5914206213606917, "learning_rate": 4.661151662611517e-05, "loss": 0.6581, "step": 5515 }, { "epoch": 0.16104639280604946, "grad_norm": 0.6271278979414352, "learning_rate": 4.660989456609895e-05, "loss": 0.7453, "step": 5516 }, { "epoch": 0.16107558903389682, "grad_norm": 0.5973197960582232, "learning_rate": 4.660827250608273e-05, "loss": 0.6054, "step": 5517 }, { "epoch": 0.16110478526174418, "grad_norm": 0.6125028618798978, "learning_rate": 4.660665044606651e-05, "loss": 0.7016, "step": 5518 }, { "epoch": 0.16113398148959154, "grad_norm": 0.5718267508080759, "learning_rate": 4.6605028386050284e-05, "loss": 0.6638, "step": 5519 }, { "epoch": 0.1611631777174389, "grad_norm": 0.5972600347417435, "learning_rate": 4.6603406326034065e-05, "loss": 0.7001, "step": 5520 }, { "epoch": 0.16119237394528627, "grad_norm": 0.6174248853962312, "learning_rate": 4.660178426601785e-05, "loss": 0.7241, "step": 5521 }, { "epoch": 0.16122157017313363, "grad_norm": 0.8193827676809958, "learning_rate": 4.660016220600162e-05, "loss": 0.8156, "step": 5522 }, { "epoch": 0.161250766400981, "grad_norm": 0.6259760631558584, "learning_rate": 4.6598540145985404e-05, "loss": 0.7321, "step": 5523 }, { "epoch": 0.16127996262882835, "grad_norm": 0.6522815714868953, "learning_rate": 4.659691808596918e-05, "loss": 0.7327, "step": 5524 }, { "epoch": 0.1613091588566757, "grad_norm": 0.639218137738535, "learning_rate": 4.659529602595296e-05, "loss": 0.7307, "step": 5525 }, { "epoch": 0.16133835508452307, "grad_norm": 0.6641926867817393, "learning_rate": 4.659367396593674e-05, "loss": 0.6714, "step": 5526 }, { "epoch": 0.16136755131237043, "grad_norm": 0.5720587801462977, "learning_rate": 4.659205190592052e-05, "loss": 0.6444, "step": 5527 }, { "epoch": 0.1613967475402178, "grad_norm": 0.5651514041927566, "learning_rate": 4.65904298459043e-05, "loss": 0.6339, "step": 5528 }, { "epoch": 0.16142594376806516, "grad_norm": 0.6138902685853396, "learning_rate": 4.658880778588808e-05, "loss": 0.6842, "step": 5529 }, { "epoch": 0.16145513999591252, "grad_norm": 0.604189489182437, "learning_rate": 4.6587185725871856e-05, "loss": 0.7021, "step": 5530 }, { "epoch": 0.16148433622375988, "grad_norm": 0.6248553904270657, "learning_rate": 4.6585563665855645e-05, "loss": 0.6535, "step": 5531 }, { "epoch": 0.16151353245160724, "grad_norm": 0.5809925353475573, "learning_rate": 4.658394160583942e-05, "loss": 0.6634, "step": 5532 }, { "epoch": 0.16154272867945463, "grad_norm": 0.6063925770801234, "learning_rate": 4.65823195458232e-05, "loss": 0.6947, "step": 5533 }, { "epoch": 0.161571924907302, "grad_norm": 0.6317524205807034, "learning_rate": 4.6580697485806976e-05, "loss": 0.7053, "step": 5534 }, { "epoch": 0.16160112113514935, "grad_norm": 0.5853108819707125, "learning_rate": 4.657907542579076e-05, "loss": 0.6218, "step": 5535 }, { "epoch": 0.1616303173629967, "grad_norm": 0.6152072265057232, "learning_rate": 4.657745336577454e-05, "loss": 0.6505, "step": 5536 }, { "epoch": 0.16165951359084407, "grad_norm": 0.6273588271793522, "learning_rate": 4.6575831305758315e-05, "loss": 0.7346, "step": 5537 }, { "epoch": 0.16168870981869143, "grad_norm": 0.5997062104515108, "learning_rate": 4.6574209245742097e-05, "loss": 0.6413, "step": 5538 }, { "epoch": 0.1617179060465388, "grad_norm": 0.5668583519000122, "learning_rate": 4.657258718572587e-05, "loss": 0.6445, "step": 5539 }, { "epoch": 0.16174710227438616, "grad_norm": 0.6055932699929034, "learning_rate": 4.657096512570965e-05, "loss": 0.6858, "step": 5540 }, { "epoch": 0.16177629850223352, "grad_norm": 0.5894534477155685, "learning_rate": 4.6569343065693435e-05, "loss": 0.7108, "step": 5541 }, { "epoch": 0.16180549473008088, "grad_norm": 0.6542338696940201, "learning_rate": 4.656772100567721e-05, "loss": 0.7211, "step": 5542 }, { "epoch": 0.16183469095792824, "grad_norm": 0.614253493171949, "learning_rate": 4.656609894566099e-05, "loss": 0.7296, "step": 5543 }, { "epoch": 0.1618638871857756, "grad_norm": 0.8365644906547564, "learning_rate": 4.656447688564477e-05, "loss": 0.7979, "step": 5544 }, { "epoch": 0.16189308341362296, "grad_norm": 0.5631011472978744, "learning_rate": 4.656285482562855e-05, "loss": 0.6236, "step": 5545 }, { "epoch": 0.16192227964147032, "grad_norm": 0.5394081084734124, "learning_rate": 4.656123276561233e-05, "loss": 0.5798, "step": 5546 }, { "epoch": 0.16195147586931768, "grad_norm": 0.6536556040112107, "learning_rate": 4.6559610705596105e-05, "loss": 0.732, "step": 5547 }, { "epoch": 0.16198067209716505, "grad_norm": 0.5787101612729665, "learning_rate": 4.655798864557989e-05, "loss": 0.6677, "step": 5548 }, { "epoch": 0.1620098683250124, "grad_norm": 0.587524634309782, "learning_rate": 4.655636658556367e-05, "loss": 0.6125, "step": 5549 }, { "epoch": 0.16203906455285977, "grad_norm": 0.5682626619321907, "learning_rate": 4.655474452554745e-05, "loss": 0.6422, "step": 5550 }, { "epoch": 0.16206826078070713, "grad_norm": 0.5398594891788735, "learning_rate": 4.655312246553123e-05, "loss": 0.6153, "step": 5551 }, { "epoch": 0.1620974570085545, "grad_norm": 0.5928731214304709, "learning_rate": 4.655150040551501e-05, "loss": 0.6415, "step": 5552 }, { "epoch": 0.16212665323640185, "grad_norm": 0.6233489950343932, "learning_rate": 4.654987834549879e-05, "loss": 0.7078, "step": 5553 }, { "epoch": 0.1621558494642492, "grad_norm": 0.5739214311958147, "learning_rate": 4.6548256285482564e-05, "loss": 0.6369, "step": 5554 }, { "epoch": 0.16218504569209657, "grad_norm": 0.5431124480584947, "learning_rate": 4.6546634225466346e-05, "loss": 0.6133, "step": 5555 }, { "epoch": 0.16221424191994394, "grad_norm": 0.5613663479932587, "learning_rate": 4.654501216545013e-05, "loss": 0.5893, "step": 5556 }, { "epoch": 0.1622434381477913, "grad_norm": 0.6125320464073275, "learning_rate": 4.65433901054339e-05, "loss": 0.6815, "step": 5557 }, { "epoch": 0.16227263437563866, "grad_norm": 0.5764243652574329, "learning_rate": 4.6541768045417684e-05, "loss": 0.6599, "step": 5558 }, { "epoch": 0.16230183060348602, "grad_norm": 0.5684128619308382, "learning_rate": 4.654014598540146e-05, "loss": 0.6258, "step": 5559 }, { "epoch": 0.16233102683133338, "grad_norm": 0.5983004624870344, "learning_rate": 4.653852392538524e-05, "loss": 0.6996, "step": 5560 }, { "epoch": 0.16236022305918074, "grad_norm": 0.5701497744113883, "learning_rate": 4.653690186536902e-05, "loss": 0.6543, "step": 5561 }, { "epoch": 0.1623894192870281, "grad_norm": 0.5936143036706966, "learning_rate": 4.65352798053528e-05, "loss": 0.6542, "step": 5562 }, { "epoch": 0.1624186155148755, "grad_norm": 0.6051844266269957, "learning_rate": 4.653365774533658e-05, "loss": 0.6687, "step": 5563 }, { "epoch": 0.16244781174272285, "grad_norm": 0.5672028514413895, "learning_rate": 4.6532035685320355e-05, "loss": 0.6113, "step": 5564 }, { "epoch": 0.16247700797057021, "grad_norm": 0.5980299841176613, "learning_rate": 4.6530413625304136e-05, "loss": 0.7258, "step": 5565 }, { "epoch": 0.16250620419841758, "grad_norm": 0.6182891997187052, "learning_rate": 4.652879156528792e-05, "loss": 0.7074, "step": 5566 }, { "epoch": 0.16253540042626494, "grad_norm": 0.7175599677687693, "learning_rate": 4.652716950527169e-05, "loss": 0.6557, "step": 5567 }, { "epoch": 0.1625645966541123, "grad_norm": 0.6325898645150534, "learning_rate": 4.6525547445255475e-05, "loss": 0.7223, "step": 5568 }, { "epoch": 0.16259379288195966, "grad_norm": 0.70776898159501, "learning_rate": 4.6523925385239257e-05, "loss": 0.7221, "step": 5569 }, { "epoch": 0.16262298910980702, "grad_norm": 0.6071594048526553, "learning_rate": 4.652230332522304e-05, "loss": 0.6726, "step": 5570 }, { "epoch": 0.16265218533765438, "grad_norm": 0.5863222051757695, "learning_rate": 4.652068126520682e-05, "loss": 0.6618, "step": 5571 }, { "epoch": 0.16268138156550174, "grad_norm": 0.6363433018673632, "learning_rate": 4.6519059205190595e-05, "loss": 0.6658, "step": 5572 }, { "epoch": 0.1627105777933491, "grad_norm": 0.5942820970409197, "learning_rate": 4.651743714517438e-05, "loss": 0.6796, "step": 5573 }, { "epoch": 0.16273977402119646, "grad_norm": 0.5947892034775069, "learning_rate": 4.651581508515815e-05, "loss": 0.7158, "step": 5574 }, { "epoch": 0.16276897024904383, "grad_norm": 0.5920832019322264, "learning_rate": 4.6514193025141934e-05, "loss": 0.6739, "step": 5575 }, { "epoch": 0.1627981664768912, "grad_norm": 0.6319875353997826, "learning_rate": 4.6512570965125715e-05, "loss": 0.6839, "step": 5576 }, { "epoch": 0.16282736270473855, "grad_norm": 0.6501059559479814, "learning_rate": 4.651094890510949e-05, "loss": 0.7053, "step": 5577 }, { "epoch": 0.1628565589325859, "grad_norm": 0.6415054398508263, "learning_rate": 4.650932684509327e-05, "loss": 0.7501, "step": 5578 }, { "epoch": 0.16288575516043327, "grad_norm": 0.5858744933748034, "learning_rate": 4.650770478507705e-05, "loss": 0.6889, "step": 5579 }, { "epoch": 0.16291495138828063, "grad_norm": 0.5555035744047995, "learning_rate": 4.650608272506083e-05, "loss": 0.6003, "step": 5580 }, { "epoch": 0.162944147616128, "grad_norm": 0.5812586240306168, "learning_rate": 4.650446066504461e-05, "loss": 0.7157, "step": 5581 }, { "epoch": 0.16297334384397535, "grad_norm": 0.5867940739375725, "learning_rate": 4.6502838605028386e-05, "loss": 0.685, "step": 5582 }, { "epoch": 0.16300254007182272, "grad_norm": 0.5909775298103827, "learning_rate": 4.650121654501217e-05, "loss": 0.7078, "step": 5583 }, { "epoch": 0.16303173629967008, "grad_norm": 0.6422743842672098, "learning_rate": 4.649959448499594e-05, "loss": 0.7339, "step": 5584 }, { "epoch": 0.16306093252751744, "grad_norm": 0.5954797192918093, "learning_rate": 4.6497972424979724e-05, "loss": 0.6559, "step": 5585 }, { "epoch": 0.1630901287553648, "grad_norm": 0.6345103902497793, "learning_rate": 4.6496350364963506e-05, "loss": 0.7311, "step": 5586 }, { "epoch": 0.16311932498321216, "grad_norm": 0.5391467166403565, "learning_rate": 4.649472830494728e-05, "loss": 0.5887, "step": 5587 }, { "epoch": 0.16314852121105952, "grad_norm": 0.6495261830557272, "learning_rate": 4.649310624493107e-05, "loss": 0.7081, "step": 5588 }, { "epoch": 0.16317771743890688, "grad_norm": 0.5840060399338013, "learning_rate": 4.6491484184914844e-05, "loss": 0.6271, "step": 5589 }, { "epoch": 0.16320691366675424, "grad_norm": 0.6264575087097007, "learning_rate": 4.6489862124898626e-05, "loss": 0.6946, "step": 5590 }, { "epoch": 0.1632361098946016, "grad_norm": 0.5561263478310069, "learning_rate": 4.648824006488241e-05, "loss": 0.5769, "step": 5591 }, { "epoch": 0.16326530612244897, "grad_norm": 0.5896385858676491, "learning_rate": 4.648661800486618e-05, "loss": 0.6996, "step": 5592 }, { "epoch": 0.16329450235029636, "grad_norm": 0.5607985672079476, "learning_rate": 4.6484995944849965e-05, "loss": 0.599, "step": 5593 }, { "epoch": 0.16332369857814372, "grad_norm": 0.5817541836852222, "learning_rate": 4.648337388483374e-05, "loss": 0.6479, "step": 5594 }, { "epoch": 0.16335289480599108, "grad_norm": 0.595279032729076, "learning_rate": 4.648175182481752e-05, "loss": 0.6876, "step": 5595 }, { "epoch": 0.16338209103383844, "grad_norm": 0.5869622084721831, "learning_rate": 4.64801297648013e-05, "loss": 0.6166, "step": 5596 }, { "epoch": 0.1634112872616858, "grad_norm": 0.6221329838789132, "learning_rate": 4.647850770478508e-05, "loss": 0.6473, "step": 5597 }, { "epoch": 0.16344048348953316, "grad_norm": 0.5584066478472101, "learning_rate": 4.647688564476886e-05, "loss": 0.5673, "step": 5598 }, { "epoch": 0.16346967971738052, "grad_norm": 0.6068870488370529, "learning_rate": 4.6475263584752635e-05, "loss": 0.6635, "step": 5599 }, { "epoch": 0.16349887594522788, "grad_norm": 0.6004151072471207, "learning_rate": 4.647364152473642e-05, "loss": 0.6802, "step": 5600 }, { "epoch": 0.16352807217307525, "grad_norm": 0.5995231236449442, "learning_rate": 4.64720194647202e-05, "loss": 0.6602, "step": 5601 }, { "epoch": 0.1635572684009226, "grad_norm": 0.5981909984977333, "learning_rate": 4.6470397404703973e-05, "loss": 0.7364, "step": 5602 }, { "epoch": 0.16358646462876997, "grad_norm": 0.6043684509638352, "learning_rate": 4.6468775344687755e-05, "loss": 0.6919, "step": 5603 }, { "epoch": 0.16361566085661733, "grad_norm": 0.608948291959558, "learning_rate": 4.646715328467153e-05, "loss": 0.6309, "step": 5604 }, { "epoch": 0.1636448570844647, "grad_norm": 0.5817774516068962, "learning_rate": 4.646553122465531e-05, "loss": 0.6157, "step": 5605 }, { "epoch": 0.16367405331231205, "grad_norm": 0.5993240280838035, "learning_rate": 4.6463909164639094e-05, "loss": 0.7134, "step": 5606 }, { "epoch": 0.1637032495401594, "grad_norm": 0.6191228389164992, "learning_rate": 4.6462287104622875e-05, "loss": 0.6988, "step": 5607 }, { "epoch": 0.16373244576800677, "grad_norm": 0.5471638234714927, "learning_rate": 4.646066504460666e-05, "loss": 0.6224, "step": 5608 }, { "epoch": 0.16376164199585413, "grad_norm": 0.5995149509088927, "learning_rate": 4.645904298459043e-05, "loss": 0.7338, "step": 5609 }, { "epoch": 0.1637908382237015, "grad_norm": 0.558764417390069, "learning_rate": 4.6457420924574214e-05, "loss": 0.5922, "step": 5610 }, { "epoch": 0.16382003445154886, "grad_norm": 0.6778150841254846, "learning_rate": 4.645579886455799e-05, "loss": 0.8032, "step": 5611 }, { "epoch": 0.16384923067939622, "grad_norm": 0.6343090001041805, "learning_rate": 4.645417680454177e-05, "loss": 0.6583, "step": 5612 }, { "epoch": 0.16387842690724358, "grad_norm": 0.6346650537526654, "learning_rate": 4.645255474452555e-05, "loss": 0.6929, "step": 5613 }, { "epoch": 0.16390762313509094, "grad_norm": 0.565269409571457, "learning_rate": 4.645093268450933e-05, "loss": 0.645, "step": 5614 }, { "epoch": 0.1639368193629383, "grad_norm": 0.6000681131606431, "learning_rate": 4.644931062449311e-05, "loss": 0.7068, "step": 5615 }, { "epoch": 0.16396601559078566, "grad_norm": 0.6495156129575518, "learning_rate": 4.644768856447689e-05, "loss": 0.7672, "step": 5616 }, { "epoch": 0.16399521181863302, "grad_norm": 0.5749718082449384, "learning_rate": 4.6446066504460666e-05, "loss": 0.6292, "step": 5617 }, { "epoch": 0.16402440804648039, "grad_norm": 0.6011214668426172, "learning_rate": 4.644444444444445e-05, "loss": 0.6886, "step": 5618 }, { "epoch": 0.16405360427432775, "grad_norm": 0.5963308692640346, "learning_rate": 4.644282238442822e-05, "loss": 0.6741, "step": 5619 }, { "epoch": 0.1640828005021751, "grad_norm": 0.6403363373685882, "learning_rate": 4.6441200324412005e-05, "loss": 0.6715, "step": 5620 }, { "epoch": 0.16411199673002247, "grad_norm": 0.677833094944533, "learning_rate": 4.6439578264395786e-05, "loss": 0.7356, "step": 5621 }, { "epoch": 0.16414119295786983, "grad_norm": 0.5838954101385777, "learning_rate": 4.643795620437956e-05, "loss": 0.6216, "step": 5622 }, { "epoch": 0.1641703891857172, "grad_norm": 0.584173304757263, "learning_rate": 4.643633414436334e-05, "loss": 0.6451, "step": 5623 }, { "epoch": 0.16419958541356458, "grad_norm": 0.6162904580112929, "learning_rate": 4.643471208434712e-05, "loss": 0.6904, "step": 5624 }, { "epoch": 0.16422878164141194, "grad_norm": 0.598920674529031, "learning_rate": 4.64330900243309e-05, "loss": 0.6815, "step": 5625 }, { "epoch": 0.1642579778692593, "grad_norm": 0.6233564245591584, "learning_rate": 4.643146796431468e-05, "loss": 0.7425, "step": 5626 }, { "epoch": 0.16428717409710666, "grad_norm": 0.5452560965472996, "learning_rate": 4.642984590429846e-05, "loss": 0.6157, "step": 5627 }, { "epoch": 0.16431637032495403, "grad_norm": 0.5827421846372238, "learning_rate": 4.6428223844282245e-05, "loss": 0.6701, "step": 5628 }, { "epoch": 0.1643455665528014, "grad_norm": 0.6653000239018751, "learning_rate": 4.642660178426602e-05, "loss": 0.776, "step": 5629 }, { "epoch": 0.16437476278064875, "grad_norm": 0.6504152480631118, "learning_rate": 4.64249797242498e-05, "loss": 0.6972, "step": 5630 }, { "epoch": 0.1644039590084961, "grad_norm": 0.5718316465315214, "learning_rate": 4.642335766423358e-05, "loss": 0.6202, "step": 5631 }, { "epoch": 0.16443315523634347, "grad_norm": 0.5546732490920603, "learning_rate": 4.642173560421736e-05, "loss": 0.6183, "step": 5632 }, { "epoch": 0.16446235146419083, "grad_norm": 0.5691809236747455, "learning_rate": 4.642011354420114e-05, "loss": 0.6452, "step": 5633 }, { "epoch": 0.1644915476920382, "grad_norm": 0.6011477763679169, "learning_rate": 4.6418491484184915e-05, "loss": 0.606, "step": 5634 }, { "epoch": 0.16452074391988555, "grad_norm": 0.593034712737506, "learning_rate": 4.64168694241687e-05, "loss": 0.6948, "step": 5635 }, { "epoch": 0.16454994014773291, "grad_norm": 0.6200754161857887, "learning_rate": 4.641524736415248e-05, "loss": 0.7222, "step": 5636 }, { "epoch": 0.16457913637558028, "grad_norm": 0.671835261934106, "learning_rate": 4.6413625304136254e-05, "loss": 0.743, "step": 5637 }, { "epoch": 0.16460833260342764, "grad_norm": 0.6574029933977477, "learning_rate": 4.6412003244120036e-05, "loss": 0.7245, "step": 5638 }, { "epoch": 0.164637528831275, "grad_norm": 0.5498813435258932, "learning_rate": 4.641038118410381e-05, "loss": 0.5999, "step": 5639 }, { "epoch": 0.16466672505912236, "grad_norm": 0.5790946720831133, "learning_rate": 4.640875912408759e-05, "loss": 0.6811, "step": 5640 }, { "epoch": 0.16469592128696972, "grad_norm": 0.6114088622923936, "learning_rate": 4.6407137064071374e-05, "loss": 0.689, "step": 5641 }, { "epoch": 0.16472511751481708, "grad_norm": 0.5642708183997237, "learning_rate": 4.640551500405515e-05, "loss": 0.6129, "step": 5642 }, { "epoch": 0.16475431374266444, "grad_norm": 0.6173166014732635, "learning_rate": 4.640389294403893e-05, "loss": 0.7202, "step": 5643 }, { "epoch": 0.1647835099705118, "grad_norm": 0.622544549508562, "learning_rate": 4.640227088402271e-05, "loss": 0.6923, "step": 5644 }, { "epoch": 0.16481270619835917, "grad_norm": 0.5716780336364279, "learning_rate": 4.6400648824006494e-05, "loss": 0.634, "step": 5645 }, { "epoch": 0.16484190242620653, "grad_norm": 0.5683790361799087, "learning_rate": 4.639902676399027e-05, "loss": 0.6041, "step": 5646 }, { "epoch": 0.1648710986540539, "grad_norm": 0.5699791848614844, "learning_rate": 4.639740470397405e-05, "loss": 0.6302, "step": 5647 }, { "epoch": 0.16490029488190125, "grad_norm": 0.5470912751984358, "learning_rate": 4.639578264395783e-05, "loss": 0.5665, "step": 5648 }, { "epoch": 0.1649294911097486, "grad_norm": 0.5872245230836229, "learning_rate": 4.639416058394161e-05, "loss": 0.6027, "step": 5649 }, { "epoch": 0.16495868733759597, "grad_norm": 0.5863519875662313, "learning_rate": 4.639253852392539e-05, "loss": 0.6493, "step": 5650 }, { "epoch": 0.16498788356544333, "grad_norm": 0.6291592097858746, "learning_rate": 4.6390916463909165e-05, "loss": 0.6886, "step": 5651 }, { "epoch": 0.1650170797932907, "grad_norm": 0.6257700665015284, "learning_rate": 4.6389294403892946e-05, "loss": 0.6979, "step": 5652 }, { "epoch": 0.16504627602113806, "grad_norm": 0.5704811176340487, "learning_rate": 4.638767234387673e-05, "loss": 0.5377, "step": 5653 }, { "epoch": 0.16507547224898544, "grad_norm": 0.5798883964704253, "learning_rate": 4.63860502838605e-05, "loss": 0.675, "step": 5654 }, { "epoch": 0.1651046684768328, "grad_norm": 0.5442577046907855, "learning_rate": 4.6384428223844285e-05, "loss": 0.6017, "step": 5655 }, { "epoch": 0.16513386470468017, "grad_norm": 0.6029255232121955, "learning_rate": 4.638280616382806e-05, "loss": 0.6542, "step": 5656 }, { "epoch": 0.16516306093252753, "grad_norm": 0.6545675392035143, "learning_rate": 4.638118410381184e-05, "loss": 0.779, "step": 5657 }, { "epoch": 0.1651922571603749, "grad_norm": 0.5869827886153518, "learning_rate": 4.637956204379562e-05, "loss": 0.628, "step": 5658 }, { "epoch": 0.16522145338822225, "grad_norm": 0.6277263681991162, "learning_rate": 4.63779399837794e-05, "loss": 0.709, "step": 5659 }, { "epoch": 0.1652506496160696, "grad_norm": 0.5673366600063651, "learning_rate": 4.637631792376318e-05, "loss": 0.6114, "step": 5660 }, { "epoch": 0.16527984584391697, "grad_norm": 0.5967386129177213, "learning_rate": 4.637469586374696e-05, "loss": 0.6501, "step": 5661 }, { "epoch": 0.16530904207176433, "grad_norm": 0.6047698846112339, "learning_rate": 4.637307380373074e-05, "loss": 0.6505, "step": 5662 }, { "epoch": 0.1653382382996117, "grad_norm": 0.608958555328608, "learning_rate": 4.6371451743714525e-05, "loss": 0.7544, "step": 5663 }, { "epoch": 0.16536743452745906, "grad_norm": 0.7799434553176655, "learning_rate": 4.63698296836983e-05, "loss": 0.8542, "step": 5664 }, { "epoch": 0.16539663075530642, "grad_norm": 0.6230232014432623, "learning_rate": 4.636820762368208e-05, "loss": 0.6451, "step": 5665 }, { "epoch": 0.16542582698315378, "grad_norm": 0.5990380041084756, "learning_rate": 4.636658556366586e-05, "loss": 0.7138, "step": 5666 }, { "epoch": 0.16545502321100114, "grad_norm": 0.614694625084232, "learning_rate": 4.636496350364964e-05, "loss": 0.6939, "step": 5667 }, { "epoch": 0.1654842194388485, "grad_norm": 0.5619870588893119, "learning_rate": 4.636334144363342e-05, "loss": 0.6289, "step": 5668 }, { "epoch": 0.16551341566669586, "grad_norm": 0.6209559066779727, "learning_rate": 4.6361719383617196e-05, "loss": 0.7023, "step": 5669 }, { "epoch": 0.16554261189454322, "grad_norm": 0.5621398106085477, "learning_rate": 4.636009732360098e-05, "loss": 0.6307, "step": 5670 }, { "epoch": 0.16557180812239058, "grad_norm": 0.5823849543161271, "learning_rate": 4.635847526358475e-05, "loss": 0.6686, "step": 5671 }, { "epoch": 0.16560100435023795, "grad_norm": 0.5893421957473567, "learning_rate": 4.6356853203568534e-05, "loss": 0.6948, "step": 5672 }, { "epoch": 0.1656302005780853, "grad_norm": 0.6083427696143101, "learning_rate": 4.6355231143552316e-05, "loss": 0.7311, "step": 5673 }, { "epoch": 0.16565939680593267, "grad_norm": 0.5688636271486697, "learning_rate": 4.635360908353609e-05, "loss": 0.6362, "step": 5674 }, { "epoch": 0.16568859303378003, "grad_norm": 0.5683685046746773, "learning_rate": 4.635198702351987e-05, "loss": 0.6549, "step": 5675 }, { "epoch": 0.1657177892616274, "grad_norm": 0.5946609528813718, "learning_rate": 4.635036496350365e-05, "loss": 0.6927, "step": 5676 }, { "epoch": 0.16574698548947475, "grad_norm": 0.5648228767642449, "learning_rate": 4.634874290348743e-05, "loss": 0.5887, "step": 5677 }, { "epoch": 0.1657761817173221, "grad_norm": 0.6315346028945209, "learning_rate": 4.634712084347121e-05, "loss": 0.678, "step": 5678 }, { "epoch": 0.16580537794516947, "grad_norm": 0.6125548799954603, "learning_rate": 4.6345498783454986e-05, "loss": 0.6294, "step": 5679 }, { "epoch": 0.16583457417301684, "grad_norm": 0.668228480792689, "learning_rate": 4.634387672343877e-05, "loss": 0.7035, "step": 5680 }, { "epoch": 0.1658637704008642, "grad_norm": 0.6500457479944619, "learning_rate": 4.634225466342254e-05, "loss": 0.6811, "step": 5681 }, { "epoch": 0.16589296662871156, "grad_norm": 0.6728448897658329, "learning_rate": 4.634063260340633e-05, "loss": 0.638, "step": 5682 }, { "epoch": 0.16592216285655892, "grad_norm": 0.5921869901209785, "learning_rate": 4.633901054339011e-05, "loss": 0.7176, "step": 5683 }, { "epoch": 0.1659513590844063, "grad_norm": 0.6054549573923416, "learning_rate": 4.633738848337389e-05, "loss": 0.662, "step": 5684 }, { "epoch": 0.16598055531225367, "grad_norm": 0.6032141883227333, "learning_rate": 4.633576642335767e-05, "loss": 0.6918, "step": 5685 }, { "epoch": 0.16600975154010103, "grad_norm": 0.6038386717027392, "learning_rate": 4.6334144363341445e-05, "loss": 0.665, "step": 5686 }, { "epoch": 0.1660389477679484, "grad_norm": 0.6643689242677464, "learning_rate": 4.633252230332523e-05, "loss": 0.742, "step": 5687 }, { "epoch": 0.16606814399579575, "grad_norm": 0.7194661250071144, "learning_rate": 4.633090024330901e-05, "loss": 0.6913, "step": 5688 }, { "epoch": 0.16609734022364311, "grad_norm": 0.6118811920275616, "learning_rate": 4.6329278183292783e-05, "loss": 0.656, "step": 5689 }, { "epoch": 0.16612653645149048, "grad_norm": 0.6008420349674939, "learning_rate": 4.6327656123276565e-05, "loss": 0.6465, "step": 5690 }, { "epoch": 0.16615573267933784, "grad_norm": 0.5503355399998191, "learning_rate": 4.632603406326034e-05, "loss": 0.5971, "step": 5691 }, { "epoch": 0.1661849289071852, "grad_norm": 0.5705983113185293, "learning_rate": 4.632441200324412e-05, "loss": 0.5964, "step": 5692 }, { "epoch": 0.16621412513503256, "grad_norm": 0.6205365038082183, "learning_rate": 4.6322789943227904e-05, "loss": 0.6981, "step": 5693 }, { "epoch": 0.16624332136287992, "grad_norm": 0.5871762209037976, "learning_rate": 4.632116788321168e-05, "loss": 0.6455, "step": 5694 }, { "epoch": 0.16627251759072728, "grad_norm": 0.6613096045415862, "learning_rate": 4.631954582319546e-05, "loss": 0.6737, "step": 5695 }, { "epoch": 0.16630171381857464, "grad_norm": 0.6153559364618227, "learning_rate": 4.6317923763179235e-05, "loss": 0.7172, "step": 5696 }, { "epoch": 0.166330910046422, "grad_norm": 0.5711960650998731, "learning_rate": 4.631630170316302e-05, "loss": 0.6599, "step": 5697 }, { "epoch": 0.16636010627426936, "grad_norm": 0.5414710249977251, "learning_rate": 4.63146796431468e-05, "loss": 0.5995, "step": 5698 }, { "epoch": 0.16638930250211673, "grad_norm": 0.6086457534770945, "learning_rate": 4.6313057583130574e-05, "loss": 0.6911, "step": 5699 }, { "epoch": 0.1664184987299641, "grad_norm": 0.5854026712943514, "learning_rate": 4.6311435523114356e-05, "loss": 0.6501, "step": 5700 }, { "epoch": 0.16644769495781145, "grad_norm": 0.625171214442083, "learning_rate": 4.630981346309814e-05, "loss": 0.6855, "step": 5701 }, { "epoch": 0.1664768911856588, "grad_norm": 0.5879181425952512, "learning_rate": 4.630819140308192e-05, "loss": 0.6318, "step": 5702 }, { "epoch": 0.16650608741350617, "grad_norm": 0.6436666792568077, "learning_rate": 4.63065693430657e-05, "loss": 0.6863, "step": 5703 }, { "epoch": 0.16653528364135353, "grad_norm": 0.6061195613170806, "learning_rate": 4.6304947283049476e-05, "loss": 0.614, "step": 5704 }, { "epoch": 0.1665644798692009, "grad_norm": 0.6842979806415924, "learning_rate": 4.630332522303326e-05, "loss": 0.7981, "step": 5705 }, { "epoch": 0.16659367609704825, "grad_norm": 0.6434506048587625, "learning_rate": 4.630170316301703e-05, "loss": 0.714, "step": 5706 }, { "epoch": 0.16662287232489562, "grad_norm": 0.5555855097604282, "learning_rate": 4.6300081103000815e-05, "loss": 0.5946, "step": 5707 }, { "epoch": 0.16665206855274298, "grad_norm": 0.644457336624821, "learning_rate": 4.6298459042984596e-05, "loss": 0.7593, "step": 5708 }, { "epoch": 0.16668126478059034, "grad_norm": 0.5898061362227502, "learning_rate": 4.629683698296837e-05, "loss": 0.6598, "step": 5709 }, { "epoch": 0.1667104610084377, "grad_norm": 0.5746999494031699, "learning_rate": 4.629521492295215e-05, "loss": 0.5959, "step": 5710 }, { "epoch": 0.16673965723628506, "grad_norm": 0.5729161980639235, "learning_rate": 4.629359286293593e-05, "loss": 0.5858, "step": 5711 }, { "epoch": 0.16676885346413242, "grad_norm": 0.6099288307425609, "learning_rate": 4.629197080291971e-05, "loss": 0.7188, "step": 5712 }, { "epoch": 0.16679804969197978, "grad_norm": 0.5651611621109396, "learning_rate": 4.629034874290349e-05, "loss": 0.6254, "step": 5713 }, { "epoch": 0.16682724591982717, "grad_norm": 0.6411725370329834, "learning_rate": 4.6288726682887267e-05, "loss": 0.7515, "step": 5714 }, { "epoch": 0.16685644214767453, "grad_norm": 0.569870030830005, "learning_rate": 4.628710462287105e-05, "loss": 0.5652, "step": 5715 }, { "epoch": 0.1668856383755219, "grad_norm": 0.5775646536863293, "learning_rate": 4.628548256285482e-05, "loss": 0.63, "step": 5716 }, { "epoch": 0.16691483460336926, "grad_norm": 0.5628040544884426, "learning_rate": 4.6283860502838605e-05, "loss": 0.6351, "step": 5717 }, { "epoch": 0.16694403083121662, "grad_norm": 0.5715461099679064, "learning_rate": 4.628223844282239e-05, "loss": 0.6278, "step": 5718 }, { "epoch": 0.16697322705906398, "grad_norm": 0.5772676974280435, "learning_rate": 4.628061638280616e-05, "loss": 0.6312, "step": 5719 }, { "epoch": 0.16700242328691134, "grad_norm": 0.6200569374725541, "learning_rate": 4.627899432278995e-05, "loss": 0.7269, "step": 5720 }, { "epoch": 0.1670316195147587, "grad_norm": 0.6211571103100514, "learning_rate": 4.6277372262773725e-05, "loss": 0.7226, "step": 5721 }, { "epoch": 0.16706081574260606, "grad_norm": 0.551830490877702, "learning_rate": 4.627575020275751e-05, "loss": 0.6082, "step": 5722 }, { "epoch": 0.16709001197045342, "grad_norm": 0.5930328309147225, "learning_rate": 4.627412814274128e-05, "loss": 0.6301, "step": 5723 }, { "epoch": 0.16711920819830078, "grad_norm": 0.5951258914803803, "learning_rate": 4.6272506082725064e-05, "loss": 0.634, "step": 5724 }, { "epoch": 0.16714840442614814, "grad_norm": 0.5458649837704668, "learning_rate": 4.6270884022708846e-05, "loss": 0.577, "step": 5725 }, { "epoch": 0.1671776006539955, "grad_norm": 0.6155605377568703, "learning_rate": 4.626926196269262e-05, "loss": 0.7104, "step": 5726 }, { "epoch": 0.16720679688184287, "grad_norm": 0.6067926067558447, "learning_rate": 4.62676399026764e-05, "loss": 0.7071, "step": 5727 }, { "epoch": 0.16723599310969023, "grad_norm": 0.6093883863134091, "learning_rate": 4.6266017842660184e-05, "loss": 0.6624, "step": 5728 }, { "epoch": 0.1672651893375376, "grad_norm": 0.6038087417683293, "learning_rate": 4.626439578264396e-05, "loss": 0.703, "step": 5729 }, { "epoch": 0.16729438556538495, "grad_norm": 0.5936324467848993, "learning_rate": 4.626277372262774e-05, "loss": 0.6654, "step": 5730 }, { "epoch": 0.1673235817932323, "grad_norm": 0.6934367970320268, "learning_rate": 4.6261151662611516e-05, "loss": 0.7945, "step": 5731 }, { "epoch": 0.16735277802107967, "grad_norm": 0.5962597399065722, "learning_rate": 4.62595296025953e-05, "loss": 0.7133, "step": 5732 }, { "epoch": 0.16738197424892703, "grad_norm": 0.617925443031204, "learning_rate": 4.625790754257908e-05, "loss": 0.718, "step": 5733 }, { "epoch": 0.1674111704767744, "grad_norm": 0.5642153229584801, "learning_rate": 4.6256285482562854e-05, "loss": 0.6523, "step": 5734 }, { "epoch": 0.16744036670462176, "grad_norm": 0.5558902154271496, "learning_rate": 4.6254663422546636e-05, "loss": 0.5934, "step": 5735 }, { "epoch": 0.16746956293246912, "grad_norm": 0.8358773629675198, "learning_rate": 4.625304136253041e-05, "loss": 0.7512, "step": 5736 }, { "epoch": 0.16749875916031648, "grad_norm": 0.5672391094479814, "learning_rate": 4.625141930251419e-05, "loss": 0.6271, "step": 5737 }, { "epoch": 0.16752795538816384, "grad_norm": 0.6785715515987756, "learning_rate": 4.6249797242497975e-05, "loss": 0.7319, "step": 5738 }, { "epoch": 0.1675571516160112, "grad_norm": 0.6527818684274012, "learning_rate": 4.6248175182481756e-05, "loss": 0.7706, "step": 5739 }, { "epoch": 0.16758634784385856, "grad_norm": 0.5958975484164545, "learning_rate": 4.624655312246554e-05, "loss": 0.7008, "step": 5740 }, { "epoch": 0.16761554407170592, "grad_norm": 0.5992350885205137, "learning_rate": 4.624493106244931e-05, "loss": 0.66, "step": 5741 }, { "epoch": 0.16764474029955329, "grad_norm": 0.5883089984985909, "learning_rate": 4.6243309002433095e-05, "loss": 0.7004, "step": 5742 }, { "epoch": 0.16767393652740065, "grad_norm": 0.5157384060047249, "learning_rate": 4.624168694241687e-05, "loss": 0.5575, "step": 5743 }, { "epoch": 0.16770313275524804, "grad_norm": 0.5587585764870605, "learning_rate": 4.624006488240065e-05, "loss": 0.6189, "step": 5744 }, { "epoch": 0.1677323289830954, "grad_norm": 0.6086517139411083, "learning_rate": 4.623844282238443e-05, "loss": 0.7123, "step": 5745 }, { "epoch": 0.16776152521094276, "grad_norm": 0.6050247422083608, "learning_rate": 4.623682076236821e-05, "loss": 0.6712, "step": 5746 }, { "epoch": 0.16779072143879012, "grad_norm": 0.6498891788876993, "learning_rate": 4.623519870235199e-05, "loss": 0.6651, "step": 5747 }, { "epoch": 0.16781991766663748, "grad_norm": 0.6242730451807718, "learning_rate": 4.623357664233577e-05, "loss": 0.6787, "step": 5748 }, { "epoch": 0.16784911389448484, "grad_norm": 0.5866688358641982, "learning_rate": 4.623195458231955e-05, "loss": 0.6068, "step": 5749 }, { "epoch": 0.1678783101223322, "grad_norm": 0.5777556473511589, "learning_rate": 4.623033252230333e-05, "loss": 0.6999, "step": 5750 }, { "epoch": 0.16790750635017956, "grad_norm": 0.5433376569819541, "learning_rate": 4.6228710462287104e-05, "loss": 0.6231, "step": 5751 }, { "epoch": 0.16793670257802693, "grad_norm": 0.6230288657521315, "learning_rate": 4.6227088402270885e-05, "loss": 0.6144, "step": 5752 }, { "epoch": 0.1679658988058743, "grad_norm": 0.548386994593132, "learning_rate": 4.622546634225467e-05, "loss": 0.5528, "step": 5753 }, { "epoch": 0.16799509503372165, "grad_norm": 0.5746574257264911, "learning_rate": 4.622384428223844e-05, "loss": 0.6127, "step": 5754 }, { "epoch": 0.168024291261569, "grad_norm": 0.6744294317912705, "learning_rate": 4.6222222222222224e-05, "loss": 0.7177, "step": 5755 }, { "epoch": 0.16805348748941637, "grad_norm": 0.5817654057694437, "learning_rate": 4.6220600162206e-05, "loss": 0.6407, "step": 5756 }, { "epoch": 0.16808268371726373, "grad_norm": 0.6818020951295876, "learning_rate": 4.621897810218978e-05, "loss": 0.7315, "step": 5757 }, { "epoch": 0.1681118799451111, "grad_norm": 0.5871102325656667, "learning_rate": 4.621735604217356e-05, "loss": 0.6527, "step": 5758 }, { "epoch": 0.16814107617295845, "grad_norm": 0.5671778766512008, "learning_rate": 4.6215733982157344e-05, "loss": 0.6475, "step": 5759 }, { "epoch": 0.16817027240080581, "grad_norm": 0.5670048897931984, "learning_rate": 4.6214111922141126e-05, "loss": 0.6205, "step": 5760 }, { "epoch": 0.16819946862865318, "grad_norm": 0.73860711394392, "learning_rate": 4.62124898621249e-05, "loss": 0.835, "step": 5761 }, { "epoch": 0.16822866485650054, "grad_norm": 0.6030083990563734, "learning_rate": 4.621086780210868e-05, "loss": 0.6802, "step": 5762 }, { "epoch": 0.1682578610843479, "grad_norm": 0.5864281595723985, "learning_rate": 4.620924574209246e-05, "loss": 0.6744, "step": 5763 }, { "epoch": 0.16828705731219526, "grad_norm": 0.6127967745899349, "learning_rate": 4.620762368207624e-05, "loss": 0.7022, "step": 5764 }, { "epoch": 0.16831625354004262, "grad_norm": 0.6979821595227333, "learning_rate": 4.620600162206002e-05, "loss": 0.679, "step": 5765 }, { "epoch": 0.16834544976788998, "grad_norm": 0.576177330499137, "learning_rate": 4.6204379562043796e-05, "loss": 0.6602, "step": 5766 }, { "epoch": 0.16837464599573734, "grad_norm": 0.6693549297947922, "learning_rate": 4.620275750202758e-05, "loss": 0.755, "step": 5767 }, { "epoch": 0.1684038422235847, "grad_norm": 0.606692006843863, "learning_rate": 4.620113544201135e-05, "loss": 0.6121, "step": 5768 }, { "epoch": 0.16843303845143207, "grad_norm": 0.6450718037986647, "learning_rate": 4.6199513381995135e-05, "loss": 0.7082, "step": 5769 }, { "epoch": 0.16846223467927943, "grad_norm": 0.585146312694109, "learning_rate": 4.6197891321978916e-05, "loss": 0.6845, "step": 5770 }, { "epoch": 0.1684914309071268, "grad_norm": 0.6079392688477327, "learning_rate": 4.619626926196269e-05, "loss": 0.6481, "step": 5771 }, { "epoch": 0.16852062713497415, "grad_norm": 0.5762906740013164, "learning_rate": 4.619464720194647e-05, "loss": 0.5933, "step": 5772 }, { "epoch": 0.1685498233628215, "grad_norm": 0.5787101102784893, "learning_rate": 4.6193025141930255e-05, "loss": 0.6414, "step": 5773 }, { "epoch": 0.1685790195906689, "grad_norm": 0.5313159355660504, "learning_rate": 4.619140308191403e-05, "loss": 0.5482, "step": 5774 }, { "epoch": 0.16860821581851626, "grad_norm": 0.7552881893866835, "learning_rate": 4.618978102189781e-05, "loss": 0.6208, "step": 5775 }, { "epoch": 0.16863741204636362, "grad_norm": 0.6188229599814998, "learning_rate": 4.618815896188159e-05, "loss": 0.6635, "step": 5776 }, { "epoch": 0.16866660827421098, "grad_norm": 0.5367716347917744, "learning_rate": 4.6186536901865375e-05, "loss": 0.5737, "step": 5777 }, { "epoch": 0.16869580450205834, "grad_norm": 0.5842980467807503, "learning_rate": 4.618491484184915e-05, "loss": 0.6357, "step": 5778 }, { "epoch": 0.1687250007299057, "grad_norm": 0.6345087663842819, "learning_rate": 4.618329278183293e-05, "loss": 0.7084, "step": 5779 }, { "epoch": 0.16875419695775307, "grad_norm": 0.5565359158843073, "learning_rate": 4.6181670721816714e-05, "loss": 0.6682, "step": 5780 }, { "epoch": 0.16878339318560043, "grad_norm": 0.5579053114851068, "learning_rate": 4.618004866180049e-05, "loss": 0.5946, "step": 5781 }, { "epoch": 0.1688125894134478, "grad_norm": 0.6115716170897804, "learning_rate": 4.617842660178427e-05, "loss": 0.7151, "step": 5782 }, { "epoch": 0.16884178564129515, "grad_norm": 0.6128932926824554, "learning_rate": 4.6176804541768045e-05, "loss": 0.7042, "step": 5783 }, { "epoch": 0.1688709818691425, "grad_norm": 0.5779372415534502, "learning_rate": 4.617518248175183e-05, "loss": 0.6287, "step": 5784 }, { "epoch": 0.16890017809698987, "grad_norm": 0.6055374858095698, "learning_rate": 4.617356042173561e-05, "loss": 0.6126, "step": 5785 }, { "epoch": 0.16892937432483723, "grad_norm": 0.5831590188449411, "learning_rate": 4.6171938361719384e-05, "loss": 0.6498, "step": 5786 }, { "epoch": 0.1689585705526846, "grad_norm": 0.6289940616829013, "learning_rate": 4.6170316301703166e-05, "loss": 0.7183, "step": 5787 }, { "epoch": 0.16898776678053196, "grad_norm": 0.5621947692436051, "learning_rate": 4.616869424168694e-05, "loss": 0.584, "step": 5788 }, { "epoch": 0.16901696300837932, "grad_norm": 0.638590450544347, "learning_rate": 4.616707218167072e-05, "loss": 0.702, "step": 5789 }, { "epoch": 0.16904615923622668, "grad_norm": 0.5852028467297996, "learning_rate": 4.6165450121654504e-05, "loss": 0.6722, "step": 5790 }, { "epoch": 0.16907535546407404, "grad_norm": 0.5671470559556391, "learning_rate": 4.616382806163828e-05, "loss": 0.6741, "step": 5791 }, { "epoch": 0.1691045516919214, "grad_norm": 0.5908631938324757, "learning_rate": 4.616220600162206e-05, "loss": 0.6574, "step": 5792 }, { "epoch": 0.16913374791976876, "grad_norm": 0.5796722164018319, "learning_rate": 4.616058394160584e-05, "loss": 0.6614, "step": 5793 }, { "epoch": 0.16916294414761612, "grad_norm": 0.5746754743600888, "learning_rate": 4.615896188158962e-05, "loss": 0.5877, "step": 5794 }, { "epoch": 0.16919214037546348, "grad_norm": 0.5990606045300417, "learning_rate": 4.6157339821573406e-05, "loss": 0.661, "step": 5795 }, { "epoch": 0.16922133660331085, "grad_norm": 0.6591410252638645, "learning_rate": 4.615571776155718e-05, "loss": 0.7374, "step": 5796 }, { "epoch": 0.1692505328311582, "grad_norm": 0.6332391895114419, "learning_rate": 4.615409570154096e-05, "loss": 0.758, "step": 5797 }, { "epoch": 0.16927972905900557, "grad_norm": 0.6062761963019091, "learning_rate": 4.615247364152474e-05, "loss": 0.7117, "step": 5798 }, { "epoch": 0.16930892528685293, "grad_norm": 0.6012766331994513, "learning_rate": 4.615085158150852e-05, "loss": 0.7299, "step": 5799 }, { "epoch": 0.1693381215147003, "grad_norm": 0.7168480306779812, "learning_rate": 4.61492295214923e-05, "loss": 0.6687, "step": 5800 }, { "epoch": 0.16936731774254765, "grad_norm": 0.6654674516556847, "learning_rate": 4.6147607461476077e-05, "loss": 0.8122, "step": 5801 }, { "epoch": 0.169396513970395, "grad_norm": 0.6277362276778247, "learning_rate": 4.614598540145986e-05, "loss": 0.7257, "step": 5802 }, { "epoch": 0.16942571019824237, "grad_norm": 0.5645655527304757, "learning_rate": 4.614436334144363e-05, "loss": 0.6391, "step": 5803 }, { "epoch": 0.16945490642608976, "grad_norm": 0.637096730090182, "learning_rate": 4.6142741281427415e-05, "loss": 0.7412, "step": 5804 }, { "epoch": 0.16948410265393712, "grad_norm": 0.5812590727586062, "learning_rate": 4.61411192214112e-05, "loss": 0.6561, "step": 5805 }, { "epoch": 0.16951329888178449, "grad_norm": 0.593694916220956, "learning_rate": 4.613949716139497e-05, "loss": 0.6741, "step": 5806 }, { "epoch": 0.16954249510963185, "grad_norm": 0.8050843633519329, "learning_rate": 4.6137875101378754e-05, "loss": 0.8192, "step": 5807 }, { "epoch": 0.1695716913374792, "grad_norm": 0.6291319023107053, "learning_rate": 4.613625304136253e-05, "loss": 0.7594, "step": 5808 }, { "epoch": 0.16960088756532657, "grad_norm": 0.6468920775414383, "learning_rate": 4.613463098134631e-05, "loss": 0.7174, "step": 5809 }, { "epoch": 0.16963008379317393, "grad_norm": 0.5789682923307045, "learning_rate": 4.613300892133009e-05, "loss": 0.5941, "step": 5810 }, { "epoch": 0.1696592800210213, "grad_norm": 0.614619111885513, "learning_rate": 4.613138686131387e-05, "loss": 0.65, "step": 5811 }, { "epoch": 0.16968847624886865, "grad_norm": 0.5639693635571861, "learning_rate": 4.612976480129765e-05, "loss": 0.6359, "step": 5812 }, { "epoch": 0.169717672476716, "grad_norm": 0.6345541333088845, "learning_rate": 4.6128142741281424e-05, "loss": 0.7087, "step": 5813 }, { "epoch": 0.16974686870456338, "grad_norm": 0.6373394015560566, "learning_rate": 4.612652068126521e-05, "loss": 0.6923, "step": 5814 }, { "epoch": 0.16977606493241074, "grad_norm": 0.5771878108500172, "learning_rate": 4.6124898621248994e-05, "loss": 0.6567, "step": 5815 }, { "epoch": 0.1698052611602581, "grad_norm": 0.56046817382222, "learning_rate": 4.612327656123277e-05, "loss": 0.603, "step": 5816 }, { "epoch": 0.16983445738810546, "grad_norm": 0.583157620846023, "learning_rate": 4.612165450121655e-05, "loss": 0.6467, "step": 5817 }, { "epoch": 0.16986365361595282, "grad_norm": 0.8399492039155741, "learning_rate": 4.6120032441200326e-05, "loss": 0.7584, "step": 5818 }, { "epoch": 0.16989284984380018, "grad_norm": 0.6099212759237221, "learning_rate": 4.611841038118411e-05, "loss": 0.6806, "step": 5819 }, { "epoch": 0.16992204607164754, "grad_norm": 0.6028387435534548, "learning_rate": 4.611678832116789e-05, "loss": 0.6779, "step": 5820 }, { "epoch": 0.1699512422994949, "grad_norm": 0.7251138282411635, "learning_rate": 4.6115166261151664e-05, "loss": 0.7865, "step": 5821 }, { "epoch": 0.16998043852734226, "grad_norm": 0.5712574884338791, "learning_rate": 4.6113544201135446e-05, "loss": 0.6871, "step": 5822 }, { "epoch": 0.17000963475518963, "grad_norm": 0.5540327117611843, "learning_rate": 4.611192214111922e-05, "loss": 0.6218, "step": 5823 }, { "epoch": 0.170038830983037, "grad_norm": 0.6290707203079038, "learning_rate": 4.6110300081103e-05, "loss": 0.7765, "step": 5824 }, { "epoch": 0.17006802721088435, "grad_norm": 0.6075014440521471, "learning_rate": 4.6108678021086785e-05, "loss": 0.6982, "step": 5825 }, { "epoch": 0.1700972234387317, "grad_norm": 0.6502569059138313, "learning_rate": 4.610705596107056e-05, "loss": 0.7348, "step": 5826 }, { "epoch": 0.17012641966657907, "grad_norm": 0.5800328181438866, "learning_rate": 4.610543390105434e-05, "loss": 0.6223, "step": 5827 }, { "epoch": 0.17015561589442643, "grad_norm": 0.6277115815132047, "learning_rate": 4.6103811841038116e-05, "loss": 0.6774, "step": 5828 }, { "epoch": 0.1701848121222738, "grad_norm": 0.5737660664842168, "learning_rate": 4.61021897810219e-05, "loss": 0.6267, "step": 5829 }, { "epoch": 0.17021400835012115, "grad_norm": 0.695071976935826, "learning_rate": 4.610056772100568e-05, "loss": 0.7092, "step": 5830 }, { "epoch": 0.17024320457796852, "grad_norm": 0.785767217042505, "learning_rate": 4.6098945660989455e-05, "loss": 0.7484, "step": 5831 }, { "epoch": 0.17027240080581588, "grad_norm": 0.5754812167232356, "learning_rate": 4.609732360097324e-05, "loss": 0.6711, "step": 5832 }, { "epoch": 0.17030159703366324, "grad_norm": 0.5766671311755418, "learning_rate": 4.609570154095702e-05, "loss": 0.6586, "step": 5833 }, { "epoch": 0.1703307932615106, "grad_norm": 0.5468921456087881, "learning_rate": 4.60940794809408e-05, "loss": 0.5457, "step": 5834 }, { "epoch": 0.170359989489358, "grad_norm": 0.5853208070816135, "learning_rate": 4.609245742092458e-05, "loss": 0.6029, "step": 5835 }, { "epoch": 0.17038918571720535, "grad_norm": 0.5545418428558562, "learning_rate": 4.609083536090836e-05, "loss": 0.6188, "step": 5836 }, { "epoch": 0.1704183819450527, "grad_norm": 0.5673029332853041, "learning_rate": 4.608921330089214e-05, "loss": 0.6175, "step": 5837 }, { "epoch": 0.17044757817290007, "grad_norm": 0.6547274129602682, "learning_rate": 4.6087591240875914e-05, "loss": 0.6223, "step": 5838 }, { "epoch": 0.17047677440074743, "grad_norm": 0.6568501136511664, "learning_rate": 4.6085969180859695e-05, "loss": 0.7055, "step": 5839 }, { "epoch": 0.1705059706285948, "grad_norm": 0.5720965082133599, "learning_rate": 4.608434712084348e-05, "loss": 0.6105, "step": 5840 }, { "epoch": 0.17053516685644216, "grad_norm": 0.7437847753321857, "learning_rate": 4.608272506082725e-05, "loss": 0.6946, "step": 5841 }, { "epoch": 0.17056436308428952, "grad_norm": 0.7294319553406411, "learning_rate": 4.6081103000811034e-05, "loss": 0.6753, "step": 5842 }, { "epoch": 0.17059355931213688, "grad_norm": 0.6204610179849575, "learning_rate": 4.607948094079481e-05, "loss": 0.7143, "step": 5843 }, { "epoch": 0.17062275553998424, "grad_norm": 0.5976618316110075, "learning_rate": 4.607785888077859e-05, "loss": 0.674, "step": 5844 }, { "epoch": 0.1706519517678316, "grad_norm": 0.6369260431605497, "learning_rate": 4.607623682076237e-05, "loss": 0.7176, "step": 5845 }, { "epoch": 0.17068114799567896, "grad_norm": 0.590761150710158, "learning_rate": 4.607461476074615e-05, "loss": 0.6161, "step": 5846 }, { "epoch": 0.17071034422352632, "grad_norm": 0.6225349625935519, "learning_rate": 4.607299270072993e-05, "loss": 0.7519, "step": 5847 }, { "epoch": 0.17073954045137368, "grad_norm": 0.6099840232509475, "learning_rate": 4.6071370640713704e-05, "loss": 0.6442, "step": 5848 }, { "epoch": 0.17076873667922104, "grad_norm": 0.6613938863179526, "learning_rate": 4.6069748580697486e-05, "loss": 0.7201, "step": 5849 }, { "epoch": 0.1707979329070684, "grad_norm": 0.6380984318320464, "learning_rate": 4.606812652068127e-05, "loss": 0.7631, "step": 5850 }, { "epoch": 0.17082712913491577, "grad_norm": 0.5822953159681562, "learning_rate": 4.606650446066504e-05, "loss": 0.6404, "step": 5851 }, { "epoch": 0.17085632536276313, "grad_norm": 0.5906208826109396, "learning_rate": 4.606488240064883e-05, "loss": 0.6779, "step": 5852 }, { "epoch": 0.1708855215906105, "grad_norm": 0.6448289854886374, "learning_rate": 4.6063260340632606e-05, "loss": 0.7287, "step": 5853 }, { "epoch": 0.17091471781845785, "grad_norm": 0.5553069979215128, "learning_rate": 4.606163828061639e-05, "loss": 0.5677, "step": 5854 }, { "epoch": 0.1709439140463052, "grad_norm": 0.552005578083117, "learning_rate": 4.606001622060016e-05, "loss": 0.5891, "step": 5855 }, { "epoch": 0.17097311027415257, "grad_norm": 0.5661111581775724, "learning_rate": 4.6058394160583945e-05, "loss": 0.5839, "step": 5856 }, { "epoch": 0.17100230650199993, "grad_norm": 0.5824256520377533, "learning_rate": 4.6056772100567726e-05, "loss": 0.623, "step": 5857 }, { "epoch": 0.1710315027298473, "grad_norm": 0.599836357500574, "learning_rate": 4.60551500405515e-05, "loss": 0.6736, "step": 5858 }, { "epoch": 0.17106069895769466, "grad_norm": 0.645470254962667, "learning_rate": 4.605352798053528e-05, "loss": 0.7004, "step": 5859 }, { "epoch": 0.17108989518554202, "grad_norm": 0.5974618141421486, "learning_rate": 4.6051905920519065e-05, "loss": 0.6915, "step": 5860 }, { "epoch": 0.17111909141338938, "grad_norm": 0.6415095005385305, "learning_rate": 4.605028386050284e-05, "loss": 0.7817, "step": 5861 }, { "epoch": 0.17114828764123674, "grad_norm": 0.5604240280512125, "learning_rate": 4.604866180048662e-05, "loss": 0.6244, "step": 5862 }, { "epoch": 0.1711774838690841, "grad_norm": 0.5974011601121323, "learning_rate": 4.60470397404704e-05, "loss": 0.6658, "step": 5863 }, { "epoch": 0.17120668009693146, "grad_norm": 0.7173900825817344, "learning_rate": 4.604541768045418e-05, "loss": 0.7401, "step": 5864 }, { "epoch": 0.17123587632477885, "grad_norm": 0.5804973375713259, "learning_rate": 4.604379562043796e-05, "loss": 0.6798, "step": 5865 }, { "epoch": 0.1712650725526262, "grad_norm": 0.5535168508597553, "learning_rate": 4.6042173560421735e-05, "loss": 0.6284, "step": 5866 }, { "epoch": 0.17129426878047357, "grad_norm": 0.6446775062828562, "learning_rate": 4.604055150040552e-05, "loss": 0.6236, "step": 5867 }, { "epoch": 0.17132346500832094, "grad_norm": 0.7589455758218822, "learning_rate": 4.603892944038929e-05, "loss": 0.6369, "step": 5868 }, { "epoch": 0.1713526612361683, "grad_norm": 0.6210016717097323, "learning_rate": 4.6037307380373074e-05, "loss": 0.6991, "step": 5869 }, { "epoch": 0.17138185746401566, "grad_norm": 0.5746176049334968, "learning_rate": 4.6035685320356855e-05, "loss": 0.6468, "step": 5870 }, { "epoch": 0.17141105369186302, "grad_norm": 0.5793369859588363, "learning_rate": 4.603406326034064e-05, "loss": 0.6529, "step": 5871 }, { "epoch": 0.17144024991971038, "grad_norm": 0.5877743829121049, "learning_rate": 4.603244120032442e-05, "loss": 0.6924, "step": 5872 }, { "epoch": 0.17146944614755774, "grad_norm": 0.6096747053301323, "learning_rate": 4.6030819140308194e-05, "loss": 0.7105, "step": 5873 }, { "epoch": 0.1714986423754051, "grad_norm": 0.6361801217847918, "learning_rate": 4.6029197080291976e-05, "loss": 0.6857, "step": 5874 }, { "epoch": 0.17152783860325246, "grad_norm": 0.593757564144368, "learning_rate": 4.602757502027575e-05, "loss": 0.6821, "step": 5875 }, { "epoch": 0.17155703483109982, "grad_norm": 0.6880469757766156, "learning_rate": 4.602595296025953e-05, "loss": 0.7474, "step": 5876 }, { "epoch": 0.1715862310589472, "grad_norm": 0.6244949396365581, "learning_rate": 4.6024330900243314e-05, "loss": 0.712, "step": 5877 }, { "epoch": 0.17161542728679455, "grad_norm": 0.7237737620246384, "learning_rate": 4.602270884022709e-05, "loss": 0.7603, "step": 5878 }, { "epoch": 0.1716446235146419, "grad_norm": 0.5618881356257721, "learning_rate": 4.602108678021087e-05, "loss": 0.6436, "step": 5879 }, { "epoch": 0.17167381974248927, "grad_norm": 0.621040666695532, "learning_rate": 4.6019464720194646e-05, "loss": 0.7985, "step": 5880 }, { "epoch": 0.17170301597033663, "grad_norm": 0.5996935492342673, "learning_rate": 4.601784266017843e-05, "loss": 0.6827, "step": 5881 }, { "epoch": 0.171732212198184, "grad_norm": 0.57431134562333, "learning_rate": 4.601622060016221e-05, "loss": 0.673, "step": 5882 }, { "epoch": 0.17176140842603135, "grad_norm": 0.5811726929797293, "learning_rate": 4.6014598540145985e-05, "loss": 0.6559, "step": 5883 }, { "epoch": 0.17179060465387871, "grad_norm": 0.5928332574230175, "learning_rate": 4.6012976480129766e-05, "loss": 0.6838, "step": 5884 }, { "epoch": 0.17181980088172608, "grad_norm": 0.5675389637924729, "learning_rate": 4.601135442011355e-05, "loss": 0.6265, "step": 5885 }, { "epoch": 0.17184899710957344, "grad_norm": 0.5412765007586724, "learning_rate": 4.600973236009732e-05, "loss": 0.6363, "step": 5886 }, { "epoch": 0.1718781933374208, "grad_norm": 0.5781240254583794, "learning_rate": 4.6008110300081105e-05, "loss": 0.6674, "step": 5887 }, { "epoch": 0.17190738956526816, "grad_norm": 0.6439609007224081, "learning_rate": 4.600648824006488e-05, "loss": 0.6887, "step": 5888 }, { "epoch": 0.17193658579311552, "grad_norm": 0.579897468432891, "learning_rate": 4.600486618004866e-05, "loss": 0.6291, "step": 5889 }, { "epoch": 0.17196578202096288, "grad_norm": 0.576639471225133, "learning_rate": 4.600324412003244e-05, "loss": 0.6132, "step": 5890 }, { "epoch": 0.17199497824881024, "grad_norm": 0.587255655851878, "learning_rate": 4.6001622060016225e-05, "loss": 0.6674, "step": 5891 }, { "epoch": 0.1720241744766576, "grad_norm": 0.5823168346968544, "learning_rate": 4.600000000000001e-05, "loss": 0.6597, "step": 5892 }, { "epoch": 0.17205337070450497, "grad_norm": 0.5887063820007064, "learning_rate": 4.599837793998378e-05, "loss": 0.6618, "step": 5893 }, { "epoch": 0.17208256693235233, "grad_norm": 0.6028431832015678, "learning_rate": 4.5996755879967564e-05, "loss": 0.6532, "step": 5894 }, { "epoch": 0.17211176316019972, "grad_norm": 0.6159613331910583, "learning_rate": 4.599513381995134e-05, "loss": 0.6468, "step": 5895 }, { "epoch": 0.17214095938804708, "grad_norm": 0.5904406910028005, "learning_rate": 4.599351175993512e-05, "loss": 0.645, "step": 5896 }, { "epoch": 0.17217015561589444, "grad_norm": 0.6144042482438665, "learning_rate": 4.59918896999189e-05, "loss": 0.7167, "step": 5897 }, { "epoch": 0.1721993518437418, "grad_norm": 0.6385925737209366, "learning_rate": 4.599026763990268e-05, "loss": 0.6469, "step": 5898 }, { "epoch": 0.17222854807158916, "grad_norm": 0.5553938837323125, "learning_rate": 4.598864557988646e-05, "loss": 0.6027, "step": 5899 }, { "epoch": 0.17225774429943652, "grad_norm": 0.6450037987528034, "learning_rate": 4.5987023519870234e-05, "loss": 0.768, "step": 5900 }, { "epoch": 0.17228694052728388, "grad_norm": 0.5852951309454428, "learning_rate": 4.5985401459854016e-05, "loss": 0.628, "step": 5901 }, { "epoch": 0.17231613675513124, "grad_norm": 0.5959761271557776, "learning_rate": 4.59837793998378e-05, "loss": 0.6679, "step": 5902 }, { "epoch": 0.1723453329829786, "grad_norm": 0.585833754907733, "learning_rate": 4.598215733982157e-05, "loss": 0.6175, "step": 5903 }, { "epoch": 0.17237452921082597, "grad_norm": 0.5738241994850973, "learning_rate": 4.5980535279805354e-05, "loss": 0.6591, "step": 5904 }, { "epoch": 0.17240372543867333, "grad_norm": 0.5796153469868502, "learning_rate": 4.5978913219789136e-05, "loss": 0.611, "step": 5905 }, { "epoch": 0.1724329216665207, "grad_norm": 0.5552456435328859, "learning_rate": 4.597729115977291e-05, "loss": 0.6403, "step": 5906 }, { "epoch": 0.17246211789436805, "grad_norm": 0.625102935481285, "learning_rate": 4.597566909975669e-05, "loss": 0.7245, "step": 5907 }, { "epoch": 0.1724913141222154, "grad_norm": 0.5685306113932264, "learning_rate": 4.597404703974047e-05, "loss": 0.6472, "step": 5908 }, { "epoch": 0.17252051035006277, "grad_norm": 0.557638069654802, "learning_rate": 4.5972424979724256e-05, "loss": 0.6253, "step": 5909 }, { "epoch": 0.17254970657791013, "grad_norm": 0.5543291521647885, "learning_rate": 4.597080291970803e-05, "loss": 0.5874, "step": 5910 }, { "epoch": 0.1725789028057575, "grad_norm": 0.620066906551315, "learning_rate": 4.596918085969181e-05, "loss": 0.6956, "step": 5911 }, { "epoch": 0.17260809903360486, "grad_norm": 0.6190328117152694, "learning_rate": 4.5967558799675595e-05, "loss": 0.6548, "step": 5912 }, { "epoch": 0.17263729526145222, "grad_norm": 0.5433590783667978, "learning_rate": 4.596593673965937e-05, "loss": 0.5979, "step": 5913 }, { "epoch": 0.17266649148929958, "grad_norm": 0.5570571635674838, "learning_rate": 4.596431467964315e-05, "loss": 0.5746, "step": 5914 }, { "epoch": 0.17269568771714694, "grad_norm": 0.5727223725714481, "learning_rate": 4.5962692619626926e-05, "loss": 0.6136, "step": 5915 }, { "epoch": 0.1727248839449943, "grad_norm": 0.6095092786384296, "learning_rate": 4.596107055961071e-05, "loss": 0.6711, "step": 5916 }, { "epoch": 0.17275408017284166, "grad_norm": 0.6561773335906018, "learning_rate": 4.595944849959449e-05, "loss": 0.7719, "step": 5917 }, { "epoch": 0.17278327640068902, "grad_norm": 0.6005349808053891, "learning_rate": 4.5957826439578265e-05, "loss": 0.7132, "step": 5918 }, { "epoch": 0.17281247262853638, "grad_norm": 0.5695695360888546, "learning_rate": 4.595620437956205e-05, "loss": 0.6487, "step": 5919 }, { "epoch": 0.17284166885638375, "grad_norm": 0.5929394934958802, "learning_rate": 4.595458231954582e-05, "loss": 0.6638, "step": 5920 }, { "epoch": 0.1728708650842311, "grad_norm": 0.6645322785519898, "learning_rate": 4.59529602595296e-05, "loss": 0.7312, "step": 5921 }, { "epoch": 0.17290006131207847, "grad_norm": 0.6225154018260317, "learning_rate": 4.5951338199513385e-05, "loss": 0.6867, "step": 5922 }, { "epoch": 0.17292925753992583, "grad_norm": 0.5739866547994265, "learning_rate": 4.594971613949716e-05, "loss": 0.6328, "step": 5923 }, { "epoch": 0.1729584537677732, "grad_norm": 0.6113881112996672, "learning_rate": 4.594809407948094e-05, "loss": 0.6435, "step": 5924 }, { "epoch": 0.17298764999562058, "grad_norm": 0.601311130198586, "learning_rate": 4.594647201946472e-05, "loss": 0.6683, "step": 5925 }, { "epoch": 0.17301684622346794, "grad_norm": 0.6572054149300203, "learning_rate": 4.59448499594485e-05, "loss": 0.8012, "step": 5926 }, { "epoch": 0.1730460424513153, "grad_norm": 0.627949600116861, "learning_rate": 4.594322789943228e-05, "loss": 0.7032, "step": 5927 }, { "epoch": 0.17307523867916266, "grad_norm": 0.5917076698560502, "learning_rate": 4.594160583941606e-05, "loss": 0.6922, "step": 5928 }, { "epoch": 0.17310443490701002, "grad_norm": 0.5451994480090772, "learning_rate": 4.5939983779399844e-05, "loss": 0.6488, "step": 5929 }, { "epoch": 0.17313363113485739, "grad_norm": 0.6212297717003109, "learning_rate": 4.593836171938362e-05, "loss": 0.7094, "step": 5930 }, { "epoch": 0.17316282736270475, "grad_norm": 0.5957311433795286, "learning_rate": 4.59367396593674e-05, "loss": 0.6946, "step": 5931 }, { "epoch": 0.1731920235905521, "grad_norm": 0.5727240635167995, "learning_rate": 4.593511759935118e-05, "loss": 0.6477, "step": 5932 }, { "epoch": 0.17322121981839947, "grad_norm": 0.6365952208734006, "learning_rate": 4.593349553933496e-05, "loss": 0.7727, "step": 5933 }, { "epoch": 0.17325041604624683, "grad_norm": 0.5783114522360444, "learning_rate": 4.593187347931874e-05, "loss": 0.6574, "step": 5934 }, { "epoch": 0.1732796122740942, "grad_norm": 0.871961216858625, "learning_rate": 4.5930251419302514e-05, "loss": 0.6586, "step": 5935 }, { "epoch": 0.17330880850194155, "grad_norm": 0.5898285376797517, "learning_rate": 4.5928629359286296e-05, "loss": 0.6726, "step": 5936 }, { "epoch": 0.1733380047297889, "grad_norm": 0.5955666017649399, "learning_rate": 4.592700729927008e-05, "loss": 0.6975, "step": 5937 }, { "epoch": 0.17336720095763627, "grad_norm": 0.5843164646501192, "learning_rate": 4.592538523925385e-05, "loss": 0.6092, "step": 5938 }, { "epoch": 0.17339639718548364, "grad_norm": 0.5856768443519371, "learning_rate": 4.5923763179237634e-05, "loss": 0.6541, "step": 5939 }, { "epoch": 0.173425593413331, "grad_norm": 0.6065094564821197, "learning_rate": 4.592214111922141e-05, "loss": 0.6788, "step": 5940 }, { "epoch": 0.17345478964117836, "grad_norm": 0.6075286048135151, "learning_rate": 4.592051905920519e-05, "loss": 0.7292, "step": 5941 }, { "epoch": 0.17348398586902572, "grad_norm": 0.612589463838496, "learning_rate": 4.591889699918897e-05, "loss": 0.6999, "step": 5942 }, { "epoch": 0.17351318209687308, "grad_norm": 0.5657917626910454, "learning_rate": 4.591727493917275e-05, "loss": 0.6498, "step": 5943 }, { "epoch": 0.17354237832472044, "grad_norm": 0.5852062899728603, "learning_rate": 4.591565287915653e-05, "loss": 0.6501, "step": 5944 }, { "epoch": 0.1735715745525678, "grad_norm": 0.5953108623576416, "learning_rate": 4.5914030819140305e-05, "loss": 0.6811, "step": 5945 }, { "epoch": 0.17360077078041516, "grad_norm": 0.585641355147254, "learning_rate": 4.591240875912409e-05, "loss": 0.7075, "step": 5946 }, { "epoch": 0.17362996700826253, "grad_norm": 0.5858664576529101, "learning_rate": 4.5910786699107875e-05, "loss": 0.6767, "step": 5947 }, { "epoch": 0.1736591632361099, "grad_norm": 0.5581587227880745, "learning_rate": 4.590916463909165e-05, "loss": 0.631, "step": 5948 }, { "epoch": 0.17368835946395725, "grad_norm": 0.5937276251753212, "learning_rate": 4.590754257907543e-05, "loss": 0.6868, "step": 5949 }, { "epoch": 0.1737175556918046, "grad_norm": 0.6239015024304626, "learning_rate": 4.590592051905921e-05, "loss": 0.7597, "step": 5950 }, { "epoch": 0.17374675191965197, "grad_norm": 0.5721361630228857, "learning_rate": 4.590429845904299e-05, "loss": 0.6584, "step": 5951 }, { "epoch": 0.17377594814749933, "grad_norm": 0.5506370888033408, "learning_rate": 4.590267639902677e-05, "loss": 0.5807, "step": 5952 }, { "epoch": 0.1738051443753467, "grad_norm": 0.669491201751236, "learning_rate": 4.5901054339010545e-05, "loss": 0.8424, "step": 5953 }, { "epoch": 0.17383434060319405, "grad_norm": 0.584470792354902, "learning_rate": 4.589943227899433e-05, "loss": 0.6611, "step": 5954 }, { "epoch": 0.17386353683104144, "grad_norm": 0.5991599851735192, "learning_rate": 4.58978102189781e-05, "loss": 0.6286, "step": 5955 }, { "epoch": 0.1738927330588888, "grad_norm": 0.7025569810328682, "learning_rate": 4.5896188158961884e-05, "loss": 0.7102, "step": 5956 }, { "epoch": 0.17392192928673617, "grad_norm": 0.5750091170001972, "learning_rate": 4.5894566098945665e-05, "loss": 0.6206, "step": 5957 }, { "epoch": 0.17395112551458353, "grad_norm": 0.6699646709740534, "learning_rate": 4.589294403892944e-05, "loss": 0.7598, "step": 5958 }, { "epoch": 0.1739803217424309, "grad_norm": 0.6144032178972977, "learning_rate": 4.589132197891322e-05, "loss": 0.6651, "step": 5959 }, { "epoch": 0.17400951797027825, "grad_norm": 0.644547138994725, "learning_rate": 4.5889699918897e-05, "loss": 0.7402, "step": 5960 }, { "epoch": 0.1740387141981256, "grad_norm": 0.6563628903469806, "learning_rate": 4.588807785888078e-05, "loss": 0.6815, "step": 5961 }, { "epoch": 0.17406791042597297, "grad_norm": 0.5995568749535143, "learning_rate": 4.588645579886456e-05, "loss": 0.716, "step": 5962 }, { "epoch": 0.17409710665382033, "grad_norm": 0.5567485989876558, "learning_rate": 4.5884833738848336e-05, "loss": 0.6143, "step": 5963 }, { "epoch": 0.1741263028816677, "grad_norm": 0.5977187663367066, "learning_rate": 4.588321167883212e-05, "loss": 0.6954, "step": 5964 }, { "epoch": 0.17415549910951506, "grad_norm": 0.5534067559804434, "learning_rate": 4.58815896188159e-05, "loss": 0.5546, "step": 5965 }, { "epoch": 0.17418469533736242, "grad_norm": 0.6267896544291499, "learning_rate": 4.587996755879968e-05, "loss": 0.7536, "step": 5966 }, { "epoch": 0.17421389156520978, "grad_norm": 0.6742834469427204, "learning_rate": 4.5878345498783456e-05, "loss": 0.7215, "step": 5967 }, { "epoch": 0.17424308779305714, "grad_norm": 0.5626035497434564, "learning_rate": 4.587672343876724e-05, "loss": 0.6115, "step": 5968 }, { "epoch": 0.1742722840209045, "grad_norm": 0.5666298447678559, "learning_rate": 4.587510137875102e-05, "loss": 0.584, "step": 5969 }, { "epoch": 0.17430148024875186, "grad_norm": 0.5628874136286419, "learning_rate": 4.5873479318734795e-05, "loss": 0.6026, "step": 5970 }, { "epoch": 0.17433067647659922, "grad_norm": 0.61843307115381, "learning_rate": 4.5871857258718576e-05, "loss": 0.7102, "step": 5971 }, { "epoch": 0.17435987270444658, "grad_norm": 0.5824084806308024, "learning_rate": 4.587023519870236e-05, "loss": 0.6541, "step": 5972 }, { "epoch": 0.17438906893229394, "grad_norm": 0.5982039875818801, "learning_rate": 4.586861313868613e-05, "loss": 0.6524, "step": 5973 }, { "epoch": 0.1744182651601413, "grad_norm": 0.5675652890295294, "learning_rate": 4.5866991078669915e-05, "loss": 0.7042, "step": 5974 }, { "epoch": 0.17444746138798867, "grad_norm": 0.5677187521887558, "learning_rate": 4.586536901865369e-05, "loss": 0.6182, "step": 5975 }, { "epoch": 0.17447665761583603, "grad_norm": 0.7035405775024197, "learning_rate": 4.586374695863747e-05, "loss": 0.7789, "step": 5976 }, { "epoch": 0.1745058538436834, "grad_norm": 0.5434340106948609, "learning_rate": 4.586212489862125e-05, "loss": 0.5954, "step": 5977 }, { "epoch": 0.17453505007153075, "grad_norm": 0.5522949636143106, "learning_rate": 4.586050283860503e-05, "loss": 0.5305, "step": 5978 }, { "epoch": 0.1745642462993781, "grad_norm": 0.5912253970350672, "learning_rate": 4.585888077858881e-05, "loss": 0.6193, "step": 5979 }, { "epoch": 0.17459344252722547, "grad_norm": 0.5703119522206156, "learning_rate": 4.5857258718572585e-05, "loss": 0.5926, "step": 5980 }, { "epoch": 0.17462263875507283, "grad_norm": 0.5485247177190484, "learning_rate": 4.585563665855637e-05, "loss": 0.6101, "step": 5981 }, { "epoch": 0.1746518349829202, "grad_norm": 0.5863246390972624, "learning_rate": 4.585401459854015e-05, "loss": 0.6363, "step": 5982 }, { "epoch": 0.17468103121076756, "grad_norm": 0.5838445097689227, "learning_rate": 4.5852392538523924e-05, "loss": 0.6046, "step": 5983 }, { "epoch": 0.17471022743861492, "grad_norm": 0.5743526334375666, "learning_rate": 4.585077047850771e-05, "loss": 0.6134, "step": 5984 }, { "epoch": 0.1747394236664623, "grad_norm": 0.6167346968965645, "learning_rate": 4.584914841849149e-05, "loss": 0.7223, "step": 5985 }, { "epoch": 0.17476861989430967, "grad_norm": 0.596207813753275, "learning_rate": 4.584752635847527e-05, "loss": 0.6459, "step": 5986 }, { "epoch": 0.17479781612215703, "grad_norm": 0.6419171848998471, "learning_rate": 4.5845904298459044e-05, "loss": 0.7038, "step": 5987 }, { "epoch": 0.1748270123500044, "grad_norm": 0.6074491549058535, "learning_rate": 4.5844282238442826e-05, "loss": 0.6688, "step": 5988 }, { "epoch": 0.17485620857785175, "grad_norm": 0.5906842980098199, "learning_rate": 4.584266017842661e-05, "loss": 0.6793, "step": 5989 }, { "epoch": 0.1748854048056991, "grad_norm": 0.7070639547896849, "learning_rate": 4.584103811841038e-05, "loss": 0.7459, "step": 5990 }, { "epoch": 0.17491460103354647, "grad_norm": 0.6155114228609561, "learning_rate": 4.5839416058394164e-05, "loss": 0.7167, "step": 5991 }, { "epoch": 0.17494379726139384, "grad_norm": 0.5932309984437913, "learning_rate": 4.5837793998377946e-05, "loss": 0.7319, "step": 5992 }, { "epoch": 0.1749729934892412, "grad_norm": 0.5586824882808529, "learning_rate": 4.583617193836172e-05, "loss": 0.6323, "step": 5993 }, { "epoch": 0.17500218971708856, "grad_norm": 0.5835410811892314, "learning_rate": 4.58345498783455e-05, "loss": 0.6367, "step": 5994 }, { "epoch": 0.17503138594493592, "grad_norm": 0.5660095312811214, "learning_rate": 4.583292781832928e-05, "loss": 0.6162, "step": 5995 }, { "epoch": 0.17506058217278328, "grad_norm": 0.5969187609531292, "learning_rate": 4.583130575831306e-05, "loss": 0.6711, "step": 5996 }, { "epoch": 0.17508977840063064, "grad_norm": 0.5962793131834889, "learning_rate": 4.582968369829684e-05, "loss": 0.6974, "step": 5997 }, { "epoch": 0.175118974628478, "grad_norm": 0.6191384098508828, "learning_rate": 4.5828061638280616e-05, "loss": 0.7133, "step": 5998 }, { "epoch": 0.17514817085632536, "grad_norm": 0.5420802751729529, "learning_rate": 4.58264395782644e-05, "loss": 0.6138, "step": 5999 }, { "epoch": 0.17517736708417272, "grad_norm": 0.6101285142986081, "learning_rate": 4.582481751824817e-05, "loss": 0.7371, "step": 6000 }, { "epoch": 0.17520656331202009, "grad_norm": 0.5524991691398631, "learning_rate": 4.5823195458231955e-05, "loss": 0.6021, "step": 6001 }, { "epoch": 0.17523575953986745, "grad_norm": 0.6107423387255029, "learning_rate": 4.5821573398215736e-05, "loss": 0.687, "step": 6002 }, { "epoch": 0.1752649557677148, "grad_norm": 0.5711401321458954, "learning_rate": 4.581995133819952e-05, "loss": 0.585, "step": 6003 }, { "epoch": 0.17529415199556217, "grad_norm": 0.5550916892161926, "learning_rate": 4.58183292781833e-05, "loss": 0.6066, "step": 6004 }, { "epoch": 0.17532334822340953, "grad_norm": 0.6566003073321384, "learning_rate": 4.5816707218167075e-05, "loss": 0.6858, "step": 6005 }, { "epoch": 0.1753525444512569, "grad_norm": 0.5584432708099208, "learning_rate": 4.581508515815086e-05, "loss": 0.6558, "step": 6006 }, { "epoch": 0.17538174067910425, "grad_norm": 0.5268690035303469, "learning_rate": 4.581346309813463e-05, "loss": 0.527, "step": 6007 }, { "epoch": 0.17541093690695161, "grad_norm": 0.5590106001264222, "learning_rate": 4.581184103811841e-05, "loss": 0.5769, "step": 6008 }, { "epoch": 0.17544013313479898, "grad_norm": 0.7288374806469599, "learning_rate": 4.5810218978102195e-05, "loss": 0.7733, "step": 6009 }, { "epoch": 0.17546932936264634, "grad_norm": 0.6139864605464057, "learning_rate": 4.580859691808597e-05, "loss": 0.6529, "step": 6010 }, { "epoch": 0.1754985255904937, "grad_norm": 0.6272917821517018, "learning_rate": 4.580697485806975e-05, "loss": 0.7015, "step": 6011 }, { "epoch": 0.17552772181834106, "grad_norm": 0.6222895558165724, "learning_rate": 4.580535279805353e-05, "loss": 0.6887, "step": 6012 }, { "epoch": 0.17555691804618842, "grad_norm": 0.5861694780043523, "learning_rate": 4.580373073803731e-05, "loss": 0.6847, "step": 6013 }, { "epoch": 0.17558611427403578, "grad_norm": 0.5942726435841184, "learning_rate": 4.580210867802109e-05, "loss": 0.6788, "step": 6014 }, { "epoch": 0.17561531050188314, "grad_norm": 0.5646617194754215, "learning_rate": 4.5800486618004865e-05, "loss": 0.6663, "step": 6015 }, { "epoch": 0.17564450672973053, "grad_norm": 0.6091722829299163, "learning_rate": 4.579886455798865e-05, "loss": 0.6574, "step": 6016 }, { "epoch": 0.1756737029575779, "grad_norm": 0.5593454735937469, "learning_rate": 4.579724249797243e-05, "loss": 0.6147, "step": 6017 }, { "epoch": 0.17570289918542525, "grad_norm": 0.6049702559878932, "learning_rate": 4.5795620437956204e-05, "loss": 0.745, "step": 6018 }, { "epoch": 0.17573209541327262, "grad_norm": 0.5876179792863211, "learning_rate": 4.5793998377939986e-05, "loss": 0.7404, "step": 6019 }, { "epoch": 0.17576129164111998, "grad_norm": 0.5028907611130127, "learning_rate": 4.579237631792376e-05, "loss": 0.5275, "step": 6020 }, { "epoch": 0.17579048786896734, "grad_norm": 0.6031377497895393, "learning_rate": 4.579075425790754e-05, "loss": 0.7156, "step": 6021 }, { "epoch": 0.1758196840968147, "grad_norm": 0.6037123346521123, "learning_rate": 4.5789132197891324e-05, "loss": 0.6837, "step": 6022 }, { "epoch": 0.17584888032466206, "grad_norm": 0.5480674151724803, "learning_rate": 4.5787510137875106e-05, "loss": 0.6138, "step": 6023 }, { "epoch": 0.17587807655250942, "grad_norm": 0.5554803296247918, "learning_rate": 4.578588807785889e-05, "loss": 0.6161, "step": 6024 }, { "epoch": 0.17590727278035678, "grad_norm": 0.5595258155912444, "learning_rate": 4.578426601784266e-05, "loss": 0.6435, "step": 6025 }, { "epoch": 0.17593646900820414, "grad_norm": 0.609298778277565, "learning_rate": 4.5782643957826444e-05, "loss": 0.7628, "step": 6026 }, { "epoch": 0.1759656652360515, "grad_norm": 1.090789297815155, "learning_rate": 4.578102189781022e-05, "loss": 0.8447, "step": 6027 }, { "epoch": 0.17599486146389887, "grad_norm": 0.5645321508596314, "learning_rate": 4.5779399837794e-05, "loss": 0.6419, "step": 6028 }, { "epoch": 0.17602405769174623, "grad_norm": 0.594121888389272, "learning_rate": 4.577777777777778e-05, "loss": 0.6598, "step": 6029 }, { "epoch": 0.1760532539195936, "grad_norm": 0.5320376063899259, "learning_rate": 4.577615571776156e-05, "loss": 0.5797, "step": 6030 }, { "epoch": 0.17608245014744095, "grad_norm": 0.5803836136353633, "learning_rate": 4.577453365774534e-05, "loss": 0.6494, "step": 6031 }, { "epoch": 0.1761116463752883, "grad_norm": 0.593030534361447, "learning_rate": 4.5772911597729115e-05, "loss": 0.6716, "step": 6032 }, { "epoch": 0.17614084260313567, "grad_norm": 0.5668867058692877, "learning_rate": 4.5771289537712896e-05, "loss": 0.6723, "step": 6033 }, { "epoch": 0.17617003883098303, "grad_norm": 0.570369173931031, "learning_rate": 4.576966747769668e-05, "loss": 0.6483, "step": 6034 }, { "epoch": 0.1761992350588304, "grad_norm": 0.6404720119244698, "learning_rate": 4.576804541768045e-05, "loss": 0.7802, "step": 6035 }, { "epoch": 0.17622843128667776, "grad_norm": 0.5703326299835232, "learning_rate": 4.5766423357664235e-05, "loss": 0.6648, "step": 6036 }, { "epoch": 0.17625762751452512, "grad_norm": 0.5853594409906505, "learning_rate": 4.576480129764802e-05, "loss": 0.6659, "step": 6037 }, { "epoch": 0.17628682374237248, "grad_norm": 0.6748316421545751, "learning_rate": 4.576317923763179e-05, "loss": 0.8375, "step": 6038 }, { "epoch": 0.17631601997021984, "grad_norm": 0.5565069126185025, "learning_rate": 4.5761557177615573e-05, "loss": 0.6087, "step": 6039 }, { "epoch": 0.1763452161980672, "grad_norm": 0.5901094938493123, "learning_rate": 4.575993511759935e-05, "loss": 0.6591, "step": 6040 }, { "epoch": 0.17637441242591456, "grad_norm": 0.6469832930486583, "learning_rate": 4.575831305758314e-05, "loss": 0.715, "step": 6041 }, { "epoch": 0.17640360865376192, "grad_norm": 0.6312431439403833, "learning_rate": 4.575669099756691e-05, "loss": 0.795, "step": 6042 }, { "epoch": 0.17643280488160928, "grad_norm": 0.7354415557790096, "learning_rate": 4.5755068937550694e-05, "loss": 0.6611, "step": 6043 }, { "epoch": 0.17646200110945665, "grad_norm": 0.6678424824679647, "learning_rate": 4.5753446877534476e-05, "loss": 0.799, "step": 6044 }, { "epoch": 0.176491197337304, "grad_norm": 0.640676857106273, "learning_rate": 4.575182481751825e-05, "loss": 0.7574, "step": 6045 }, { "epoch": 0.1765203935651514, "grad_norm": 0.6802883848228841, "learning_rate": 4.575020275750203e-05, "loss": 0.7172, "step": 6046 }, { "epoch": 0.17654958979299876, "grad_norm": 0.7167135106390132, "learning_rate": 4.574858069748581e-05, "loss": 0.7698, "step": 6047 }, { "epoch": 0.17657878602084612, "grad_norm": 0.6227847917168634, "learning_rate": 4.574695863746959e-05, "loss": 0.7143, "step": 6048 }, { "epoch": 0.17660798224869348, "grad_norm": 0.6012653785033332, "learning_rate": 4.574533657745337e-05, "loss": 0.7178, "step": 6049 }, { "epoch": 0.17663717847654084, "grad_norm": 0.5780171683787174, "learning_rate": 4.5743714517437146e-05, "loss": 0.6673, "step": 6050 }, { "epoch": 0.1766663747043882, "grad_norm": 0.5913713011189533, "learning_rate": 4.574209245742093e-05, "loss": 0.6735, "step": 6051 }, { "epoch": 0.17669557093223556, "grad_norm": 0.584073531926541, "learning_rate": 4.57404703974047e-05, "loss": 0.6027, "step": 6052 }, { "epoch": 0.17672476716008292, "grad_norm": 0.6091153713556661, "learning_rate": 4.5738848337388484e-05, "loss": 0.675, "step": 6053 }, { "epoch": 0.17675396338793029, "grad_norm": 0.6242701734938538, "learning_rate": 4.5737226277372266e-05, "loss": 0.7549, "step": 6054 }, { "epoch": 0.17678315961577765, "grad_norm": 0.546106895152889, "learning_rate": 4.573560421735604e-05, "loss": 0.5867, "step": 6055 }, { "epoch": 0.176812355843625, "grad_norm": 0.584242176478783, "learning_rate": 4.573398215733982e-05, "loss": 0.696, "step": 6056 }, { "epoch": 0.17684155207147237, "grad_norm": 0.5795633367425356, "learning_rate": 4.57323600973236e-05, "loss": 0.6969, "step": 6057 }, { "epoch": 0.17687074829931973, "grad_norm": 0.5841619398809577, "learning_rate": 4.573073803730738e-05, "loss": 0.6558, "step": 6058 }, { "epoch": 0.1768999445271671, "grad_norm": 0.588264156496515, "learning_rate": 4.572911597729116e-05, "loss": 0.6715, "step": 6059 }, { "epoch": 0.17692914075501445, "grad_norm": 0.6487203074815949, "learning_rate": 4.572749391727494e-05, "loss": 0.7322, "step": 6060 }, { "epoch": 0.1769583369828618, "grad_norm": 0.6198515408239346, "learning_rate": 4.5725871857258725e-05, "loss": 0.7476, "step": 6061 }, { "epoch": 0.17698753321070917, "grad_norm": 0.5456161979488183, "learning_rate": 4.57242497972425e-05, "loss": 0.5476, "step": 6062 }, { "epoch": 0.17701672943855654, "grad_norm": 0.6127355727006544, "learning_rate": 4.572262773722628e-05, "loss": 0.7237, "step": 6063 }, { "epoch": 0.1770459256664039, "grad_norm": 0.6383221367945979, "learning_rate": 4.572100567721006e-05, "loss": 0.7392, "step": 6064 }, { "epoch": 0.17707512189425126, "grad_norm": 0.5640758301508574, "learning_rate": 4.571938361719384e-05, "loss": 0.6556, "step": 6065 }, { "epoch": 0.17710431812209862, "grad_norm": 0.5973339945605752, "learning_rate": 4.571776155717762e-05, "loss": 0.6192, "step": 6066 }, { "epoch": 0.17713351434994598, "grad_norm": 0.5947081617724828, "learning_rate": 4.5716139497161395e-05, "loss": 0.6698, "step": 6067 }, { "epoch": 0.17716271057779334, "grad_norm": 0.578835745804965, "learning_rate": 4.571451743714518e-05, "loss": 0.6441, "step": 6068 }, { "epoch": 0.1771919068056407, "grad_norm": 0.6159492265492589, "learning_rate": 4.571289537712896e-05, "loss": 0.6784, "step": 6069 }, { "epoch": 0.17722110303348806, "grad_norm": 0.5739911757097809, "learning_rate": 4.5711273317112734e-05, "loss": 0.6074, "step": 6070 }, { "epoch": 0.17725029926133543, "grad_norm": 0.5697793326748964, "learning_rate": 4.5709651257096515e-05, "loss": 0.6943, "step": 6071 }, { "epoch": 0.1772794954891828, "grad_norm": 0.5577792913839352, "learning_rate": 4.570802919708029e-05, "loss": 0.5958, "step": 6072 }, { "epoch": 0.17730869171703015, "grad_norm": 0.6045359561046014, "learning_rate": 4.570640713706407e-05, "loss": 0.7269, "step": 6073 }, { "epoch": 0.1773378879448775, "grad_norm": 0.6771237144923954, "learning_rate": 4.5704785077047854e-05, "loss": 0.7985, "step": 6074 }, { "epoch": 0.17736708417272487, "grad_norm": 0.5422852378485891, "learning_rate": 4.570316301703163e-05, "loss": 0.5712, "step": 6075 }, { "epoch": 0.17739628040057226, "grad_norm": 0.581361203856526, "learning_rate": 4.570154095701541e-05, "loss": 0.654, "step": 6076 }, { "epoch": 0.17742547662841962, "grad_norm": 0.5460212114788552, "learning_rate": 4.5699918896999186e-05, "loss": 0.5721, "step": 6077 }, { "epoch": 0.17745467285626698, "grad_norm": 0.6178653307453823, "learning_rate": 4.569829683698297e-05, "loss": 0.7179, "step": 6078 }, { "epoch": 0.17748386908411434, "grad_norm": 0.6646803809327348, "learning_rate": 4.5696674776966756e-05, "loss": 0.7252, "step": 6079 }, { "epoch": 0.1775130653119617, "grad_norm": 0.5781309152583262, "learning_rate": 4.569505271695053e-05, "loss": 0.6047, "step": 6080 }, { "epoch": 0.17754226153980907, "grad_norm": 0.5881107999489691, "learning_rate": 4.569343065693431e-05, "loss": 0.6591, "step": 6081 }, { "epoch": 0.17757145776765643, "grad_norm": 0.5725375778500251, "learning_rate": 4.569180859691809e-05, "loss": 0.618, "step": 6082 }, { "epoch": 0.1776006539955038, "grad_norm": 0.6459827671275353, "learning_rate": 4.569018653690187e-05, "loss": 0.7738, "step": 6083 }, { "epoch": 0.17762985022335115, "grad_norm": 0.5882064269370229, "learning_rate": 4.568856447688565e-05, "loss": 0.6981, "step": 6084 }, { "epoch": 0.1776590464511985, "grad_norm": 0.5722455253635033, "learning_rate": 4.5686942416869426e-05, "loss": 0.6299, "step": 6085 }, { "epoch": 0.17768824267904587, "grad_norm": 0.5675246854133905, "learning_rate": 4.568532035685321e-05, "loss": 0.6315, "step": 6086 }, { "epoch": 0.17771743890689323, "grad_norm": 0.5990440581073824, "learning_rate": 4.568369829683698e-05, "loss": 0.6563, "step": 6087 }, { "epoch": 0.1777466351347406, "grad_norm": 0.6128041958722452, "learning_rate": 4.5682076236820765e-05, "loss": 0.6553, "step": 6088 }, { "epoch": 0.17777583136258795, "grad_norm": 0.5915618193382404, "learning_rate": 4.5680454176804546e-05, "loss": 0.6876, "step": 6089 }, { "epoch": 0.17780502759043532, "grad_norm": 0.5911612667089116, "learning_rate": 4.567883211678832e-05, "loss": 0.6248, "step": 6090 }, { "epoch": 0.17783422381828268, "grad_norm": 0.5620571222636109, "learning_rate": 4.56772100567721e-05, "loss": 0.6317, "step": 6091 }, { "epoch": 0.17786342004613004, "grad_norm": 0.5501570867156111, "learning_rate": 4.567558799675588e-05, "loss": 0.6196, "step": 6092 }, { "epoch": 0.1778926162739774, "grad_norm": 0.6001463237022314, "learning_rate": 4.567396593673966e-05, "loss": 0.6958, "step": 6093 }, { "epoch": 0.17792181250182476, "grad_norm": 0.6043862134967741, "learning_rate": 4.567234387672344e-05, "loss": 0.7209, "step": 6094 }, { "epoch": 0.17795100872967212, "grad_norm": 0.635147797622544, "learning_rate": 4.567072181670722e-05, "loss": 0.6978, "step": 6095 }, { "epoch": 0.17798020495751948, "grad_norm": 0.5696948806363936, "learning_rate": 4.5669099756691e-05, "loss": 0.6009, "step": 6096 }, { "epoch": 0.17800940118536684, "grad_norm": 0.5985558835716396, "learning_rate": 4.566747769667478e-05, "loss": 0.6555, "step": 6097 }, { "epoch": 0.1780385974132142, "grad_norm": 0.6351229473542485, "learning_rate": 4.566585563665856e-05, "loss": 0.7346, "step": 6098 }, { "epoch": 0.17806779364106157, "grad_norm": 0.5433561120990652, "learning_rate": 4.566423357664234e-05, "loss": 0.6014, "step": 6099 }, { "epoch": 0.17809698986890893, "grad_norm": 0.554897081701225, "learning_rate": 4.566261151662612e-05, "loss": 0.6483, "step": 6100 }, { "epoch": 0.1781261860967563, "grad_norm": 0.6496918334276891, "learning_rate": 4.56609894566099e-05, "loss": 0.6955, "step": 6101 }, { "epoch": 0.17815538232460365, "grad_norm": 0.6297520189330784, "learning_rate": 4.5659367396593675e-05, "loss": 0.6961, "step": 6102 }, { "epoch": 0.178184578552451, "grad_norm": 0.5827430668090056, "learning_rate": 4.565774533657746e-05, "loss": 0.6735, "step": 6103 }, { "epoch": 0.17821377478029837, "grad_norm": 0.5827987440343863, "learning_rate": 4.565612327656124e-05, "loss": 0.6199, "step": 6104 }, { "epoch": 0.17824297100814573, "grad_norm": 0.6437656777256708, "learning_rate": 4.5654501216545014e-05, "loss": 0.797, "step": 6105 }, { "epoch": 0.17827216723599312, "grad_norm": 0.6004354766352193, "learning_rate": 4.5652879156528796e-05, "loss": 0.6624, "step": 6106 }, { "epoch": 0.17830136346384048, "grad_norm": 0.6242671176408888, "learning_rate": 4.565125709651257e-05, "loss": 0.6617, "step": 6107 }, { "epoch": 0.17833055969168785, "grad_norm": 0.577611820993359, "learning_rate": 4.564963503649635e-05, "loss": 0.6993, "step": 6108 }, { "epoch": 0.1783597559195352, "grad_norm": 0.585287596963774, "learning_rate": 4.5648012976480134e-05, "loss": 0.6181, "step": 6109 }, { "epoch": 0.17838895214738257, "grad_norm": 0.6185803184003633, "learning_rate": 4.564639091646391e-05, "loss": 0.7516, "step": 6110 }, { "epoch": 0.17841814837522993, "grad_norm": 0.5353845176464828, "learning_rate": 4.564476885644769e-05, "loss": 0.5713, "step": 6111 }, { "epoch": 0.1784473446030773, "grad_norm": 0.5930914253160393, "learning_rate": 4.5643146796431466e-05, "loss": 0.6516, "step": 6112 }, { "epoch": 0.17847654083092465, "grad_norm": 0.5758846756929922, "learning_rate": 4.564152473641525e-05, "loss": 0.6269, "step": 6113 }, { "epoch": 0.178505737058772, "grad_norm": 0.6083750340426812, "learning_rate": 4.563990267639903e-05, "loss": 0.7061, "step": 6114 }, { "epoch": 0.17853493328661937, "grad_norm": 0.5933228912920664, "learning_rate": 4.5638280616382804e-05, "loss": 0.7058, "step": 6115 }, { "epoch": 0.17856412951446674, "grad_norm": 0.5980461854663488, "learning_rate": 4.563665855636659e-05, "loss": 0.6482, "step": 6116 }, { "epoch": 0.1785933257423141, "grad_norm": 0.5665867494325814, "learning_rate": 4.563503649635037e-05, "loss": 0.6555, "step": 6117 }, { "epoch": 0.17862252197016146, "grad_norm": 0.5658242283293059, "learning_rate": 4.563341443633415e-05, "loss": 0.655, "step": 6118 }, { "epoch": 0.17865171819800882, "grad_norm": 0.5901249562306624, "learning_rate": 4.5631792376317925e-05, "loss": 0.722, "step": 6119 }, { "epoch": 0.17868091442585618, "grad_norm": 0.5731183888336225, "learning_rate": 4.5630170316301706e-05, "loss": 0.6243, "step": 6120 }, { "epoch": 0.17871011065370354, "grad_norm": 0.633079159772129, "learning_rate": 4.562854825628549e-05, "loss": 0.7077, "step": 6121 }, { "epoch": 0.1787393068815509, "grad_norm": 0.5722045858818486, "learning_rate": 4.562692619626926e-05, "loss": 0.6498, "step": 6122 }, { "epoch": 0.17876850310939826, "grad_norm": 0.5280334966115475, "learning_rate": 4.5625304136253045e-05, "loss": 0.5988, "step": 6123 }, { "epoch": 0.17879769933724562, "grad_norm": 0.5895438836537711, "learning_rate": 4.562368207623682e-05, "loss": 0.7057, "step": 6124 }, { "epoch": 0.17882689556509299, "grad_norm": 0.6289615438006939, "learning_rate": 4.56220600162206e-05, "loss": 0.7478, "step": 6125 }, { "epoch": 0.17885609179294035, "grad_norm": 0.580476522949248, "learning_rate": 4.5620437956204383e-05, "loss": 0.6736, "step": 6126 }, { "epoch": 0.1788852880207877, "grad_norm": 0.62615442367457, "learning_rate": 4.561881589618816e-05, "loss": 0.6596, "step": 6127 }, { "epoch": 0.17891448424863507, "grad_norm": 0.6011012708303499, "learning_rate": 4.561719383617194e-05, "loss": 0.6954, "step": 6128 }, { "epoch": 0.17894368047648243, "grad_norm": 0.7415993679110047, "learning_rate": 4.561557177615572e-05, "loss": 0.8079, "step": 6129 }, { "epoch": 0.1789728767043298, "grad_norm": 0.4864494648426251, "learning_rate": 4.56139497161395e-05, "loss": 0.5178, "step": 6130 }, { "epoch": 0.17900207293217715, "grad_norm": 0.6290492469238281, "learning_rate": 4.561232765612328e-05, "loss": 0.7311, "step": 6131 }, { "epoch": 0.17903126916002451, "grad_norm": 0.5632279038976256, "learning_rate": 4.5610705596107054e-05, "loss": 0.6766, "step": 6132 }, { "epoch": 0.17906046538787188, "grad_norm": 0.607248792257946, "learning_rate": 4.5609083536090835e-05, "loss": 0.7077, "step": 6133 }, { "epoch": 0.17908966161571924, "grad_norm": 0.6671706560777333, "learning_rate": 4.560746147607462e-05, "loss": 0.6197, "step": 6134 }, { "epoch": 0.1791188578435666, "grad_norm": 0.5999071200174343, "learning_rate": 4.56058394160584e-05, "loss": 0.6724, "step": 6135 }, { "epoch": 0.179148054071414, "grad_norm": 0.5465999998028768, "learning_rate": 4.560421735604218e-05, "loss": 0.6349, "step": 6136 }, { "epoch": 0.17917725029926135, "grad_norm": 0.5712236615148392, "learning_rate": 4.5602595296025956e-05, "loss": 0.6739, "step": 6137 }, { "epoch": 0.1792064465271087, "grad_norm": 0.6514009770524865, "learning_rate": 4.560097323600974e-05, "loss": 0.6768, "step": 6138 }, { "epoch": 0.17923564275495607, "grad_norm": 0.5647768878232678, "learning_rate": 4.559935117599351e-05, "loss": 0.6332, "step": 6139 }, { "epoch": 0.17926483898280343, "grad_norm": 0.613298484899828, "learning_rate": 4.5597729115977294e-05, "loss": 0.7224, "step": 6140 }, { "epoch": 0.1792940352106508, "grad_norm": 0.5632218504634717, "learning_rate": 4.5596107055961076e-05, "loss": 0.6165, "step": 6141 }, { "epoch": 0.17932323143849815, "grad_norm": 0.6113540035028794, "learning_rate": 4.559448499594485e-05, "loss": 0.672, "step": 6142 }, { "epoch": 0.17935242766634552, "grad_norm": 0.5898245259623504, "learning_rate": 4.559286293592863e-05, "loss": 0.691, "step": 6143 }, { "epoch": 0.17938162389419288, "grad_norm": 0.6561808205328615, "learning_rate": 4.559124087591241e-05, "loss": 0.7918, "step": 6144 }, { "epoch": 0.17941082012204024, "grad_norm": 0.5668411498922641, "learning_rate": 4.558961881589619e-05, "loss": 0.669, "step": 6145 }, { "epoch": 0.1794400163498876, "grad_norm": 0.5793922841906006, "learning_rate": 4.558799675587997e-05, "loss": 0.6427, "step": 6146 }, { "epoch": 0.17946921257773496, "grad_norm": 0.5543356224458553, "learning_rate": 4.5586374695863746e-05, "loss": 0.5417, "step": 6147 }, { "epoch": 0.17949840880558232, "grad_norm": 0.6588763822944314, "learning_rate": 4.558475263584753e-05, "loss": 0.5954, "step": 6148 }, { "epoch": 0.17952760503342968, "grad_norm": 0.619415017556679, "learning_rate": 4.558313057583131e-05, "loss": 0.6845, "step": 6149 }, { "epoch": 0.17955680126127704, "grad_norm": 0.6542959274834812, "learning_rate": 4.5581508515815085e-05, "loss": 0.7166, "step": 6150 }, { "epoch": 0.1795859974891244, "grad_norm": 0.573859116577552, "learning_rate": 4.5579886455798867e-05, "loss": 0.6694, "step": 6151 }, { "epoch": 0.17961519371697177, "grad_norm": 0.6524246738054416, "learning_rate": 4.557826439578264e-05, "loss": 0.771, "step": 6152 }, { "epoch": 0.17964438994481913, "grad_norm": 0.5991420545208687, "learning_rate": 4.557664233576642e-05, "loss": 0.6542, "step": 6153 }, { "epoch": 0.1796735861726665, "grad_norm": 0.6631004359614522, "learning_rate": 4.5575020275750205e-05, "loss": 0.703, "step": 6154 }, { "epoch": 0.17970278240051385, "grad_norm": 0.6250523867610843, "learning_rate": 4.557339821573399e-05, "loss": 0.7034, "step": 6155 }, { "epoch": 0.1797319786283612, "grad_norm": 0.5575647469663437, "learning_rate": 4.557177615571777e-05, "loss": 0.6194, "step": 6156 }, { "epoch": 0.17976117485620857, "grad_norm": 0.6319570536355045, "learning_rate": 4.5570154095701544e-05, "loss": 0.6818, "step": 6157 }, { "epoch": 0.17979037108405593, "grad_norm": 0.5468709849821216, "learning_rate": 4.5568532035685325e-05, "loss": 0.6465, "step": 6158 }, { "epoch": 0.1798195673119033, "grad_norm": 0.616448750607881, "learning_rate": 4.55669099756691e-05, "loss": 0.7442, "step": 6159 }, { "epoch": 0.17984876353975066, "grad_norm": 0.6862149393110749, "learning_rate": 4.556528791565288e-05, "loss": 0.6934, "step": 6160 }, { "epoch": 0.17987795976759802, "grad_norm": 0.6371612885467277, "learning_rate": 4.5563665855636664e-05, "loss": 0.7126, "step": 6161 }, { "epoch": 0.17990715599544538, "grad_norm": 0.6324955447818515, "learning_rate": 4.556204379562044e-05, "loss": 0.7187, "step": 6162 }, { "epoch": 0.17993635222329274, "grad_norm": 0.6425529092988954, "learning_rate": 4.556042173560422e-05, "loss": 0.7583, "step": 6163 }, { "epoch": 0.1799655484511401, "grad_norm": 0.6427350068989383, "learning_rate": 4.5558799675587996e-05, "loss": 0.7517, "step": 6164 }, { "epoch": 0.17999474467898746, "grad_norm": 0.6742929554213944, "learning_rate": 4.555717761557178e-05, "loss": 0.8007, "step": 6165 }, { "epoch": 0.18002394090683485, "grad_norm": 0.5887346829993523, "learning_rate": 4.555555555555556e-05, "loss": 0.6781, "step": 6166 }, { "epoch": 0.1800531371346822, "grad_norm": 0.6654829052134087, "learning_rate": 4.5553933495539334e-05, "loss": 0.7427, "step": 6167 }, { "epoch": 0.18008233336252957, "grad_norm": 0.585610396160591, "learning_rate": 4.5552311435523116e-05, "loss": 0.6431, "step": 6168 }, { "epoch": 0.18011152959037693, "grad_norm": 0.6233831296674457, "learning_rate": 4.555068937550689e-05, "loss": 0.7533, "step": 6169 }, { "epoch": 0.1801407258182243, "grad_norm": 0.6251098385146371, "learning_rate": 4.554906731549067e-05, "loss": 0.7236, "step": 6170 }, { "epoch": 0.18016992204607166, "grad_norm": 0.583371995344199, "learning_rate": 4.5547445255474454e-05, "loss": 0.6754, "step": 6171 }, { "epoch": 0.18019911827391902, "grad_norm": 0.6609883116419019, "learning_rate": 4.554582319545823e-05, "loss": 0.7149, "step": 6172 }, { "epoch": 0.18022831450176638, "grad_norm": 0.5994120585443817, "learning_rate": 4.554420113544202e-05, "loss": 0.6556, "step": 6173 }, { "epoch": 0.18025751072961374, "grad_norm": 0.5416033093990091, "learning_rate": 4.554257907542579e-05, "loss": 0.5232, "step": 6174 }, { "epoch": 0.1802867069574611, "grad_norm": 0.6292683987936403, "learning_rate": 4.5540957015409575e-05, "loss": 0.7467, "step": 6175 }, { "epoch": 0.18031590318530846, "grad_norm": 0.5231106142997461, "learning_rate": 4.5539334955393356e-05, "loss": 0.598, "step": 6176 }, { "epoch": 0.18034509941315582, "grad_norm": 0.5998649930454, "learning_rate": 4.553771289537713e-05, "loss": 0.6355, "step": 6177 }, { "epoch": 0.18037429564100319, "grad_norm": 0.5723241504008366, "learning_rate": 4.553609083536091e-05, "loss": 0.6655, "step": 6178 }, { "epoch": 0.18040349186885055, "grad_norm": 0.5431764002240086, "learning_rate": 4.553446877534469e-05, "loss": 0.5865, "step": 6179 }, { "epoch": 0.1804326880966979, "grad_norm": 0.5907113324560967, "learning_rate": 4.553284671532847e-05, "loss": 0.5741, "step": 6180 }, { "epoch": 0.18046188432454527, "grad_norm": 0.5703464692870696, "learning_rate": 4.553122465531225e-05, "loss": 0.64, "step": 6181 }, { "epoch": 0.18049108055239263, "grad_norm": 0.5575077053260407, "learning_rate": 4.552960259529603e-05, "loss": 0.5998, "step": 6182 }, { "epoch": 0.18052027678024, "grad_norm": 0.5486222655610541, "learning_rate": 4.552798053527981e-05, "loss": 0.6228, "step": 6183 }, { "epoch": 0.18054947300808735, "grad_norm": 0.5924027902120907, "learning_rate": 4.552635847526358e-05, "loss": 0.5971, "step": 6184 }, { "epoch": 0.1805786692359347, "grad_norm": 0.56862288426256, "learning_rate": 4.5524736415247365e-05, "loss": 0.5987, "step": 6185 }, { "epoch": 0.18060786546378207, "grad_norm": 0.6896637652611073, "learning_rate": 4.552311435523115e-05, "loss": 0.7943, "step": 6186 }, { "epoch": 0.18063706169162944, "grad_norm": 0.5559938083620174, "learning_rate": 4.552149229521492e-05, "loss": 0.6486, "step": 6187 }, { "epoch": 0.1806662579194768, "grad_norm": 0.6117417029106339, "learning_rate": 4.5519870235198704e-05, "loss": 0.7161, "step": 6188 }, { "epoch": 0.18069545414732416, "grad_norm": 0.608502485824124, "learning_rate": 4.551824817518248e-05, "loss": 0.6831, "step": 6189 }, { "epoch": 0.18072465037517152, "grad_norm": 0.6702730798177913, "learning_rate": 4.551662611516626e-05, "loss": 0.825, "step": 6190 }, { "epoch": 0.18075384660301888, "grad_norm": 0.6569888731530601, "learning_rate": 4.551500405515004e-05, "loss": 0.6977, "step": 6191 }, { "epoch": 0.18078304283086624, "grad_norm": 0.5963681733698578, "learning_rate": 4.5513381995133824e-05, "loss": 0.6697, "step": 6192 }, { "epoch": 0.1808122390587136, "grad_norm": 0.5809284534757542, "learning_rate": 4.5511759935117606e-05, "loss": 0.6207, "step": 6193 }, { "epoch": 0.18084143528656096, "grad_norm": 0.5652315278269331, "learning_rate": 4.551013787510138e-05, "loss": 0.6721, "step": 6194 }, { "epoch": 0.18087063151440833, "grad_norm": 0.617240794082778, "learning_rate": 4.550851581508516e-05, "loss": 0.7412, "step": 6195 }, { "epoch": 0.18089982774225571, "grad_norm": 0.5811331457483918, "learning_rate": 4.5506893755068944e-05, "loss": 0.6241, "step": 6196 }, { "epoch": 0.18092902397010308, "grad_norm": 0.7023041794212141, "learning_rate": 4.550527169505272e-05, "loss": 0.8083, "step": 6197 }, { "epoch": 0.18095822019795044, "grad_norm": 0.6345445988287934, "learning_rate": 4.55036496350365e-05, "loss": 0.7175, "step": 6198 }, { "epoch": 0.1809874164257978, "grad_norm": 0.6351808475101284, "learning_rate": 4.5502027575020276e-05, "loss": 0.7783, "step": 6199 }, { "epoch": 0.18101661265364516, "grad_norm": 0.6559561044119769, "learning_rate": 4.550040551500406e-05, "loss": 0.707, "step": 6200 }, { "epoch": 0.18104580888149252, "grad_norm": 0.7458180745538777, "learning_rate": 4.549878345498784e-05, "loss": 0.6838, "step": 6201 }, { "epoch": 0.18107500510933988, "grad_norm": 0.5656880496375456, "learning_rate": 4.5497161394971614e-05, "loss": 0.6659, "step": 6202 }, { "epoch": 0.18110420133718724, "grad_norm": 0.5610351558421718, "learning_rate": 4.5495539334955396e-05, "loss": 0.6578, "step": 6203 }, { "epoch": 0.1811333975650346, "grad_norm": 0.5857702084863762, "learning_rate": 4.549391727493917e-05, "loss": 0.6805, "step": 6204 }, { "epoch": 0.18116259379288197, "grad_norm": 0.5646496726160779, "learning_rate": 4.549229521492295e-05, "loss": 0.6033, "step": 6205 }, { "epoch": 0.18119179002072933, "grad_norm": 0.6095060459065611, "learning_rate": 4.5490673154906735e-05, "loss": 0.6852, "step": 6206 }, { "epoch": 0.1812209862485767, "grad_norm": 0.5923006549565696, "learning_rate": 4.548905109489051e-05, "loss": 0.6957, "step": 6207 }, { "epoch": 0.18125018247642405, "grad_norm": 0.5323276260994662, "learning_rate": 4.548742903487429e-05, "loss": 0.5597, "step": 6208 }, { "epoch": 0.1812793787042714, "grad_norm": 0.5636054134343678, "learning_rate": 4.5485806974858066e-05, "loss": 0.6109, "step": 6209 }, { "epoch": 0.18130857493211877, "grad_norm": 0.6403355667406699, "learning_rate": 4.548418491484185e-05, "loss": 0.7848, "step": 6210 }, { "epoch": 0.18133777115996613, "grad_norm": 0.5945284750778849, "learning_rate": 4.548256285482563e-05, "loss": 0.6625, "step": 6211 }, { "epoch": 0.1813669673878135, "grad_norm": 0.5943636466176991, "learning_rate": 4.548094079480941e-05, "loss": 0.6701, "step": 6212 }, { "epoch": 0.18139616361566085, "grad_norm": 0.5629309194653805, "learning_rate": 4.5479318734793193e-05, "loss": 0.6024, "step": 6213 }, { "epoch": 0.18142535984350822, "grad_norm": 0.6061522319597712, "learning_rate": 4.547769667477697e-05, "loss": 0.6635, "step": 6214 }, { "epoch": 0.18145455607135558, "grad_norm": 0.6498789844105866, "learning_rate": 4.547607461476075e-05, "loss": 0.7525, "step": 6215 }, { "epoch": 0.18148375229920294, "grad_norm": 0.4996152527665154, "learning_rate": 4.547445255474453e-05, "loss": 0.5217, "step": 6216 }, { "epoch": 0.1815129485270503, "grad_norm": 0.5521224784707719, "learning_rate": 4.547283049472831e-05, "loss": 0.651, "step": 6217 }, { "epoch": 0.18154214475489766, "grad_norm": 0.6145975319782966, "learning_rate": 4.547120843471209e-05, "loss": 0.7827, "step": 6218 }, { "epoch": 0.18157134098274502, "grad_norm": 0.616429559366953, "learning_rate": 4.5469586374695864e-05, "loss": 0.666, "step": 6219 }, { "epoch": 0.18160053721059238, "grad_norm": 0.6076437405182467, "learning_rate": 4.5467964314679645e-05, "loss": 0.7706, "step": 6220 }, { "epoch": 0.18162973343843974, "grad_norm": 0.5710902394474786, "learning_rate": 4.546634225466343e-05, "loss": 0.6388, "step": 6221 }, { "epoch": 0.1816589296662871, "grad_norm": 0.6432035396899132, "learning_rate": 4.54647201946472e-05, "loss": 0.7497, "step": 6222 }, { "epoch": 0.18168812589413447, "grad_norm": 0.561810491392332, "learning_rate": 4.5463098134630984e-05, "loss": 0.5987, "step": 6223 }, { "epoch": 0.18171732212198183, "grad_norm": 0.6125920099628932, "learning_rate": 4.546147607461476e-05, "loss": 0.7042, "step": 6224 }, { "epoch": 0.1817465183498292, "grad_norm": 0.6162365317803382, "learning_rate": 4.545985401459854e-05, "loss": 0.6822, "step": 6225 }, { "epoch": 0.18177571457767655, "grad_norm": 0.563430267429232, "learning_rate": 4.545823195458232e-05, "loss": 0.6115, "step": 6226 }, { "epoch": 0.18180491080552394, "grad_norm": 0.6129169724905886, "learning_rate": 4.54566098945661e-05, "loss": 0.7415, "step": 6227 }, { "epoch": 0.1818341070333713, "grad_norm": 0.617005433014583, "learning_rate": 4.545498783454988e-05, "loss": 0.6233, "step": 6228 }, { "epoch": 0.18186330326121866, "grad_norm": 0.6081699993001008, "learning_rate": 4.5453365774533654e-05, "loss": 0.7172, "step": 6229 }, { "epoch": 0.18189249948906602, "grad_norm": 0.6197011903610652, "learning_rate": 4.545174371451744e-05, "loss": 0.6834, "step": 6230 }, { "epoch": 0.18192169571691338, "grad_norm": 0.5674448357321163, "learning_rate": 4.545012165450122e-05, "loss": 0.5521, "step": 6231 }, { "epoch": 0.18195089194476075, "grad_norm": 0.6235092815889185, "learning_rate": 4.5448499594485e-05, "loss": 0.7436, "step": 6232 }, { "epoch": 0.1819800881726081, "grad_norm": 0.6074432561046661, "learning_rate": 4.544687753446878e-05, "loss": 0.6199, "step": 6233 }, { "epoch": 0.18200928440045547, "grad_norm": 0.6350028109481709, "learning_rate": 4.5445255474452556e-05, "loss": 0.7352, "step": 6234 }, { "epoch": 0.18203848062830283, "grad_norm": 0.5486913073316223, "learning_rate": 4.544363341443634e-05, "loss": 0.6028, "step": 6235 }, { "epoch": 0.1820676768561502, "grad_norm": 0.6204920176672699, "learning_rate": 4.544201135442012e-05, "loss": 0.6937, "step": 6236 }, { "epoch": 0.18209687308399755, "grad_norm": 0.6030732701069691, "learning_rate": 4.5440389294403895e-05, "loss": 0.6489, "step": 6237 }, { "epoch": 0.1821260693118449, "grad_norm": 0.5505584305664079, "learning_rate": 4.5438767234387677e-05, "loss": 0.6345, "step": 6238 }, { "epoch": 0.18215526553969227, "grad_norm": 0.6303142279075638, "learning_rate": 4.543714517437145e-05, "loss": 0.6837, "step": 6239 }, { "epoch": 0.18218446176753963, "grad_norm": 0.577708007674763, "learning_rate": 4.543552311435523e-05, "loss": 0.667, "step": 6240 }, { "epoch": 0.182213657995387, "grad_norm": 0.5742371879732528, "learning_rate": 4.5433901054339015e-05, "loss": 0.6297, "step": 6241 }, { "epoch": 0.18224285422323436, "grad_norm": 0.661675125323885, "learning_rate": 4.543227899432279e-05, "loss": 0.7543, "step": 6242 }, { "epoch": 0.18227205045108172, "grad_norm": 0.5857556516271272, "learning_rate": 4.543065693430657e-05, "loss": 0.6841, "step": 6243 }, { "epoch": 0.18230124667892908, "grad_norm": 0.5401012769513227, "learning_rate": 4.542903487429035e-05, "loss": 0.5852, "step": 6244 }, { "epoch": 0.18233044290677644, "grad_norm": 0.5466391767007324, "learning_rate": 4.542741281427413e-05, "loss": 0.598, "step": 6245 }, { "epoch": 0.1823596391346238, "grad_norm": 0.5910609179615971, "learning_rate": 4.542579075425791e-05, "loss": 0.6884, "step": 6246 }, { "epoch": 0.18238883536247116, "grad_norm": 0.5592177773432915, "learning_rate": 4.5424168694241685e-05, "loss": 0.6502, "step": 6247 }, { "epoch": 0.18241803159031852, "grad_norm": 0.5556750198120046, "learning_rate": 4.542254663422547e-05, "loss": 0.5978, "step": 6248 }, { "epoch": 0.18244722781816589, "grad_norm": 0.5658433738111373, "learning_rate": 4.542092457420925e-05, "loss": 0.622, "step": 6249 }, { "epoch": 0.18247642404601325, "grad_norm": 0.608810510093275, "learning_rate": 4.541930251419303e-05, "loss": 0.6402, "step": 6250 }, { "epoch": 0.1825056202738606, "grad_norm": 0.5454456440794083, "learning_rate": 4.5417680454176806e-05, "loss": 0.5855, "step": 6251 }, { "epoch": 0.18253481650170797, "grad_norm": 0.6204672135285758, "learning_rate": 4.541605839416059e-05, "loss": 0.7122, "step": 6252 }, { "epoch": 0.18256401272955533, "grad_norm": 0.6309504367027114, "learning_rate": 4.541443633414437e-05, "loss": 0.6794, "step": 6253 }, { "epoch": 0.1825932089574027, "grad_norm": 0.5912898422711916, "learning_rate": 4.5412814274128144e-05, "loss": 0.7252, "step": 6254 }, { "epoch": 0.18262240518525005, "grad_norm": 0.679062517128649, "learning_rate": 4.5411192214111926e-05, "loss": 0.6681, "step": 6255 }, { "epoch": 0.18265160141309741, "grad_norm": 0.593350144349565, "learning_rate": 4.54095701540957e-05, "loss": 0.6578, "step": 6256 }, { "epoch": 0.1826807976409448, "grad_norm": 0.5929911898120762, "learning_rate": 4.540794809407948e-05, "loss": 0.6388, "step": 6257 }, { "epoch": 0.18270999386879216, "grad_norm": 0.5733350681365471, "learning_rate": 4.5406326034063264e-05, "loss": 0.6353, "step": 6258 }, { "epoch": 0.18273919009663953, "grad_norm": 0.5847144730813869, "learning_rate": 4.540470397404704e-05, "loss": 0.632, "step": 6259 }, { "epoch": 0.1827683863244869, "grad_norm": 0.5800585971656789, "learning_rate": 4.540308191403082e-05, "loss": 0.6661, "step": 6260 }, { "epoch": 0.18279758255233425, "grad_norm": 0.5431396488196532, "learning_rate": 4.54014598540146e-05, "loss": 0.5623, "step": 6261 }, { "epoch": 0.1828267787801816, "grad_norm": 0.598546956505794, "learning_rate": 4.539983779399838e-05, "loss": 0.7014, "step": 6262 }, { "epoch": 0.18285597500802897, "grad_norm": 0.6509265043379063, "learning_rate": 4.539821573398216e-05, "loss": 0.7667, "step": 6263 }, { "epoch": 0.18288517123587633, "grad_norm": 0.5663610085241878, "learning_rate": 4.5396593673965935e-05, "loss": 0.693, "step": 6264 }, { "epoch": 0.1829143674637237, "grad_norm": 0.5825859713303276, "learning_rate": 4.5394971613949716e-05, "loss": 0.6925, "step": 6265 }, { "epoch": 0.18294356369157105, "grad_norm": 0.6208169967930794, "learning_rate": 4.53933495539335e-05, "loss": 0.7415, "step": 6266 }, { "epoch": 0.18297275991941842, "grad_norm": 0.6657484911965122, "learning_rate": 4.539172749391728e-05, "loss": 0.8308, "step": 6267 }, { "epoch": 0.18300195614726578, "grad_norm": 0.6183493119472894, "learning_rate": 4.539010543390106e-05, "loss": 0.6895, "step": 6268 }, { "epoch": 0.18303115237511314, "grad_norm": 0.6123950093196985, "learning_rate": 4.538848337388484e-05, "loss": 0.6697, "step": 6269 }, { "epoch": 0.1830603486029605, "grad_norm": 0.5731381053310884, "learning_rate": 4.538686131386862e-05, "loss": 0.6436, "step": 6270 }, { "epoch": 0.18308954483080786, "grad_norm": 0.5951018830442676, "learning_rate": 4.538523925385239e-05, "loss": 0.711, "step": 6271 }, { "epoch": 0.18311874105865522, "grad_norm": 0.6515539146885985, "learning_rate": 4.5383617193836175e-05, "loss": 0.7291, "step": 6272 }, { "epoch": 0.18314793728650258, "grad_norm": 0.555869012713115, "learning_rate": 4.538199513381996e-05, "loss": 0.6137, "step": 6273 }, { "epoch": 0.18317713351434994, "grad_norm": 0.624072789564702, "learning_rate": 4.538037307380373e-05, "loss": 0.7475, "step": 6274 }, { "epoch": 0.1832063297421973, "grad_norm": 0.60060297094585, "learning_rate": 4.5378751013787514e-05, "loss": 0.7531, "step": 6275 }, { "epoch": 0.18323552597004467, "grad_norm": 0.6318070645147434, "learning_rate": 4.537712895377129e-05, "loss": 0.7495, "step": 6276 }, { "epoch": 0.18326472219789203, "grad_norm": 0.5671870428232101, "learning_rate": 4.537550689375507e-05, "loss": 0.6391, "step": 6277 }, { "epoch": 0.1832939184257394, "grad_norm": 0.9353901974651596, "learning_rate": 4.537388483373885e-05, "loss": 0.7112, "step": 6278 }, { "epoch": 0.18332311465358675, "grad_norm": 0.5716722538726289, "learning_rate": 4.537226277372263e-05, "loss": 0.5899, "step": 6279 }, { "epoch": 0.1833523108814341, "grad_norm": 0.6546881278511094, "learning_rate": 4.537064071370641e-05, "loss": 0.7288, "step": 6280 }, { "epoch": 0.18338150710928147, "grad_norm": 0.6367570811224129, "learning_rate": 4.536901865369019e-05, "loss": 0.7652, "step": 6281 }, { "epoch": 0.18341070333712883, "grad_norm": 20.01489910157718, "learning_rate": 4.5367396593673966e-05, "loss": 1.2983, "step": 6282 }, { "epoch": 0.1834398995649762, "grad_norm": 0.6233540537406618, "learning_rate": 4.536577453365775e-05, "loss": 0.7059, "step": 6283 }, { "epoch": 0.18346909579282356, "grad_norm": 0.6186395057005217, "learning_rate": 4.536415247364152e-05, "loss": 0.6196, "step": 6284 }, { "epoch": 0.18349829202067092, "grad_norm": 0.5522933890271131, "learning_rate": 4.5362530413625304e-05, "loss": 0.5814, "step": 6285 }, { "epoch": 0.18352748824851828, "grad_norm": 0.5961019403848131, "learning_rate": 4.5360908353609086e-05, "loss": 0.6839, "step": 6286 }, { "epoch": 0.18355668447636567, "grad_norm": 0.6115460273914385, "learning_rate": 4.535928629359287e-05, "loss": 0.6864, "step": 6287 }, { "epoch": 0.18358588070421303, "grad_norm": 0.6296195505064777, "learning_rate": 4.535766423357665e-05, "loss": 0.7632, "step": 6288 }, { "epoch": 0.1836150769320604, "grad_norm": 0.584908784716136, "learning_rate": 4.5356042173560424e-05, "loss": 0.6793, "step": 6289 }, { "epoch": 0.18364427315990775, "grad_norm": 0.5432092731047575, "learning_rate": 4.5354420113544206e-05, "loss": 0.5701, "step": 6290 }, { "epoch": 0.1836734693877551, "grad_norm": 0.6169459710965081, "learning_rate": 4.535279805352798e-05, "loss": 0.6847, "step": 6291 }, { "epoch": 0.18370266561560247, "grad_norm": 0.5876279296587932, "learning_rate": 4.535117599351176e-05, "loss": 0.6861, "step": 6292 }, { "epoch": 0.18373186184344983, "grad_norm": 0.5781427489980925, "learning_rate": 4.5349553933495545e-05, "loss": 0.6717, "step": 6293 }, { "epoch": 0.1837610580712972, "grad_norm": 0.632820049333848, "learning_rate": 4.534793187347932e-05, "loss": 0.7217, "step": 6294 }, { "epoch": 0.18379025429914456, "grad_norm": 0.5632917740270974, "learning_rate": 4.53463098134631e-05, "loss": 0.6567, "step": 6295 }, { "epoch": 0.18381945052699192, "grad_norm": 0.6256837596730458, "learning_rate": 4.5344687753446876e-05, "loss": 0.6414, "step": 6296 }, { "epoch": 0.18384864675483928, "grad_norm": 0.601797473961596, "learning_rate": 4.534306569343066e-05, "loss": 0.7279, "step": 6297 }, { "epoch": 0.18387784298268664, "grad_norm": 0.5708325883258589, "learning_rate": 4.534144363341444e-05, "loss": 0.6429, "step": 6298 }, { "epoch": 0.183907039210534, "grad_norm": 0.5891303651828139, "learning_rate": 4.5339821573398215e-05, "loss": 0.7083, "step": 6299 }, { "epoch": 0.18393623543838136, "grad_norm": 0.5979318053616615, "learning_rate": 4.5338199513382e-05, "loss": 0.751, "step": 6300 }, { "epoch": 0.18396543166622872, "grad_norm": 0.5993043260841561, "learning_rate": 4.533657745336577e-05, "loss": 0.6882, "step": 6301 }, { "epoch": 0.18399462789407608, "grad_norm": 0.6612401664613354, "learning_rate": 4.5334955393349553e-05, "loss": 0.7448, "step": 6302 }, { "epoch": 0.18402382412192345, "grad_norm": 0.6228308349958778, "learning_rate": 4.5333333333333335e-05, "loss": 0.6791, "step": 6303 }, { "epoch": 0.1840530203497708, "grad_norm": 0.5501364693158632, "learning_rate": 4.533171127331711e-05, "loss": 0.5908, "step": 6304 }, { "epoch": 0.18408221657761817, "grad_norm": 0.4771109996600251, "learning_rate": 4.53300892133009e-05, "loss": 0.5175, "step": 6305 }, { "epoch": 0.18411141280546553, "grad_norm": 0.6457287174496816, "learning_rate": 4.5328467153284674e-05, "loss": 0.7357, "step": 6306 }, { "epoch": 0.1841406090333129, "grad_norm": 0.5694968122224098, "learning_rate": 4.5326845093268456e-05, "loss": 0.7027, "step": 6307 }, { "epoch": 0.18416980526116025, "grad_norm": 1.063111016263309, "learning_rate": 4.532522303325224e-05, "loss": 0.7399, "step": 6308 }, { "epoch": 0.1841990014890076, "grad_norm": 0.5978516409768024, "learning_rate": 4.532360097323601e-05, "loss": 0.6348, "step": 6309 }, { "epoch": 0.18422819771685497, "grad_norm": 0.6208364409818047, "learning_rate": 4.5321978913219794e-05, "loss": 0.7081, "step": 6310 }, { "epoch": 0.18425739394470234, "grad_norm": 0.6356554349678772, "learning_rate": 4.532035685320357e-05, "loss": 0.7382, "step": 6311 }, { "epoch": 0.1842865901725497, "grad_norm": 0.6156059280008895, "learning_rate": 4.531873479318735e-05, "loss": 0.6777, "step": 6312 }, { "epoch": 0.18431578640039706, "grad_norm": 0.6239754013692836, "learning_rate": 4.531711273317113e-05, "loss": 0.6657, "step": 6313 }, { "epoch": 0.18434498262824442, "grad_norm": 0.6046920192582937, "learning_rate": 4.531549067315491e-05, "loss": 0.6607, "step": 6314 }, { "epoch": 0.18437417885609178, "grad_norm": 0.6012543590167369, "learning_rate": 4.531386861313869e-05, "loss": 0.6268, "step": 6315 }, { "epoch": 0.18440337508393914, "grad_norm": 0.6453774655088194, "learning_rate": 4.5312246553122464e-05, "loss": 0.7319, "step": 6316 }, { "epoch": 0.18443257131178653, "grad_norm": 0.5705227510408655, "learning_rate": 4.5310624493106246e-05, "loss": 0.6378, "step": 6317 }, { "epoch": 0.1844617675396339, "grad_norm": 0.6533265507476277, "learning_rate": 4.530900243309003e-05, "loss": 0.6702, "step": 6318 }, { "epoch": 0.18449096376748125, "grad_norm": 0.6100553931637627, "learning_rate": 4.53073803730738e-05, "loss": 0.6693, "step": 6319 }, { "epoch": 0.18452015999532861, "grad_norm": 0.6116979353588446, "learning_rate": 4.5305758313057585e-05, "loss": 0.7209, "step": 6320 }, { "epoch": 0.18454935622317598, "grad_norm": 0.6122738387833625, "learning_rate": 4.530413625304136e-05, "loss": 0.6892, "step": 6321 }, { "epoch": 0.18457855245102334, "grad_norm": 0.5694399733687701, "learning_rate": 4.530251419302514e-05, "loss": 0.606, "step": 6322 }, { "epoch": 0.1846077486788707, "grad_norm": 0.5752406985306409, "learning_rate": 4.530089213300892e-05, "loss": 0.637, "step": 6323 }, { "epoch": 0.18463694490671806, "grad_norm": 0.5984277060541674, "learning_rate": 4.5299270072992705e-05, "loss": 0.6832, "step": 6324 }, { "epoch": 0.18466614113456542, "grad_norm": 0.593691431139866, "learning_rate": 4.5297648012976487e-05, "loss": 0.7011, "step": 6325 }, { "epoch": 0.18469533736241278, "grad_norm": 0.6083805714296101, "learning_rate": 4.529602595296026e-05, "loss": 0.6502, "step": 6326 }, { "epoch": 0.18472453359026014, "grad_norm": 0.579595116835415, "learning_rate": 4.529440389294404e-05, "loss": 0.7191, "step": 6327 }, { "epoch": 0.1847537298181075, "grad_norm": 0.5547152020085692, "learning_rate": 4.5292781832927825e-05, "loss": 0.6541, "step": 6328 }, { "epoch": 0.18478292604595487, "grad_norm": 0.6940419621803026, "learning_rate": 4.52911597729116e-05, "loss": 0.7934, "step": 6329 }, { "epoch": 0.18481212227380223, "grad_norm": 0.6031106455246888, "learning_rate": 4.528953771289538e-05, "loss": 0.7397, "step": 6330 }, { "epoch": 0.1848413185016496, "grad_norm": 0.5688617053121623, "learning_rate": 4.528791565287916e-05, "loss": 0.6122, "step": 6331 }, { "epoch": 0.18487051472949695, "grad_norm": 0.5821028069390626, "learning_rate": 4.528629359286294e-05, "loss": 0.6273, "step": 6332 }, { "epoch": 0.1848997109573443, "grad_norm": 0.5899071355197757, "learning_rate": 4.528467153284672e-05, "loss": 0.6603, "step": 6333 }, { "epoch": 0.18492890718519167, "grad_norm": 0.602121867897929, "learning_rate": 4.5283049472830495e-05, "loss": 0.7154, "step": 6334 }, { "epoch": 0.18495810341303903, "grad_norm": 0.610664949839811, "learning_rate": 4.528142741281428e-05, "loss": 0.739, "step": 6335 }, { "epoch": 0.1849872996408864, "grad_norm": 0.5700797874011619, "learning_rate": 4.527980535279805e-05, "loss": 0.6376, "step": 6336 }, { "epoch": 0.18501649586873375, "grad_norm": 0.5623686063029546, "learning_rate": 4.5278183292781834e-05, "loss": 0.6381, "step": 6337 }, { "epoch": 0.18504569209658112, "grad_norm": 0.732463122708172, "learning_rate": 4.5276561232765616e-05, "loss": 0.6668, "step": 6338 }, { "epoch": 0.18507488832442848, "grad_norm": 0.5928193553431144, "learning_rate": 4.527493917274939e-05, "loss": 0.6555, "step": 6339 }, { "epoch": 0.18510408455227584, "grad_norm": 0.5403073413661175, "learning_rate": 4.527331711273317e-05, "loss": 0.6194, "step": 6340 }, { "epoch": 0.1851332807801232, "grad_norm": 0.5770711327371189, "learning_rate": 4.527169505271695e-05, "loss": 0.6482, "step": 6341 }, { "epoch": 0.18516247700797056, "grad_norm": 0.632491905656265, "learning_rate": 4.527007299270073e-05, "loss": 0.7186, "step": 6342 }, { "epoch": 0.18519167323581792, "grad_norm": 0.605809751423566, "learning_rate": 4.526845093268451e-05, "loss": 0.7199, "step": 6343 }, { "epoch": 0.18522086946366528, "grad_norm": 0.6567436997907083, "learning_rate": 4.526682887266829e-05, "loss": 0.7406, "step": 6344 }, { "epoch": 0.18525006569151264, "grad_norm": 0.5826387415460728, "learning_rate": 4.5265206812652074e-05, "loss": 0.6439, "step": 6345 }, { "epoch": 0.18527926191936, "grad_norm": 0.6008528804140039, "learning_rate": 4.526358475263585e-05, "loss": 0.6549, "step": 6346 }, { "epoch": 0.1853084581472074, "grad_norm": 0.6095463071927686, "learning_rate": 4.526196269261963e-05, "loss": 0.6283, "step": 6347 }, { "epoch": 0.18533765437505476, "grad_norm": 0.5646381563396347, "learning_rate": 4.526034063260341e-05, "loss": 0.6729, "step": 6348 }, { "epoch": 0.18536685060290212, "grad_norm": 0.584888350402422, "learning_rate": 4.525871857258719e-05, "loss": 0.6693, "step": 6349 }, { "epoch": 0.18539604683074948, "grad_norm": 0.5958973034464234, "learning_rate": 4.525709651257097e-05, "loss": 0.6708, "step": 6350 }, { "epoch": 0.18542524305859684, "grad_norm": 0.5888163274290827, "learning_rate": 4.5255474452554745e-05, "loss": 0.7023, "step": 6351 }, { "epoch": 0.1854544392864442, "grad_norm": 0.613723453759616, "learning_rate": 4.5253852392538526e-05, "loss": 0.6863, "step": 6352 }, { "epoch": 0.18548363551429156, "grad_norm": 0.5681978952585027, "learning_rate": 4.525223033252231e-05, "loss": 0.6656, "step": 6353 }, { "epoch": 0.18551283174213892, "grad_norm": 0.5924102383820303, "learning_rate": 4.525060827250608e-05, "loss": 0.6594, "step": 6354 }, { "epoch": 0.18554202796998628, "grad_norm": 0.5814357162822444, "learning_rate": 4.5248986212489865e-05, "loss": 0.6367, "step": 6355 }, { "epoch": 0.18557122419783365, "grad_norm": 0.5711937400161385, "learning_rate": 4.524736415247364e-05, "loss": 0.6552, "step": 6356 }, { "epoch": 0.185600420425681, "grad_norm": 0.5398320482968425, "learning_rate": 4.524574209245742e-05, "loss": 0.5734, "step": 6357 }, { "epoch": 0.18562961665352837, "grad_norm": 0.6061735641773943, "learning_rate": 4.5244120032441203e-05, "loss": 0.683, "step": 6358 }, { "epoch": 0.18565881288137573, "grad_norm": 0.6444783602929027, "learning_rate": 4.524249797242498e-05, "loss": 0.7514, "step": 6359 }, { "epoch": 0.1856880091092231, "grad_norm": 0.5688994112895912, "learning_rate": 4.524087591240876e-05, "loss": 0.6393, "step": 6360 }, { "epoch": 0.18571720533707045, "grad_norm": 0.5657487993822722, "learning_rate": 4.5239253852392535e-05, "loss": 0.6602, "step": 6361 }, { "epoch": 0.1857464015649178, "grad_norm": 0.604791880708441, "learning_rate": 4.5237631792376324e-05, "loss": 0.6907, "step": 6362 }, { "epoch": 0.18577559779276517, "grad_norm": 0.5613415266093216, "learning_rate": 4.52360097323601e-05, "loss": 0.5737, "step": 6363 }, { "epoch": 0.18580479402061253, "grad_norm": 0.6303965451815742, "learning_rate": 4.523438767234388e-05, "loss": 0.7294, "step": 6364 }, { "epoch": 0.1858339902484599, "grad_norm": 0.6770534309870778, "learning_rate": 4.523276561232766e-05, "loss": 0.7575, "step": 6365 }, { "epoch": 0.18586318647630726, "grad_norm": 0.624610584162177, "learning_rate": 4.523114355231144e-05, "loss": 0.7561, "step": 6366 }, { "epoch": 0.18589238270415462, "grad_norm": 0.600468579592401, "learning_rate": 4.522952149229522e-05, "loss": 0.711, "step": 6367 }, { "epoch": 0.18592157893200198, "grad_norm": 0.5822088533568363, "learning_rate": 4.5227899432278994e-05, "loss": 0.6942, "step": 6368 }, { "epoch": 0.18595077515984934, "grad_norm": 0.5878734575066831, "learning_rate": 4.5226277372262776e-05, "loss": 0.7275, "step": 6369 }, { "epoch": 0.1859799713876967, "grad_norm": 0.5493444922539269, "learning_rate": 4.522465531224656e-05, "loss": 0.5244, "step": 6370 }, { "epoch": 0.18600916761554406, "grad_norm": 0.589405586061179, "learning_rate": 4.522303325223033e-05, "loss": 0.6992, "step": 6371 }, { "epoch": 0.18603836384339142, "grad_norm": 0.5594017391140524, "learning_rate": 4.5221411192214114e-05, "loss": 0.6161, "step": 6372 }, { "epoch": 0.18606756007123879, "grad_norm": 0.5342352748455022, "learning_rate": 4.5219789132197896e-05, "loss": 0.5914, "step": 6373 }, { "epoch": 0.18609675629908615, "grad_norm": 0.6120271670420414, "learning_rate": 4.521816707218167e-05, "loss": 0.6948, "step": 6374 }, { "epoch": 0.1861259525269335, "grad_norm": 0.6210684610666273, "learning_rate": 4.521654501216545e-05, "loss": 0.7334, "step": 6375 }, { "epoch": 0.18615514875478087, "grad_norm": 0.569014191829966, "learning_rate": 4.521492295214923e-05, "loss": 0.62, "step": 6376 }, { "epoch": 0.18618434498262826, "grad_norm": 0.5729383913859006, "learning_rate": 4.521330089213301e-05, "loss": 0.6066, "step": 6377 }, { "epoch": 0.18621354121047562, "grad_norm": 0.5763671191601802, "learning_rate": 4.521167883211679e-05, "loss": 0.6664, "step": 6378 }, { "epoch": 0.18624273743832298, "grad_norm": 0.5910399758992464, "learning_rate": 4.5210056772100566e-05, "loss": 0.6533, "step": 6379 }, { "epoch": 0.18627193366617034, "grad_norm": 0.6190087135494858, "learning_rate": 4.520843471208435e-05, "loss": 0.6972, "step": 6380 }, { "epoch": 0.1863011298940177, "grad_norm": 0.6177558293965417, "learning_rate": 4.520681265206813e-05, "loss": 0.7404, "step": 6381 }, { "epoch": 0.18633032612186506, "grad_norm": 0.6250992143202787, "learning_rate": 4.520519059205191e-05, "loss": 0.7689, "step": 6382 }, { "epoch": 0.18635952234971243, "grad_norm": 0.5922201873868731, "learning_rate": 4.5203568532035686e-05, "loss": 0.6422, "step": 6383 }, { "epoch": 0.1863887185775598, "grad_norm": 0.5926305729876072, "learning_rate": 4.520194647201947e-05, "loss": 0.7019, "step": 6384 }, { "epoch": 0.18641791480540715, "grad_norm": 0.5713865181044657, "learning_rate": 4.520032441200325e-05, "loss": 0.6772, "step": 6385 }, { "epoch": 0.1864471110332545, "grad_norm": 0.5742803321657863, "learning_rate": 4.5198702351987025e-05, "loss": 0.6446, "step": 6386 }, { "epoch": 0.18647630726110187, "grad_norm": 0.5870100053031263, "learning_rate": 4.519708029197081e-05, "loss": 0.6516, "step": 6387 }, { "epoch": 0.18650550348894923, "grad_norm": 0.5828468072059023, "learning_rate": 4.519545823195458e-05, "loss": 0.7066, "step": 6388 }, { "epoch": 0.1865346997167966, "grad_norm": 0.7171798992217386, "learning_rate": 4.5193836171938363e-05, "loss": 0.7207, "step": 6389 }, { "epoch": 0.18656389594464395, "grad_norm": 0.5674931744065669, "learning_rate": 4.5192214111922145e-05, "loss": 0.7059, "step": 6390 }, { "epoch": 0.18659309217249131, "grad_norm": 0.5517230372970846, "learning_rate": 4.519059205190592e-05, "loss": 0.6635, "step": 6391 }, { "epoch": 0.18662228840033868, "grad_norm": 0.566236908062789, "learning_rate": 4.51889699918897e-05, "loss": 0.6252, "step": 6392 }, { "epoch": 0.18665148462818604, "grad_norm": 0.5148631115262041, "learning_rate": 4.5187347931873484e-05, "loss": 0.5424, "step": 6393 }, { "epoch": 0.1866806808560334, "grad_norm": 0.5518476270746049, "learning_rate": 4.518572587185726e-05, "loss": 0.6403, "step": 6394 }, { "epoch": 0.18670987708388076, "grad_norm": 0.5845872014207254, "learning_rate": 4.518410381184104e-05, "loss": 0.6649, "step": 6395 }, { "epoch": 0.18673907331172812, "grad_norm": 0.5628671991761317, "learning_rate": 4.5182481751824815e-05, "loss": 0.593, "step": 6396 }, { "epoch": 0.18676826953957548, "grad_norm": 0.5884957936246284, "learning_rate": 4.51808596918086e-05, "loss": 0.7139, "step": 6397 }, { "epoch": 0.18679746576742284, "grad_norm": 0.5733177668898671, "learning_rate": 4.517923763179238e-05, "loss": 0.6707, "step": 6398 }, { "epoch": 0.1868266619952702, "grad_norm": 0.6072463225332789, "learning_rate": 4.5177615571776154e-05, "loss": 0.7241, "step": 6399 }, { "epoch": 0.18685585822311757, "grad_norm": 0.5776957926359214, "learning_rate": 4.517599351175994e-05, "loss": 0.6582, "step": 6400 }, { "epoch": 0.18688505445096493, "grad_norm": 0.5744310531226959, "learning_rate": 4.517437145174372e-05, "loss": 0.596, "step": 6401 }, { "epoch": 0.1869142506788123, "grad_norm": 0.5724153940354305, "learning_rate": 4.51727493917275e-05, "loss": 0.6474, "step": 6402 }, { "epoch": 0.18694344690665965, "grad_norm": 0.5751183020170264, "learning_rate": 4.5171127331711274e-05, "loss": 0.6517, "step": 6403 }, { "epoch": 0.186972643134507, "grad_norm": 0.5710259251545068, "learning_rate": 4.5169505271695056e-05, "loss": 0.6253, "step": 6404 }, { "epoch": 0.18700183936235437, "grad_norm": 0.5697458370376964, "learning_rate": 4.516788321167884e-05, "loss": 0.6842, "step": 6405 }, { "epoch": 0.18703103559020173, "grad_norm": 0.6881339971798635, "learning_rate": 4.516626115166261e-05, "loss": 0.6529, "step": 6406 }, { "epoch": 0.18706023181804912, "grad_norm": 0.5952458065317264, "learning_rate": 4.5164639091646395e-05, "loss": 0.6401, "step": 6407 }, { "epoch": 0.18708942804589648, "grad_norm": 0.6518457868937692, "learning_rate": 4.516301703163017e-05, "loss": 0.7364, "step": 6408 }, { "epoch": 0.18711862427374384, "grad_norm": 0.6707607705502722, "learning_rate": 4.516139497161395e-05, "loss": 0.7259, "step": 6409 }, { "epoch": 0.1871478205015912, "grad_norm": 0.5894725062894857, "learning_rate": 4.515977291159773e-05, "loss": 0.6521, "step": 6410 }, { "epoch": 0.18717701672943857, "grad_norm": 0.5719390250029219, "learning_rate": 4.515815085158151e-05, "loss": 0.6356, "step": 6411 }, { "epoch": 0.18720621295728593, "grad_norm": 0.6999785034608825, "learning_rate": 4.515652879156529e-05, "loss": 0.7532, "step": 6412 }, { "epoch": 0.1872354091851333, "grad_norm": 0.5971367712676192, "learning_rate": 4.5154906731549065e-05, "loss": 0.6489, "step": 6413 }, { "epoch": 0.18726460541298065, "grad_norm": 0.6326859283440955, "learning_rate": 4.5153284671532847e-05, "loss": 0.7041, "step": 6414 }, { "epoch": 0.187293801640828, "grad_norm": 0.6971605491736094, "learning_rate": 4.515166261151663e-05, "loss": 0.8055, "step": 6415 }, { "epoch": 0.18732299786867537, "grad_norm": 0.6338310808764347, "learning_rate": 4.51500405515004e-05, "loss": 0.7162, "step": 6416 }, { "epoch": 0.18735219409652273, "grad_norm": 0.635017263266989, "learning_rate": 4.5148418491484185e-05, "loss": 0.7245, "step": 6417 }, { "epoch": 0.1873813903243701, "grad_norm": 0.5991968876457086, "learning_rate": 4.514679643146797e-05, "loss": 0.6565, "step": 6418 }, { "epoch": 0.18741058655221746, "grad_norm": 0.6075107670413732, "learning_rate": 4.514517437145175e-05, "loss": 0.7512, "step": 6419 }, { "epoch": 0.18743978278006482, "grad_norm": 0.6260107978627768, "learning_rate": 4.514355231143553e-05, "loss": 0.7297, "step": 6420 }, { "epoch": 0.18746897900791218, "grad_norm": 0.5649188131027402, "learning_rate": 4.5141930251419305e-05, "loss": 0.636, "step": 6421 }, { "epoch": 0.18749817523575954, "grad_norm": 0.6280845744200265, "learning_rate": 4.514030819140309e-05, "loss": 0.7544, "step": 6422 }, { "epoch": 0.1875273714636069, "grad_norm": 0.6399645788319176, "learning_rate": 4.513868613138686e-05, "loss": 0.7147, "step": 6423 }, { "epoch": 0.18755656769145426, "grad_norm": 0.5888887526646439, "learning_rate": 4.5137064071370644e-05, "loss": 0.6758, "step": 6424 }, { "epoch": 0.18758576391930162, "grad_norm": 0.6402170263221276, "learning_rate": 4.5135442011354426e-05, "loss": 0.7348, "step": 6425 }, { "epoch": 0.18761496014714898, "grad_norm": 0.556725585502649, "learning_rate": 4.51338199513382e-05, "loss": 0.6244, "step": 6426 }, { "epoch": 0.18764415637499635, "grad_norm": 0.6533299817194276, "learning_rate": 4.513219789132198e-05, "loss": 0.8016, "step": 6427 }, { "epoch": 0.1876733526028437, "grad_norm": 0.6026000860599467, "learning_rate": 4.513057583130576e-05, "loss": 0.7103, "step": 6428 }, { "epoch": 0.18770254883069107, "grad_norm": 0.607014706890029, "learning_rate": 4.512895377128954e-05, "loss": 0.7525, "step": 6429 }, { "epoch": 0.18773174505853843, "grad_norm": 0.5853692640465599, "learning_rate": 4.512733171127332e-05, "loss": 0.7078, "step": 6430 }, { "epoch": 0.1877609412863858, "grad_norm": 0.5730640346715836, "learning_rate": 4.5125709651257096e-05, "loss": 0.6767, "step": 6431 }, { "epoch": 0.18779013751423315, "grad_norm": 0.5470041499664519, "learning_rate": 4.512408759124088e-05, "loss": 0.5929, "step": 6432 }, { "epoch": 0.1878193337420805, "grad_norm": 0.5797933738823354, "learning_rate": 4.512246553122465e-05, "loss": 0.682, "step": 6433 }, { "epoch": 0.18784852996992787, "grad_norm": 0.5401280395081229, "learning_rate": 4.5120843471208434e-05, "loss": 0.5818, "step": 6434 }, { "epoch": 0.18787772619777524, "grad_norm": 0.5888415495725509, "learning_rate": 4.5119221411192216e-05, "loss": 0.6398, "step": 6435 }, { "epoch": 0.1879069224256226, "grad_norm": 0.569379168232211, "learning_rate": 4.511759935117599e-05, "loss": 0.6473, "step": 6436 }, { "epoch": 0.18793611865346996, "grad_norm": 0.5969828150179173, "learning_rate": 4.511597729115978e-05, "loss": 0.6969, "step": 6437 }, { "epoch": 0.18796531488131735, "grad_norm": 0.5527227882776862, "learning_rate": 4.5114355231143555e-05, "loss": 0.6353, "step": 6438 }, { "epoch": 0.1879945111091647, "grad_norm": 0.6276738339165657, "learning_rate": 4.5112733171127336e-05, "loss": 0.7775, "step": 6439 }, { "epoch": 0.18802370733701207, "grad_norm": 0.5351178784731843, "learning_rate": 4.511111111111112e-05, "loss": 0.57, "step": 6440 }, { "epoch": 0.18805290356485943, "grad_norm": 0.5784673065522402, "learning_rate": 4.510948905109489e-05, "loss": 0.6773, "step": 6441 }, { "epoch": 0.1880820997927068, "grad_norm": 0.5706738960732922, "learning_rate": 4.5107866991078675e-05, "loss": 0.6442, "step": 6442 }, { "epoch": 0.18811129602055415, "grad_norm": 0.5692086179655272, "learning_rate": 4.510624493106245e-05, "loss": 0.6796, "step": 6443 }, { "epoch": 0.18814049224840151, "grad_norm": 0.8583772589890782, "learning_rate": 4.510462287104623e-05, "loss": 0.8467, "step": 6444 }, { "epoch": 0.18816968847624888, "grad_norm": 0.5426596501241634, "learning_rate": 4.5103000811030013e-05, "loss": 0.6106, "step": 6445 }, { "epoch": 0.18819888470409624, "grad_norm": 0.5702576691053086, "learning_rate": 4.510137875101379e-05, "loss": 0.622, "step": 6446 }, { "epoch": 0.1882280809319436, "grad_norm": 0.6580383637342876, "learning_rate": 4.509975669099757e-05, "loss": 0.6526, "step": 6447 }, { "epoch": 0.18825727715979096, "grad_norm": 0.6148223219530322, "learning_rate": 4.5098134630981345e-05, "loss": 0.5949, "step": 6448 }, { "epoch": 0.18828647338763832, "grad_norm": 0.5593368543172996, "learning_rate": 4.509651257096513e-05, "loss": 0.6746, "step": 6449 }, { "epoch": 0.18831566961548568, "grad_norm": 0.616534341125033, "learning_rate": 4.509489051094891e-05, "loss": 0.7574, "step": 6450 }, { "epoch": 0.18834486584333304, "grad_norm": 0.5607662616772723, "learning_rate": 4.5093268450932684e-05, "loss": 0.6617, "step": 6451 }, { "epoch": 0.1883740620711804, "grad_norm": 0.6130049529035683, "learning_rate": 4.5091646390916465e-05, "loss": 0.6583, "step": 6452 }, { "epoch": 0.18840325829902776, "grad_norm": 0.6264457080347227, "learning_rate": 4.509002433090024e-05, "loss": 0.7493, "step": 6453 }, { "epoch": 0.18843245452687513, "grad_norm": 0.5716471802960347, "learning_rate": 4.508840227088402e-05, "loss": 0.6559, "step": 6454 }, { "epoch": 0.1884616507547225, "grad_norm": 0.6112172509498546, "learning_rate": 4.5086780210867804e-05, "loss": 0.6797, "step": 6455 }, { "epoch": 0.18849084698256985, "grad_norm": 0.6183442250601043, "learning_rate": 4.5085158150851586e-05, "loss": 0.7026, "step": 6456 }, { "epoch": 0.1885200432104172, "grad_norm": 0.6009550456851079, "learning_rate": 4.508353609083537e-05, "loss": 0.7236, "step": 6457 }, { "epoch": 0.18854923943826457, "grad_norm": 0.5896037174531015, "learning_rate": 4.508191403081914e-05, "loss": 0.6518, "step": 6458 }, { "epoch": 0.18857843566611193, "grad_norm": 0.5923997907246096, "learning_rate": 4.5080291970802924e-05, "loss": 0.6706, "step": 6459 }, { "epoch": 0.1886076318939593, "grad_norm": 0.5981912324008621, "learning_rate": 4.5078669910786706e-05, "loss": 0.6609, "step": 6460 }, { "epoch": 0.18863682812180665, "grad_norm": 0.6657110634253122, "learning_rate": 4.507704785077048e-05, "loss": 0.7171, "step": 6461 }, { "epoch": 0.18866602434965402, "grad_norm": 0.5938910722681796, "learning_rate": 4.507542579075426e-05, "loss": 0.6547, "step": 6462 }, { "epoch": 0.18869522057750138, "grad_norm": 0.6291054349087384, "learning_rate": 4.507380373073804e-05, "loss": 0.717, "step": 6463 }, { "epoch": 0.18872441680534874, "grad_norm": 0.6096544838981558, "learning_rate": 4.507218167072182e-05, "loss": 0.715, "step": 6464 }, { "epoch": 0.1887536130331961, "grad_norm": 0.5684741165730677, "learning_rate": 4.50705596107056e-05, "loss": 0.648, "step": 6465 }, { "epoch": 0.18878280926104346, "grad_norm": 0.5819417896582579, "learning_rate": 4.5068937550689376e-05, "loss": 0.5742, "step": 6466 }, { "epoch": 0.18881200548889082, "grad_norm": 0.5650395109970996, "learning_rate": 4.506731549067316e-05, "loss": 0.6196, "step": 6467 }, { "epoch": 0.1888412017167382, "grad_norm": 0.6117179352527427, "learning_rate": 4.506569343065693e-05, "loss": 0.661, "step": 6468 }, { "epoch": 0.18887039794458557, "grad_norm": 0.608004569608005, "learning_rate": 4.5064071370640715e-05, "loss": 0.7155, "step": 6469 }, { "epoch": 0.18889959417243293, "grad_norm": 0.5645581099267283, "learning_rate": 4.5062449310624496e-05, "loss": 0.6457, "step": 6470 }, { "epoch": 0.1889287904002803, "grad_norm": 0.559665460208897, "learning_rate": 4.506082725060827e-05, "loss": 0.6798, "step": 6471 }, { "epoch": 0.18895798662812766, "grad_norm": 0.5663674705141695, "learning_rate": 4.505920519059205e-05, "loss": 0.6536, "step": 6472 }, { "epoch": 0.18898718285597502, "grad_norm": 0.5866460679228818, "learning_rate": 4.505758313057583e-05, "loss": 0.6729, "step": 6473 }, { "epoch": 0.18901637908382238, "grad_norm": 0.5178689896263219, "learning_rate": 4.505596107055961e-05, "loss": 0.5407, "step": 6474 }, { "epoch": 0.18904557531166974, "grad_norm": 0.624707707086516, "learning_rate": 4.505433901054339e-05, "loss": 0.725, "step": 6475 }, { "epoch": 0.1890747715395171, "grad_norm": 0.5959806557422422, "learning_rate": 4.5052716950527173e-05, "loss": 0.6385, "step": 6476 }, { "epoch": 0.18910396776736446, "grad_norm": 0.6104241727462, "learning_rate": 4.5051094890510955e-05, "loss": 0.6375, "step": 6477 }, { "epoch": 0.18913316399521182, "grad_norm": 0.6755433115078809, "learning_rate": 4.504947283049473e-05, "loss": 0.7227, "step": 6478 }, { "epoch": 0.18916236022305918, "grad_norm": 0.6044434932969718, "learning_rate": 4.504785077047851e-05, "loss": 0.6987, "step": 6479 }, { "epoch": 0.18919155645090655, "grad_norm": 0.6199718605691077, "learning_rate": 4.5046228710462294e-05, "loss": 0.682, "step": 6480 }, { "epoch": 0.1892207526787539, "grad_norm": 0.597815683635009, "learning_rate": 4.504460665044607e-05, "loss": 0.7068, "step": 6481 }, { "epoch": 0.18924994890660127, "grad_norm": 0.5894379044212165, "learning_rate": 4.504298459042985e-05, "loss": 0.6765, "step": 6482 }, { "epoch": 0.18927914513444863, "grad_norm": 0.6443577058567381, "learning_rate": 4.5041362530413625e-05, "loss": 0.7118, "step": 6483 }, { "epoch": 0.189308341362296, "grad_norm": 0.5912266881729987, "learning_rate": 4.503974047039741e-05, "loss": 0.6686, "step": 6484 }, { "epoch": 0.18933753759014335, "grad_norm": 0.5751858602682574, "learning_rate": 4.503811841038119e-05, "loss": 0.6547, "step": 6485 }, { "epoch": 0.1893667338179907, "grad_norm": 0.5550304460447059, "learning_rate": 4.5036496350364964e-05, "loss": 0.6095, "step": 6486 }, { "epoch": 0.18939593004583807, "grad_norm": 0.592878497232499, "learning_rate": 4.5034874290348746e-05, "loss": 0.6345, "step": 6487 }, { "epoch": 0.18942512627368543, "grad_norm": 0.6657918570356178, "learning_rate": 4.503325223033252e-05, "loss": 0.7836, "step": 6488 }, { "epoch": 0.1894543225015328, "grad_norm": 0.6255684146995402, "learning_rate": 4.50316301703163e-05, "loss": 0.6741, "step": 6489 }, { "epoch": 0.18948351872938016, "grad_norm": 0.6199426815072652, "learning_rate": 4.5030008110300084e-05, "loss": 0.7255, "step": 6490 }, { "epoch": 0.18951271495722752, "grad_norm": 0.5625299930750152, "learning_rate": 4.502838605028386e-05, "loss": 0.7203, "step": 6491 }, { "epoch": 0.18954191118507488, "grad_norm": 0.5725201007146218, "learning_rate": 4.502676399026764e-05, "loss": 0.6714, "step": 6492 }, { "epoch": 0.18957110741292224, "grad_norm": 0.6537331984897766, "learning_rate": 4.5025141930251416e-05, "loss": 0.7527, "step": 6493 }, { "epoch": 0.1896003036407696, "grad_norm": 0.5712169893053548, "learning_rate": 4.5023519870235205e-05, "loss": 0.7183, "step": 6494 }, { "epoch": 0.18962949986861696, "grad_norm": 0.5337281642150391, "learning_rate": 4.502189781021898e-05, "loss": 0.6077, "step": 6495 }, { "epoch": 0.18965869609646432, "grad_norm": 0.5835793985690655, "learning_rate": 4.502027575020276e-05, "loss": 0.6454, "step": 6496 }, { "epoch": 0.18968789232431169, "grad_norm": 0.5272811301097557, "learning_rate": 4.501865369018654e-05, "loss": 0.5687, "step": 6497 }, { "epoch": 0.18971708855215907, "grad_norm": 0.5923382547560925, "learning_rate": 4.501703163017032e-05, "loss": 0.6928, "step": 6498 }, { "epoch": 0.18974628478000644, "grad_norm": 0.5861165700455214, "learning_rate": 4.50154095701541e-05, "loss": 0.6049, "step": 6499 }, { "epoch": 0.1897754810078538, "grad_norm": 0.5714042715515869, "learning_rate": 4.5013787510137875e-05, "loss": 0.6306, "step": 6500 }, { "epoch": 0.18980467723570116, "grad_norm": 0.6459872387784389, "learning_rate": 4.5012165450121657e-05, "loss": 0.6236, "step": 6501 }, { "epoch": 0.18983387346354852, "grad_norm": 0.5701168118309043, "learning_rate": 4.501054339010544e-05, "loss": 0.6367, "step": 6502 }, { "epoch": 0.18986306969139588, "grad_norm": 0.7399226137782962, "learning_rate": 4.500892133008921e-05, "loss": 0.7478, "step": 6503 }, { "epoch": 0.18989226591924324, "grad_norm": 0.5703408192991226, "learning_rate": 4.5007299270072995e-05, "loss": 0.6785, "step": 6504 }, { "epoch": 0.1899214621470906, "grad_norm": 0.6374555849535096, "learning_rate": 4.500567721005678e-05, "loss": 0.7496, "step": 6505 }, { "epoch": 0.18995065837493796, "grad_norm": 0.6312245288734178, "learning_rate": 4.500405515004055e-05, "loss": 0.7506, "step": 6506 }, { "epoch": 0.18997985460278533, "grad_norm": 0.5685853842171086, "learning_rate": 4.5002433090024334e-05, "loss": 0.6446, "step": 6507 }, { "epoch": 0.1900090508306327, "grad_norm": 0.5425926194224551, "learning_rate": 4.500081103000811e-05, "loss": 0.6373, "step": 6508 }, { "epoch": 0.19003824705848005, "grad_norm": 0.5242398780400406, "learning_rate": 4.499918896999189e-05, "loss": 0.5739, "step": 6509 }, { "epoch": 0.1900674432863274, "grad_norm": 0.590412904156673, "learning_rate": 4.499756690997567e-05, "loss": 0.6599, "step": 6510 }, { "epoch": 0.19009663951417477, "grad_norm": 0.6270177654483324, "learning_rate": 4.499594484995945e-05, "loss": 0.6703, "step": 6511 }, { "epoch": 0.19012583574202213, "grad_norm": 0.6166105934610411, "learning_rate": 4.499432278994323e-05, "loss": 0.7383, "step": 6512 }, { "epoch": 0.1901550319698695, "grad_norm": 0.5956084785284842, "learning_rate": 4.499270072992701e-05, "loss": 0.7262, "step": 6513 }, { "epoch": 0.19018422819771685, "grad_norm": 0.6035802203556959, "learning_rate": 4.499107866991079e-05, "loss": 0.6845, "step": 6514 }, { "epoch": 0.19021342442556421, "grad_norm": 0.5659118918392166, "learning_rate": 4.498945660989457e-05, "loss": 0.6613, "step": 6515 }, { "epoch": 0.19024262065341158, "grad_norm": 0.6179768798495954, "learning_rate": 4.498783454987835e-05, "loss": 0.7068, "step": 6516 }, { "epoch": 0.19027181688125894, "grad_norm": 0.6661331139630988, "learning_rate": 4.498621248986213e-05, "loss": 0.7878, "step": 6517 }, { "epoch": 0.1903010131091063, "grad_norm": 0.6329503802460557, "learning_rate": 4.4984590429845906e-05, "loss": 0.7302, "step": 6518 }, { "epoch": 0.19033020933695366, "grad_norm": 0.5600694990538557, "learning_rate": 4.498296836982969e-05, "loss": 0.6202, "step": 6519 }, { "epoch": 0.19035940556480102, "grad_norm": 0.5928533136180771, "learning_rate": 4.498134630981346e-05, "loss": 0.7096, "step": 6520 }, { "epoch": 0.19038860179264838, "grad_norm": 0.5672973987395439, "learning_rate": 4.4979724249797244e-05, "loss": 0.6484, "step": 6521 }, { "epoch": 0.19041779802049574, "grad_norm": 0.5251607732720563, "learning_rate": 4.4978102189781026e-05, "loss": 0.5796, "step": 6522 }, { "epoch": 0.1904469942483431, "grad_norm": 0.5743283657650634, "learning_rate": 4.49764801297648e-05, "loss": 0.6537, "step": 6523 }, { "epoch": 0.19047619047619047, "grad_norm": 0.5559545530076515, "learning_rate": 4.497485806974858e-05, "loss": 0.6929, "step": 6524 }, { "epoch": 0.19050538670403783, "grad_norm": 0.5614655589742249, "learning_rate": 4.4973236009732365e-05, "loss": 0.6249, "step": 6525 }, { "epoch": 0.1905345829318852, "grad_norm": 0.5950565755309815, "learning_rate": 4.497161394971614e-05, "loss": 0.6747, "step": 6526 }, { "epoch": 0.19056377915973255, "grad_norm": 0.6330943669291722, "learning_rate": 4.496999188969992e-05, "loss": 0.7298, "step": 6527 }, { "epoch": 0.19059297538757994, "grad_norm": 0.5751279419557908, "learning_rate": 4.4968369829683696e-05, "loss": 0.6284, "step": 6528 }, { "epoch": 0.1906221716154273, "grad_norm": 0.5257056125328666, "learning_rate": 4.496674776966748e-05, "loss": 0.5518, "step": 6529 }, { "epoch": 0.19065136784327466, "grad_norm": 0.5996689472477646, "learning_rate": 4.496512570965126e-05, "loss": 0.6823, "step": 6530 }, { "epoch": 0.19068056407112202, "grad_norm": 0.5940998401508553, "learning_rate": 4.4963503649635035e-05, "loss": 0.6486, "step": 6531 }, { "epoch": 0.19070976029896938, "grad_norm": 0.5469761864191784, "learning_rate": 4.4961881589618823e-05, "loss": 0.5675, "step": 6532 }, { "epoch": 0.19073895652681674, "grad_norm": 0.6318578292333897, "learning_rate": 4.49602595296026e-05, "loss": 0.6631, "step": 6533 }, { "epoch": 0.1907681527546641, "grad_norm": 0.6537204017009353, "learning_rate": 4.495863746958638e-05, "loss": 0.7649, "step": 6534 }, { "epoch": 0.19079734898251147, "grad_norm": 0.6401515347140775, "learning_rate": 4.4957015409570155e-05, "loss": 0.7402, "step": 6535 }, { "epoch": 0.19082654521035883, "grad_norm": 0.5566156877081461, "learning_rate": 4.495539334955394e-05, "loss": 0.6394, "step": 6536 }, { "epoch": 0.1908557414382062, "grad_norm": 0.5774163378403036, "learning_rate": 4.495377128953772e-05, "loss": 0.7224, "step": 6537 }, { "epoch": 0.19088493766605355, "grad_norm": 0.5904614831658656, "learning_rate": 4.4952149229521494e-05, "loss": 0.6481, "step": 6538 }, { "epoch": 0.1909141338939009, "grad_norm": 0.5432623396716254, "learning_rate": 4.4950527169505275e-05, "loss": 0.5793, "step": 6539 }, { "epoch": 0.19094333012174827, "grad_norm": 0.5964033610209111, "learning_rate": 4.494890510948905e-05, "loss": 0.7201, "step": 6540 }, { "epoch": 0.19097252634959563, "grad_norm": 0.6105559752416972, "learning_rate": 4.494728304947283e-05, "loss": 0.5865, "step": 6541 }, { "epoch": 0.191001722577443, "grad_norm": 0.5137569406801653, "learning_rate": 4.4945660989456614e-05, "loss": 0.5452, "step": 6542 }, { "epoch": 0.19103091880529036, "grad_norm": 0.5980476097157934, "learning_rate": 4.494403892944039e-05, "loss": 0.7612, "step": 6543 }, { "epoch": 0.19106011503313772, "grad_norm": 0.6148866067027725, "learning_rate": 4.494241686942417e-05, "loss": 0.6121, "step": 6544 }, { "epoch": 0.19108931126098508, "grad_norm": 0.5587882177395865, "learning_rate": 4.4940794809407946e-05, "loss": 0.7103, "step": 6545 }, { "epoch": 0.19111850748883244, "grad_norm": 0.5792553219636126, "learning_rate": 4.493917274939173e-05, "loss": 0.6828, "step": 6546 }, { "epoch": 0.1911477037166798, "grad_norm": 0.5930042491701746, "learning_rate": 4.493755068937551e-05, "loss": 0.6411, "step": 6547 }, { "epoch": 0.19117689994452716, "grad_norm": 0.5850183488194965, "learning_rate": 4.4935928629359284e-05, "loss": 0.7068, "step": 6548 }, { "epoch": 0.19120609617237452, "grad_norm": 0.6023444159723842, "learning_rate": 4.4934306569343066e-05, "loss": 0.6956, "step": 6549 }, { "epoch": 0.19123529240022188, "grad_norm": 0.6352527396972001, "learning_rate": 4.493268450932685e-05, "loss": 0.7629, "step": 6550 }, { "epoch": 0.19126448862806925, "grad_norm": 0.6087107907253917, "learning_rate": 4.493106244931063e-05, "loss": 0.7204, "step": 6551 }, { "epoch": 0.1912936848559166, "grad_norm": 0.5595452997193658, "learning_rate": 4.492944038929441e-05, "loss": 0.6465, "step": 6552 }, { "epoch": 0.19132288108376397, "grad_norm": 0.6131051299418997, "learning_rate": 4.4927818329278186e-05, "loss": 0.7153, "step": 6553 }, { "epoch": 0.19135207731161133, "grad_norm": 0.5730315803300304, "learning_rate": 4.492619626926197e-05, "loss": 0.6605, "step": 6554 }, { "epoch": 0.1913812735394587, "grad_norm": 0.5703452842776746, "learning_rate": 4.492457420924574e-05, "loss": 0.5856, "step": 6555 }, { "epoch": 0.19141046976730605, "grad_norm": 0.630034719713465, "learning_rate": 4.4922952149229525e-05, "loss": 0.7362, "step": 6556 }, { "epoch": 0.1914396659951534, "grad_norm": 0.5400825875487925, "learning_rate": 4.4921330089213306e-05, "loss": 0.5816, "step": 6557 }, { "epoch": 0.1914688622230008, "grad_norm": 0.5821826001819901, "learning_rate": 4.491970802919708e-05, "loss": 0.7013, "step": 6558 }, { "epoch": 0.19149805845084816, "grad_norm": 0.6036723450359729, "learning_rate": 4.491808596918086e-05, "loss": 0.7192, "step": 6559 }, { "epoch": 0.19152725467869552, "grad_norm": 0.5618094592292977, "learning_rate": 4.491646390916464e-05, "loss": 0.5731, "step": 6560 }, { "epoch": 0.19155645090654289, "grad_norm": 0.6137730783027354, "learning_rate": 4.491484184914842e-05, "loss": 0.6974, "step": 6561 }, { "epoch": 0.19158564713439025, "grad_norm": 0.5839896577508275, "learning_rate": 4.49132197891322e-05, "loss": 0.6591, "step": 6562 }, { "epoch": 0.1916148433622376, "grad_norm": 0.5580312047867858, "learning_rate": 4.491159772911598e-05, "loss": 0.6035, "step": 6563 }, { "epoch": 0.19164403959008497, "grad_norm": 0.5647780429996592, "learning_rate": 4.490997566909976e-05, "loss": 0.6472, "step": 6564 }, { "epoch": 0.19167323581793233, "grad_norm": 0.5894022832728028, "learning_rate": 4.4908353609083533e-05, "loss": 0.6517, "step": 6565 }, { "epoch": 0.1917024320457797, "grad_norm": 0.5764484610354284, "learning_rate": 4.4906731549067315e-05, "loss": 0.6677, "step": 6566 }, { "epoch": 0.19173162827362705, "grad_norm": 0.6249548041703067, "learning_rate": 4.49051094890511e-05, "loss": 0.6852, "step": 6567 }, { "epoch": 0.19176082450147441, "grad_norm": 0.6038626197881524, "learning_rate": 4.490348742903487e-05, "loss": 0.7414, "step": 6568 }, { "epoch": 0.19179002072932178, "grad_norm": 0.6123430338930875, "learning_rate": 4.490186536901866e-05, "loss": 0.6903, "step": 6569 }, { "epoch": 0.19181921695716914, "grad_norm": 0.5802142406510264, "learning_rate": 4.4900243309002436e-05, "loss": 0.6943, "step": 6570 }, { "epoch": 0.1918484131850165, "grad_norm": 0.6495837044220991, "learning_rate": 4.489862124898622e-05, "loss": 0.6587, "step": 6571 }, { "epoch": 0.19187760941286386, "grad_norm": 0.5556741671315684, "learning_rate": 4.489699918897e-05, "loss": 0.6183, "step": 6572 }, { "epoch": 0.19190680564071122, "grad_norm": 0.6077672838544114, "learning_rate": 4.4895377128953774e-05, "loss": 0.6789, "step": 6573 }, { "epoch": 0.19193600186855858, "grad_norm": 0.6107893668057939, "learning_rate": 4.4893755068937556e-05, "loss": 0.6562, "step": 6574 }, { "epoch": 0.19196519809640594, "grad_norm": 0.5947971168801981, "learning_rate": 4.489213300892133e-05, "loss": 0.6563, "step": 6575 }, { "epoch": 0.1919943943242533, "grad_norm": 0.613940496307331, "learning_rate": 4.489051094890511e-05, "loss": 0.7183, "step": 6576 }, { "epoch": 0.19202359055210066, "grad_norm": 0.6553019944670965, "learning_rate": 4.4888888888888894e-05, "loss": 0.7558, "step": 6577 }, { "epoch": 0.19205278677994803, "grad_norm": 0.5644878446849266, "learning_rate": 4.488726682887267e-05, "loss": 0.6538, "step": 6578 }, { "epoch": 0.1920819830077954, "grad_norm": 0.5500743274764015, "learning_rate": 4.488564476885645e-05, "loss": 0.6434, "step": 6579 }, { "epoch": 0.19211117923564275, "grad_norm": 0.6257672572963516, "learning_rate": 4.4884022708840226e-05, "loss": 0.726, "step": 6580 }, { "epoch": 0.1921403754634901, "grad_norm": 0.5961514976155742, "learning_rate": 4.488240064882401e-05, "loss": 0.646, "step": 6581 }, { "epoch": 0.19216957169133747, "grad_norm": 0.6043933538873684, "learning_rate": 4.488077858880779e-05, "loss": 0.7258, "step": 6582 }, { "epoch": 0.19219876791918483, "grad_norm": 0.5803053759018675, "learning_rate": 4.4879156528791565e-05, "loss": 0.674, "step": 6583 }, { "epoch": 0.1922279641470322, "grad_norm": 0.5972626296078325, "learning_rate": 4.4877534468775346e-05, "loss": 0.6763, "step": 6584 }, { "epoch": 0.19225716037487955, "grad_norm": 0.5683726906327689, "learning_rate": 4.487591240875912e-05, "loss": 0.6591, "step": 6585 }, { "epoch": 0.19228635660272692, "grad_norm": 0.6327358451083492, "learning_rate": 4.48742903487429e-05, "loss": 0.7319, "step": 6586 }, { "epoch": 0.19231555283057428, "grad_norm": 0.5235448688671469, "learning_rate": 4.4872668288726685e-05, "loss": 0.5474, "step": 6587 }, { "epoch": 0.19234474905842167, "grad_norm": 0.557573341802381, "learning_rate": 4.4871046228710467e-05, "loss": 0.6005, "step": 6588 }, { "epoch": 0.19237394528626903, "grad_norm": 0.672206543691357, "learning_rate": 4.486942416869425e-05, "loss": 0.7218, "step": 6589 }, { "epoch": 0.1924031415141164, "grad_norm": 0.5614730604903235, "learning_rate": 4.486780210867802e-05, "loss": 0.5966, "step": 6590 }, { "epoch": 0.19243233774196375, "grad_norm": 0.5444571622751042, "learning_rate": 4.4866180048661805e-05, "loss": 0.588, "step": 6591 }, { "epoch": 0.1924615339698111, "grad_norm": 0.5911714324380046, "learning_rate": 4.486455798864559e-05, "loss": 0.6731, "step": 6592 }, { "epoch": 0.19249073019765847, "grad_norm": 0.6582213495122963, "learning_rate": 4.486293592862936e-05, "loss": 0.71, "step": 6593 }, { "epoch": 0.19251992642550583, "grad_norm": 0.5170632469069767, "learning_rate": 4.4861313868613144e-05, "loss": 0.5493, "step": 6594 }, { "epoch": 0.1925491226533532, "grad_norm": 0.5523454460600015, "learning_rate": 4.485969180859692e-05, "loss": 0.5912, "step": 6595 }, { "epoch": 0.19257831888120056, "grad_norm": 0.6031086754794567, "learning_rate": 4.48580697485807e-05, "loss": 0.7587, "step": 6596 }, { "epoch": 0.19260751510904792, "grad_norm": 0.5772674641383315, "learning_rate": 4.485644768856448e-05, "loss": 0.7068, "step": 6597 }, { "epoch": 0.19263671133689528, "grad_norm": 0.5605769464540137, "learning_rate": 4.485482562854826e-05, "loss": 0.5722, "step": 6598 }, { "epoch": 0.19266590756474264, "grad_norm": 0.6003178765272583, "learning_rate": 4.485320356853204e-05, "loss": 0.7129, "step": 6599 }, { "epoch": 0.19269510379259, "grad_norm": 0.6041891425125385, "learning_rate": 4.4851581508515814e-05, "loss": 0.7232, "step": 6600 }, { "epoch": 0.19272430002043736, "grad_norm": 0.5714463227627685, "learning_rate": 4.4849959448499596e-05, "loss": 0.6392, "step": 6601 }, { "epoch": 0.19275349624828472, "grad_norm": 0.6486303022406752, "learning_rate": 4.484833738848338e-05, "loss": 0.7119, "step": 6602 }, { "epoch": 0.19278269247613208, "grad_norm": 0.5638327454808405, "learning_rate": 4.484671532846715e-05, "loss": 0.6498, "step": 6603 }, { "epoch": 0.19281188870397944, "grad_norm": 0.5777810816417418, "learning_rate": 4.4845093268450934e-05, "loss": 0.6619, "step": 6604 }, { "epoch": 0.1928410849318268, "grad_norm": 0.5720808843323772, "learning_rate": 4.484347120843471e-05, "loss": 0.6756, "step": 6605 }, { "epoch": 0.19287028115967417, "grad_norm": 0.6532626739059014, "learning_rate": 4.484184914841849e-05, "loss": 0.7429, "step": 6606 }, { "epoch": 0.19289947738752153, "grad_norm": 0.5578705685336665, "learning_rate": 4.484022708840227e-05, "loss": 0.6304, "step": 6607 }, { "epoch": 0.1929286736153689, "grad_norm": 0.6147322546113891, "learning_rate": 4.4838605028386054e-05, "loss": 0.7423, "step": 6608 }, { "epoch": 0.19295786984321625, "grad_norm": 0.6241525767323539, "learning_rate": 4.4836982968369836e-05, "loss": 0.7506, "step": 6609 }, { "epoch": 0.1929870660710636, "grad_norm": 0.5664396657213037, "learning_rate": 4.483536090835361e-05, "loss": 0.6645, "step": 6610 }, { "epoch": 0.19301626229891097, "grad_norm": 0.558237956964899, "learning_rate": 4.483373884833739e-05, "loss": 0.6304, "step": 6611 }, { "epoch": 0.19304545852675833, "grad_norm": 0.5829409974130805, "learning_rate": 4.483211678832117e-05, "loss": 0.7392, "step": 6612 }, { "epoch": 0.1930746547546057, "grad_norm": 0.5640177581047958, "learning_rate": 4.483049472830495e-05, "loss": 0.6128, "step": 6613 }, { "epoch": 0.19310385098245306, "grad_norm": 0.5712016719816748, "learning_rate": 4.482887266828873e-05, "loss": 0.6537, "step": 6614 }, { "epoch": 0.19313304721030042, "grad_norm": 0.597721280967499, "learning_rate": 4.4827250608272506e-05, "loss": 0.6624, "step": 6615 }, { "epoch": 0.19316224343814778, "grad_norm": 0.5899155696325225, "learning_rate": 4.482562854825629e-05, "loss": 0.6489, "step": 6616 }, { "epoch": 0.19319143966599514, "grad_norm": 0.5809423834462312, "learning_rate": 4.482400648824007e-05, "loss": 0.6176, "step": 6617 }, { "epoch": 0.1932206358938425, "grad_norm": 0.5803148688374195, "learning_rate": 4.4822384428223845e-05, "loss": 0.6397, "step": 6618 }, { "epoch": 0.1932498321216899, "grad_norm": 0.5912205620980132, "learning_rate": 4.482076236820763e-05, "loss": 0.6658, "step": 6619 }, { "epoch": 0.19327902834953725, "grad_norm": 0.5479922322252555, "learning_rate": 4.48191403081914e-05, "loss": 0.6411, "step": 6620 }, { "epoch": 0.1933082245773846, "grad_norm": 0.5786659325826968, "learning_rate": 4.4817518248175183e-05, "loss": 0.6526, "step": 6621 }, { "epoch": 0.19333742080523197, "grad_norm": 0.5787093117027143, "learning_rate": 4.4815896188158965e-05, "loss": 0.6455, "step": 6622 }, { "epoch": 0.19336661703307934, "grad_norm": 0.6123481428814439, "learning_rate": 4.481427412814274e-05, "loss": 0.7077, "step": 6623 }, { "epoch": 0.1933958132609267, "grad_norm": 0.580540756566591, "learning_rate": 4.481265206812652e-05, "loss": 0.6519, "step": 6624 }, { "epoch": 0.19342500948877406, "grad_norm": 0.6175061505768747, "learning_rate": 4.48110300081103e-05, "loss": 0.6701, "step": 6625 }, { "epoch": 0.19345420571662142, "grad_norm": 0.5997353861704314, "learning_rate": 4.4809407948094085e-05, "loss": 0.6953, "step": 6626 }, { "epoch": 0.19348340194446878, "grad_norm": 0.5756435413521612, "learning_rate": 4.480778588807786e-05, "loss": 0.6256, "step": 6627 }, { "epoch": 0.19351259817231614, "grad_norm": 0.5669991752486341, "learning_rate": 4.480616382806164e-05, "loss": 0.6181, "step": 6628 }, { "epoch": 0.1935417944001635, "grad_norm": 0.5562263685032138, "learning_rate": 4.4804541768045424e-05, "loss": 0.646, "step": 6629 }, { "epoch": 0.19357099062801086, "grad_norm": 0.6103002966758392, "learning_rate": 4.48029197080292e-05, "loss": 0.684, "step": 6630 }, { "epoch": 0.19360018685585823, "grad_norm": 0.5707255169631305, "learning_rate": 4.480129764801298e-05, "loss": 0.6719, "step": 6631 }, { "epoch": 0.1936293830837056, "grad_norm": 0.6097685233796193, "learning_rate": 4.4799675587996756e-05, "loss": 0.7038, "step": 6632 }, { "epoch": 0.19365857931155295, "grad_norm": 0.5535433331859917, "learning_rate": 4.479805352798054e-05, "loss": 0.6508, "step": 6633 }, { "epoch": 0.1936877755394003, "grad_norm": 0.6156396207641472, "learning_rate": 4.479643146796432e-05, "loss": 0.7111, "step": 6634 }, { "epoch": 0.19371697176724767, "grad_norm": 0.5612871310440432, "learning_rate": 4.4794809407948094e-05, "loss": 0.6606, "step": 6635 }, { "epoch": 0.19374616799509503, "grad_norm": 0.5336549107369593, "learning_rate": 4.4793187347931876e-05, "loss": 0.57, "step": 6636 }, { "epoch": 0.1937753642229424, "grad_norm": 0.6041993122767332, "learning_rate": 4.479156528791566e-05, "loss": 0.741, "step": 6637 }, { "epoch": 0.19380456045078975, "grad_norm": 0.5304977235740833, "learning_rate": 4.478994322789943e-05, "loss": 0.5776, "step": 6638 }, { "epoch": 0.19383375667863711, "grad_norm": 0.6489898967750171, "learning_rate": 4.4788321167883214e-05, "loss": 0.7195, "step": 6639 }, { "epoch": 0.19386295290648448, "grad_norm": 0.5817040861726591, "learning_rate": 4.478669910786699e-05, "loss": 0.651, "step": 6640 }, { "epoch": 0.19389214913433184, "grad_norm": 0.6998960405703273, "learning_rate": 4.478507704785077e-05, "loss": 0.6934, "step": 6641 }, { "epoch": 0.1939213453621792, "grad_norm": 0.7059340279542035, "learning_rate": 4.478345498783455e-05, "loss": 0.6682, "step": 6642 }, { "epoch": 0.19395054159002656, "grad_norm": 0.5776099664041247, "learning_rate": 4.478183292781833e-05, "loss": 0.6575, "step": 6643 }, { "epoch": 0.19397973781787392, "grad_norm": 0.564176392127792, "learning_rate": 4.478021086780211e-05, "loss": 0.5768, "step": 6644 }, { "epoch": 0.19400893404572128, "grad_norm": 0.60225652119866, "learning_rate": 4.477858880778589e-05, "loss": 0.6931, "step": 6645 }, { "epoch": 0.19403813027356864, "grad_norm": 0.6344954010676312, "learning_rate": 4.477696674776967e-05, "loss": 0.6976, "step": 6646 }, { "epoch": 0.194067326501416, "grad_norm": 0.5507776046144883, "learning_rate": 4.477534468775345e-05, "loss": 0.5625, "step": 6647 }, { "epoch": 0.19409652272926337, "grad_norm": 0.580206184776871, "learning_rate": 4.477372262773723e-05, "loss": 0.6716, "step": 6648 }, { "epoch": 0.19412571895711075, "grad_norm": 0.5474604846141871, "learning_rate": 4.477210056772101e-05, "loss": 0.6255, "step": 6649 }, { "epoch": 0.19415491518495812, "grad_norm": 0.6055653032888632, "learning_rate": 4.477047850770479e-05, "loss": 0.6941, "step": 6650 }, { "epoch": 0.19418411141280548, "grad_norm": 0.572510350088792, "learning_rate": 4.476885644768857e-05, "loss": 0.5934, "step": 6651 }, { "epoch": 0.19421330764065284, "grad_norm": 0.5931788304071896, "learning_rate": 4.4767234387672343e-05, "loss": 0.639, "step": 6652 }, { "epoch": 0.1942425038685002, "grad_norm": 0.6179867252440522, "learning_rate": 4.4765612327656125e-05, "loss": 0.701, "step": 6653 }, { "epoch": 0.19427170009634756, "grad_norm": 0.5351740809720356, "learning_rate": 4.476399026763991e-05, "loss": 0.5927, "step": 6654 }, { "epoch": 0.19430089632419492, "grad_norm": 0.6027952586687721, "learning_rate": 4.476236820762368e-05, "loss": 0.6974, "step": 6655 }, { "epoch": 0.19433009255204228, "grad_norm": 0.6371510062407051, "learning_rate": 4.4760746147607464e-05, "loss": 0.6865, "step": 6656 }, { "epoch": 0.19435928877988964, "grad_norm": 0.5470123287446741, "learning_rate": 4.475912408759124e-05, "loss": 0.6116, "step": 6657 }, { "epoch": 0.194388485007737, "grad_norm": 0.7748266936215994, "learning_rate": 4.475750202757502e-05, "loss": 0.5905, "step": 6658 }, { "epoch": 0.19441768123558437, "grad_norm": 0.6270840500930916, "learning_rate": 4.47558799675588e-05, "loss": 0.7327, "step": 6659 }, { "epoch": 0.19444687746343173, "grad_norm": 0.6014347895381638, "learning_rate": 4.475425790754258e-05, "loss": 0.7317, "step": 6660 }, { "epoch": 0.1944760736912791, "grad_norm": 0.6071334084117651, "learning_rate": 4.475263584752636e-05, "loss": 0.6678, "step": 6661 }, { "epoch": 0.19450526991912645, "grad_norm": 0.6546033480732559, "learning_rate": 4.475101378751014e-05, "loss": 0.7314, "step": 6662 }, { "epoch": 0.1945344661469738, "grad_norm": 0.5842840331595024, "learning_rate": 4.4749391727493916e-05, "loss": 0.6988, "step": 6663 }, { "epoch": 0.19456366237482117, "grad_norm": 0.5125713107280875, "learning_rate": 4.4747769667477704e-05, "loss": 0.5782, "step": 6664 }, { "epoch": 0.19459285860266853, "grad_norm": 0.5634424774491144, "learning_rate": 4.474614760746148e-05, "loss": 0.6376, "step": 6665 }, { "epoch": 0.1946220548305159, "grad_norm": 0.6121844500119563, "learning_rate": 4.474452554744526e-05, "loss": 0.7572, "step": 6666 }, { "epoch": 0.19465125105836326, "grad_norm": 0.6002137302581895, "learning_rate": 4.4742903487429036e-05, "loss": 0.7158, "step": 6667 }, { "epoch": 0.19468044728621062, "grad_norm": 0.6435280852015904, "learning_rate": 4.474128142741282e-05, "loss": 0.6574, "step": 6668 }, { "epoch": 0.19470964351405798, "grad_norm": 0.5590997634608988, "learning_rate": 4.47396593673966e-05, "loss": 0.6057, "step": 6669 }, { "epoch": 0.19473883974190534, "grad_norm": 0.5598204115714588, "learning_rate": 4.4738037307380375e-05, "loss": 0.65, "step": 6670 }, { "epoch": 0.1947680359697527, "grad_norm": 0.6152251366284436, "learning_rate": 4.4736415247364156e-05, "loss": 0.7168, "step": 6671 }, { "epoch": 0.19479723219760006, "grad_norm": 0.6021154977877291, "learning_rate": 4.473479318734793e-05, "loss": 0.717, "step": 6672 }, { "epoch": 0.19482642842544742, "grad_norm": 0.5680389709386443, "learning_rate": 4.473317112733171e-05, "loss": 0.6682, "step": 6673 }, { "epoch": 0.19485562465329478, "grad_norm": 0.5933587003291257, "learning_rate": 4.4731549067315495e-05, "loss": 0.7055, "step": 6674 }, { "epoch": 0.19488482088114215, "grad_norm": 0.5450768864406532, "learning_rate": 4.472992700729927e-05, "loss": 0.5753, "step": 6675 }, { "epoch": 0.1949140171089895, "grad_norm": 0.6170295661589255, "learning_rate": 4.472830494728305e-05, "loss": 0.7557, "step": 6676 }, { "epoch": 0.19494321333683687, "grad_norm": 0.5975419156528499, "learning_rate": 4.4726682887266827e-05, "loss": 0.6159, "step": 6677 }, { "epoch": 0.19497240956468423, "grad_norm": 0.552206342789874, "learning_rate": 4.472506082725061e-05, "loss": 0.6446, "step": 6678 }, { "epoch": 0.19500160579253162, "grad_norm": 0.6080721151886104, "learning_rate": 4.472343876723439e-05, "loss": 0.6857, "step": 6679 }, { "epoch": 0.19503080202037898, "grad_norm": 0.6179158502268773, "learning_rate": 4.4721816707218165e-05, "loss": 0.7189, "step": 6680 }, { "epoch": 0.19505999824822634, "grad_norm": 0.5337454158603496, "learning_rate": 4.472019464720195e-05, "loss": 0.5928, "step": 6681 }, { "epoch": 0.1950891944760737, "grad_norm": 0.5690544307051526, "learning_rate": 4.471857258718573e-05, "loss": 0.6391, "step": 6682 }, { "epoch": 0.19511839070392106, "grad_norm": 0.5606553916103463, "learning_rate": 4.471695052716951e-05, "loss": 0.6198, "step": 6683 }, { "epoch": 0.19514758693176842, "grad_norm": 0.5503695935614665, "learning_rate": 4.471532846715329e-05, "loss": 0.6267, "step": 6684 }, { "epoch": 0.19517678315961579, "grad_norm": 0.5729705238562421, "learning_rate": 4.471370640713707e-05, "loss": 0.6384, "step": 6685 }, { "epoch": 0.19520597938746315, "grad_norm": 0.6374783492804033, "learning_rate": 4.471208434712085e-05, "loss": 0.6477, "step": 6686 }, { "epoch": 0.1952351756153105, "grad_norm": 0.6408208383439017, "learning_rate": 4.4710462287104624e-05, "loss": 0.7089, "step": 6687 }, { "epoch": 0.19526437184315787, "grad_norm": 0.5699604777967126, "learning_rate": 4.4708840227088406e-05, "loss": 0.613, "step": 6688 }, { "epoch": 0.19529356807100523, "grad_norm": 0.5984434463567393, "learning_rate": 4.470721816707219e-05, "loss": 0.6991, "step": 6689 }, { "epoch": 0.1953227642988526, "grad_norm": 0.5488519289122233, "learning_rate": 4.470559610705596e-05, "loss": 0.6947, "step": 6690 }, { "epoch": 0.19535196052669995, "grad_norm": 0.5947411737026558, "learning_rate": 4.4703974047039744e-05, "loss": 0.7395, "step": 6691 }, { "epoch": 0.1953811567545473, "grad_norm": 0.5655941468417496, "learning_rate": 4.470235198702352e-05, "loss": 0.6592, "step": 6692 }, { "epoch": 0.19541035298239467, "grad_norm": 0.7978773779467135, "learning_rate": 4.47007299270073e-05, "loss": 0.8793, "step": 6693 }, { "epoch": 0.19543954921024204, "grad_norm": 0.5625124899196439, "learning_rate": 4.469910786699108e-05, "loss": 0.6315, "step": 6694 }, { "epoch": 0.1954687454380894, "grad_norm": 0.5460392092934048, "learning_rate": 4.469748580697486e-05, "loss": 0.6126, "step": 6695 }, { "epoch": 0.19549794166593676, "grad_norm": 0.6002831275590101, "learning_rate": 4.469586374695864e-05, "loss": 0.603, "step": 6696 }, { "epoch": 0.19552713789378412, "grad_norm": 0.5235884847312217, "learning_rate": 4.4694241686942414e-05, "loss": 0.5926, "step": 6697 }, { "epoch": 0.19555633412163148, "grad_norm": 0.5610559189369257, "learning_rate": 4.4692619626926196e-05, "loss": 0.6047, "step": 6698 }, { "epoch": 0.19558553034947884, "grad_norm": 0.616892363884413, "learning_rate": 4.469099756690998e-05, "loss": 0.6817, "step": 6699 }, { "epoch": 0.1956147265773262, "grad_norm": 0.6179079905848578, "learning_rate": 4.468937550689375e-05, "loss": 0.7097, "step": 6700 }, { "epoch": 0.19564392280517356, "grad_norm": 0.5814626190973795, "learning_rate": 4.4687753446877535e-05, "loss": 0.6684, "step": 6701 }, { "epoch": 0.19567311903302093, "grad_norm": 0.6130237923127378, "learning_rate": 4.4686131386861316e-05, "loss": 0.7553, "step": 6702 }, { "epoch": 0.1957023152608683, "grad_norm": 0.5741243600743341, "learning_rate": 4.46845093268451e-05, "loss": 0.6915, "step": 6703 }, { "epoch": 0.19573151148871565, "grad_norm": 0.6093484617796943, "learning_rate": 4.468288726682888e-05, "loss": 0.6733, "step": 6704 }, { "epoch": 0.195760707716563, "grad_norm": 0.5660214610570776, "learning_rate": 4.4681265206812655e-05, "loss": 0.6197, "step": 6705 }, { "epoch": 0.19578990394441037, "grad_norm": 0.5832649081434879, "learning_rate": 4.467964314679644e-05, "loss": 0.6244, "step": 6706 }, { "epoch": 0.19581910017225773, "grad_norm": 0.5360805336638376, "learning_rate": 4.467802108678021e-05, "loss": 0.5718, "step": 6707 }, { "epoch": 0.1958482964001051, "grad_norm": 0.5931857588191302, "learning_rate": 4.4676399026763993e-05, "loss": 0.7272, "step": 6708 }, { "epoch": 0.19587749262795248, "grad_norm": 0.5704690167841479, "learning_rate": 4.4674776966747775e-05, "loss": 0.6964, "step": 6709 }, { "epoch": 0.19590668885579984, "grad_norm": 0.5948246666390915, "learning_rate": 4.467315490673155e-05, "loss": 0.6897, "step": 6710 }, { "epoch": 0.1959358850836472, "grad_norm": 0.836086066834989, "learning_rate": 4.467153284671533e-05, "loss": 0.6983, "step": 6711 }, { "epoch": 0.19596508131149457, "grad_norm": 0.5898959166368797, "learning_rate": 4.466991078669911e-05, "loss": 0.5859, "step": 6712 }, { "epoch": 0.19599427753934193, "grad_norm": 0.55099135114458, "learning_rate": 4.466828872668289e-05, "loss": 0.625, "step": 6713 }, { "epoch": 0.1960234737671893, "grad_norm": 0.5940345993103218, "learning_rate": 4.466666666666667e-05, "loss": 0.7118, "step": 6714 }, { "epoch": 0.19605266999503665, "grad_norm": 0.544367438116517, "learning_rate": 4.4665044606650445e-05, "loss": 0.651, "step": 6715 }, { "epoch": 0.196081866222884, "grad_norm": 0.564062298562981, "learning_rate": 4.466342254663423e-05, "loss": 0.6695, "step": 6716 }, { "epoch": 0.19611106245073137, "grad_norm": 0.5143855318172552, "learning_rate": 4.4661800486618e-05, "loss": 0.5713, "step": 6717 }, { "epoch": 0.19614025867857873, "grad_norm": 0.5391266406739695, "learning_rate": 4.4660178426601784e-05, "loss": 0.6169, "step": 6718 }, { "epoch": 0.1961694549064261, "grad_norm": 0.6633015528418812, "learning_rate": 4.4658556366585566e-05, "loss": 0.719, "step": 6719 }, { "epoch": 0.19619865113427346, "grad_norm": 0.6303199431341173, "learning_rate": 4.465693430656935e-05, "loss": 0.7476, "step": 6720 }, { "epoch": 0.19622784736212082, "grad_norm": 0.5728388987268266, "learning_rate": 4.465531224655313e-05, "loss": 0.6773, "step": 6721 }, { "epoch": 0.19625704358996818, "grad_norm": 0.6171393228523837, "learning_rate": 4.4653690186536904e-05, "loss": 0.7448, "step": 6722 }, { "epoch": 0.19628623981781554, "grad_norm": 0.5331978973866286, "learning_rate": 4.4652068126520686e-05, "loss": 0.5654, "step": 6723 }, { "epoch": 0.1963154360456629, "grad_norm": 0.5922457410732577, "learning_rate": 4.465044606650447e-05, "loss": 0.7058, "step": 6724 }, { "epoch": 0.19634463227351026, "grad_norm": 0.6868584587101959, "learning_rate": 4.464882400648824e-05, "loss": 0.7446, "step": 6725 }, { "epoch": 0.19637382850135762, "grad_norm": 0.5550747845906303, "learning_rate": 4.4647201946472024e-05, "loss": 0.6558, "step": 6726 }, { "epoch": 0.19640302472920498, "grad_norm": 0.6955052556359282, "learning_rate": 4.46455798864558e-05, "loss": 0.6539, "step": 6727 }, { "epoch": 0.19643222095705234, "grad_norm": 0.595649389072116, "learning_rate": 4.464395782643958e-05, "loss": 0.6624, "step": 6728 }, { "epoch": 0.1964614171848997, "grad_norm": 0.5632086213585292, "learning_rate": 4.464233576642336e-05, "loss": 0.6818, "step": 6729 }, { "epoch": 0.19649061341274707, "grad_norm": 0.5929294468037154, "learning_rate": 4.464071370640714e-05, "loss": 0.6935, "step": 6730 }, { "epoch": 0.19651980964059443, "grad_norm": 0.5461621249033264, "learning_rate": 4.463909164639092e-05, "loss": 0.587, "step": 6731 }, { "epoch": 0.1965490058684418, "grad_norm": 0.6388629340710699, "learning_rate": 4.4637469586374695e-05, "loss": 0.7371, "step": 6732 }, { "epoch": 0.19657820209628915, "grad_norm": 0.5810756654138509, "learning_rate": 4.4635847526358476e-05, "loss": 0.6689, "step": 6733 }, { "epoch": 0.1966073983241365, "grad_norm": 0.5627887050325645, "learning_rate": 4.463422546634226e-05, "loss": 0.6251, "step": 6734 }, { "epoch": 0.19663659455198387, "grad_norm": 0.588948090421085, "learning_rate": 4.463260340632603e-05, "loss": 0.6674, "step": 6735 }, { "epoch": 0.19666579077983123, "grad_norm": 0.5619878134677635, "learning_rate": 4.4630981346309815e-05, "loss": 0.6494, "step": 6736 }, { "epoch": 0.1966949870076786, "grad_norm": 0.562192526008742, "learning_rate": 4.462935928629359e-05, "loss": 0.633, "step": 6737 }, { "epoch": 0.19672418323552596, "grad_norm": 0.5864842160038379, "learning_rate": 4.462773722627737e-05, "loss": 0.6638, "step": 6738 }, { "epoch": 0.19675337946337335, "grad_norm": 0.6345276872340285, "learning_rate": 4.4626115166261153e-05, "loss": 0.7628, "step": 6739 }, { "epoch": 0.1967825756912207, "grad_norm": 0.6125797508523544, "learning_rate": 4.4624493106244935e-05, "loss": 0.7749, "step": 6740 }, { "epoch": 0.19681177191906807, "grad_norm": 0.5775739204227552, "learning_rate": 4.462287104622872e-05, "loss": 0.6456, "step": 6741 }, { "epoch": 0.19684096814691543, "grad_norm": 0.5604372800228679, "learning_rate": 4.462124898621249e-05, "loss": 0.6325, "step": 6742 }, { "epoch": 0.1968701643747628, "grad_norm": 0.5806131539150626, "learning_rate": 4.4619626926196274e-05, "loss": 0.6999, "step": 6743 }, { "epoch": 0.19689936060261015, "grad_norm": 0.5959931710365247, "learning_rate": 4.461800486618005e-05, "loss": 0.7584, "step": 6744 }, { "epoch": 0.1969285568304575, "grad_norm": 0.5654309175392739, "learning_rate": 4.461638280616383e-05, "loss": 0.638, "step": 6745 }, { "epoch": 0.19695775305830487, "grad_norm": 0.5484510641432583, "learning_rate": 4.461476074614761e-05, "loss": 0.6193, "step": 6746 }, { "epoch": 0.19698694928615224, "grad_norm": 0.6005909320006587, "learning_rate": 4.461313868613139e-05, "loss": 0.6695, "step": 6747 }, { "epoch": 0.1970161455139996, "grad_norm": 0.5781432177940241, "learning_rate": 4.461151662611517e-05, "loss": 0.7155, "step": 6748 }, { "epoch": 0.19704534174184696, "grad_norm": 0.6383407442888855, "learning_rate": 4.460989456609895e-05, "loss": 0.6885, "step": 6749 }, { "epoch": 0.19707453796969432, "grad_norm": 0.6641198795636207, "learning_rate": 4.4608272506082726e-05, "loss": 0.7331, "step": 6750 }, { "epoch": 0.19710373419754168, "grad_norm": 0.6020424132848148, "learning_rate": 4.460665044606651e-05, "loss": 0.7487, "step": 6751 }, { "epoch": 0.19713293042538904, "grad_norm": 0.6372341684928109, "learning_rate": 4.460502838605028e-05, "loss": 0.7721, "step": 6752 }, { "epoch": 0.1971621266532364, "grad_norm": 0.5651900495181253, "learning_rate": 4.4603406326034064e-05, "loss": 0.6548, "step": 6753 }, { "epoch": 0.19719132288108376, "grad_norm": 0.5994837648373972, "learning_rate": 4.4601784266017846e-05, "loss": 0.7081, "step": 6754 }, { "epoch": 0.19722051910893112, "grad_norm": 0.6215366524016561, "learning_rate": 4.460016220600162e-05, "loss": 0.7605, "step": 6755 }, { "epoch": 0.19724971533677849, "grad_norm": 0.5941978185232925, "learning_rate": 4.45985401459854e-05, "loss": 0.7032, "step": 6756 }, { "epoch": 0.19727891156462585, "grad_norm": 0.6098363226628598, "learning_rate": 4.459691808596918e-05, "loss": 0.6487, "step": 6757 }, { "epoch": 0.1973081077924732, "grad_norm": 0.7309508365897922, "learning_rate": 4.4595296025952966e-05, "loss": 0.6931, "step": 6758 }, { "epoch": 0.19733730402032057, "grad_norm": 0.5654040377802838, "learning_rate": 4.459367396593674e-05, "loss": 0.6527, "step": 6759 }, { "epoch": 0.19736650024816793, "grad_norm": 0.5839066448038259, "learning_rate": 4.459205190592052e-05, "loss": 0.681, "step": 6760 }, { "epoch": 0.1973956964760153, "grad_norm": 0.5797742268201318, "learning_rate": 4.4590429845904305e-05, "loss": 0.6692, "step": 6761 }, { "epoch": 0.19742489270386265, "grad_norm": 0.5876642936529115, "learning_rate": 4.458880778588808e-05, "loss": 0.7005, "step": 6762 }, { "epoch": 0.19745408893171001, "grad_norm": 0.5406437218688376, "learning_rate": 4.458718572587186e-05, "loss": 0.5732, "step": 6763 }, { "epoch": 0.19748328515955738, "grad_norm": 0.5857111200371933, "learning_rate": 4.4585563665855637e-05, "loss": 0.6945, "step": 6764 }, { "epoch": 0.19751248138740474, "grad_norm": 0.64036608475807, "learning_rate": 4.458394160583942e-05, "loss": 0.6783, "step": 6765 }, { "epoch": 0.1975416776152521, "grad_norm": 0.6035611105299048, "learning_rate": 4.45823195458232e-05, "loss": 0.7475, "step": 6766 }, { "epoch": 0.19757087384309946, "grad_norm": 0.638365557612668, "learning_rate": 4.4580697485806975e-05, "loss": 0.741, "step": 6767 }, { "epoch": 0.19760007007094682, "grad_norm": 0.7127411194817098, "learning_rate": 4.457907542579076e-05, "loss": 0.6411, "step": 6768 }, { "epoch": 0.1976292662987942, "grad_norm": 0.6376558339957308, "learning_rate": 4.457745336577454e-05, "loss": 0.7058, "step": 6769 }, { "epoch": 0.19765846252664157, "grad_norm": 0.5852449496911829, "learning_rate": 4.4575831305758314e-05, "loss": 0.6339, "step": 6770 }, { "epoch": 0.19768765875448893, "grad_norm": 0.5826754468241055, "learning_rate": 4.4574209245742095e-05, "loss": 0.6873, "step": 6771 }, { "epoch": 0.1977168549823363, "grad_norm": 0.6196409590161917, "learning_rate": 4.457258718572587e-05, "loss": 0.7219, "step": 6772 }, { "epoch": 0.19774605121018365, "grad_norm": 0.6025928110097961, "learning_rate": 4.457096512570965e-05, "loss": 0.6705, "step": 6773 }, { "epoch": 0.19777524743803102, "grad_norm": 0.6158324273689543, "learning_rate": 4.4569343065693434e-05, "loss": 0.7289, "step": 6774 }, { "epoch": 0.19780444366587838, "grad_norm": 0.5502436326458537, "learning_rate": 4.456772100567721e-05, "loss": 0.5987, "step": 6775 }, { "epoch": 0.19783363989372574, "grad_norm": 0.6315598869338007, "learning_rate": 4.456609894566099e-05, "loss": 0.7745, "step": 6776 }, { "epoch": 0.1978628361215731, "grad_norm": 0.565993034547435, "learning_rate": 4.456447688564477e-05, "loss": 0.6836, "step": 6777 }, { "epoch": 0.19789203234942046, "grad_norm": 0.5868180984486205, "learning_rate": 4.4562854825628554e-05, "loss": 0.666, "step": 6778 }, { "epoch": 0.19792122857726782, "grad_norm": 0.6231445201270465, "learning_rate": 4.456123276561233e-05, "loss": 0.6703, "step": 6779 }, { "epoch": 0.19795042480511518, "grad_norm": 0.5434945653920182, "learning_rate": 4.455961070559611e-05, "loss": 0.6149, "step": 6780 }, { "epoch": 0.19797962103296254, "grad_norm": 0.5469095573975924, "learning_rate": 4.455798864557989e-05, "loss": 0.5421, "step": 6781 }, { "epoch": 0.1980088172608099, "grad_norm": 0.6023172232210217, "learning_rate": 4.455636658556367e-05, "loss": 0.6704, "step": 6782 }, { "epoch": 0.19803801348865727, "grad_norm": 0.6094956298637854, "learning_rate": 4.455474452554745e-05, "loss": 0.6938, "step": 6783 }, { "epoch": 0.19806720971650463, "grad_norm": 0.5792389401145358, "learning_rate": 4.4553122465531224e-05, "loss": 0.6293, "step": 6784 }, { "epoch": 0.198096405944352, "grad_norm": 0.7042376841264435, "learning_rate": 4.4551500405515006e-05, "loss": 0.7599, "step": 6785 }, { "epoch": 0.19812560217219935, "grad_norm": 0.5872486505848826, "learning_rate": 4.454987834549879e-05, "loss": 0.6448, "step": 6786 }, { "epoch": 0.1981547984000467, "grad_norm": 0.5827742195848035, "learning_rate": 4.454825628548256e-05, "loss": 0.6523, "step": 6787 }, { "epoch": 0.19818399462789407, "grad_norm": 0.6342528105720343, "learning_rate": 4.4546634225466345e-05, "loss": 0.7885, "step": 6788 }, { "epoch": 0.19821319085574143, "grad_norm": 0.6114044525164101, "learning_rate": 4.454501216545012e-05, "loss": 0.6975, "step": 6789 }, { "epoch": 0.1982423870835888, "grad_norm": 0.5689518871289618, "learning_rate": 4.45433901054339e-05, "loss": 0.6472, "step": 6790 }, { "epoch": 0.19827158331143616, "grad_norm": 0.5734792515522633, "learning_rate": 4.454176804541768e-05, "loss": 0.6196, "step": 6791 }, { "epoch": 0.19830077953928352, "grad_norm": 0.5723016341796272, "learning_rate": 4.454014598540146e-05, "loss": 0.6655, "step": 6792 }, { "epoch": 0.19832997576713088, "grad_norm": 0.5511276450107664, "learning_rate": 4.453852392538524e-05, "loss": 0.5998, "step": 6793 }, { "epoch": 0.19835917199497824, "grad_norm": 0.5822194187026999, "learning_rate": 4.453690186536902e-05, "loss": 0.7015, "step": 6794 }, { "epoch": 0.1983883682228256, "grad_norm": 0.6091463765746282, "learning_rate": 4.45352798053528e-05, "loss": 0.7334, "step": 6795 }, { "epoch": 0.19841756445067296, "grad_norm": 0.6185167111557919, "learning_rate": 4.4533657745336585e-05, "loss": 0.7124, "step": 6796 }, { "epoch": 0.19844676067852032, "grad_norm": 0.5468377956521975, "learning_rate": 4.453203568532036e-05, "loss": 0.5941, "step": 6797 }, { "epoch": 0.19847595690636768, "grad_norm": 0.5648672970692944, "learning_rate": 4.453041362530414e-05, "loss": 0.6715, "step": 6798 }, { "epoch": 0.19850515313421507, "grad_norm": 0.5341599716643818, "learning_rate": 4.452879156528792e-05, "loss": 0.5736, "step": 6799 }, { "epoch": 0.19853434936206243, "grad_norm": 0.6550484932307664, "learning_rate": 4.45271695052717e-05, "loss": 0.7007, "step": 6800 }, { "epoch": 0.1985635455899098, "grad_norm": 0.6114843465515629, "learning_rate": 4.452554744525548e-05, "loss": 0.7634, "step": 6801 }, { "epoch": 0.19859274181775716, "grad_norm": 0.5414173030063244, "learning_rate": 4.4523925385239255e-05, "loss": 0.5799, "step": 6802 }, { "epoch": 0.19862193804560452, "grad_norm": 0.5466377291369126, "learning_rate": 4.452230332522304e-05, "loss": 0.5782, "step": 6803 }, { "epoch": 0.19865113427345188, "grad_norm": 0.5137566342914796, "learning_rate": 4.452068126520681e-05, "loss": 0.5697, "step": 6804 }, { "epoch": 0.19868033050129924, "grad_norm": 0.6421884371001269, "learning_rate": 4.4519059205190594e-05, "loss": 0.698, "step": 6805 }, { "epoch": 0.1987095267291466, "grad_norm": 0.5790038088218116, "learning_rate": 4.4517437145174376e-05, "loss": 0.7174, "step": 6806 }, { "epoch": 0.19873872295699396, "grad_norm": 0.5594872431201597, "learning_rate": 4.451581508515815e-05, "loss": 0.6481, "step": 6807 }, { "epoch": 0.19876791918484132, "grad_norm": 0.5686316677237708, "learning_rate": 4.451419302514193e-05, "loss": 0.6854, "step": 6808 }, { "epoch": 0.19879711541268869, "grad_norm": 0.5643484189664477, "learning_rate": 4.451257096512571e-05, "loss": 0.6447, "step": 6809 }, { "epoch": 0.19882631164053605, "grad_norm": 0.5803485235831993, "learning_rate": 4.451094890510949e-05, "loss": 0.6868, "step": 6810 }, { "epoch": 0.1988555078683834, "grad_norm": 0.5981418909108062, "learning_rate": 4.450932684509327e-05, "loss": 0.6492, "step": 6811 }, { "epoch": 0.19888470409623077, "grad_norm": 0.5837345739567726, "learning_rate": 4.4507704785077046e-05, "loss": 0.6618, "step": 6812 }, { "epoch": 0.19891390032407813, "grad_norm": 0.6528209279415756, "learning_rate": 4.450608272506083e-05, "loss": 0.7638, "step": 6813 }, { "epoch": 0.1989430965519255, "grad_norm": 0.5860451007331201, "learning_rate": 4.450446066504461e-05, "loss": 0.6631, "step": 6814 }, { "epoch": 0.19897229277977285, "grad_norm": 0.5989486646153898, "learning_rate": 4.450283860502839e-05, "loss": 0.6111, "step": 6815 }, { "epoch": 0.1990014890076202, "grad_norm": 0.5884217200035198, "learning_rate": 4.450121654501217e-05, "loss": 0.6388, "step": 6816 }, { "epoch": 0.19903068523546757, "grad_norm": 0.6315697671464271, "learning_rate": 4.449959448499595e-05, "loss": 0.761, "step": 6817 }, { "epoch": 0.19905988146331494, "grad_norm": 0.5747579543792659, "learning_rate": 4.449797242497973e-05, "loss": 0.6998, "step": 6818 }, { "epoch": 0.1990890776911623, "grad_norm": 0.606990923043991, "learning_rate": 4.4496350364963505e-05, "loss": 0.7251, "step": 6819 }, { "epoch": 0.19911827391900966, "grad_norm": 0.5872991035660307, "learning_rate": 4.4494728304947286e-05, "loss": 0.6891, "step": 6820 }, { "epoch": 0.19914747014685702, "grad_norm": 0.5404112750650805, "learning_rate": 4.449310624493107e-05, "loss": 0.5955, "step": 6821 }, { "epoch": 0.19917666637470438, "grad_norm": 0.5651421560017353, "learning_rate": 4.449148418491484e-05, "loss": 0.6193, "step": 6822 }, { "epoch": 0.19920586260255174, "grad_norm": 0.5504113585739178, "learning_rate": 4.4489862124898625e-05, "loss": 0.6478, "step": 6823 }, { "epoch": 0.1992350588303991, "grad_norm": 0.5549136055550382, "learning_rate": 4.44882400648824e-05, "loss": 0.629, "step": 6824 }, { "epoch": 0.19926425505824646, "grad_norm": 0.607178248870162, "learning_rate": 4.448661800486618e-05, "loss": 0.6914, "step": 6825 }, { "epoch": 0.19929345128609383, "grad_norm": 0.6467573759114579, "learning_rate": 4.4484995944849964e-05, "loss": 0.6743, "step": 6826 }, { "epoch": 0.1993226475139412, "grad_norm": 0.6031484680196167, "learning_rate": 4.448337388483374e-05, "loss": 0.6347, "step": 6827 }, { "epoch": 0.19935184374178855, "grad_norm": 0.5860110177466391, "learning_rate": 4.448175182481752e-05, "loss": 0.6592, "step": 6828 }, { "epoch": 0.1993810399696359, "grad_norm": 0.5899458789344216, "learning_rate": 4.4480129764801295e-05, "loss": 0.7043, "step": 6829 }, { "epoch": 0.1994102361974833, "grad_norm": 0.5504361651899045, "learning_rate": 4.447850770478508e-05, "loss": 0.6402, "step": 6830 }, { "epoch": 0.19943943242533066, "grad_norm": 0.6452823935837105, "learning_rate": 4.447688564476886e-05, "loss": 0.8008, "step": 6831 }, { "epoch": 0.19946862865317802, "grad_norm": 0.5877232414307199, "learning_rate": 4.4475263584752634e-05, "loss": 0.6075, "step": 6832 }, { "epoch": 0.19949782488102538, "grad_norm": 0.6295182694651358, "learning_rate": 4.4473641524736416e-05, "loss": 0.7548, "step": 6833 }, { "epoch": 0.19952702110887274, "grad_norm": 0.6120825766170781, "learning_rate": 4.44720194647202e-05, "loss": 0.6721, "step": 6834 }, { "epoch": 0.1995562173367201, "grad_norm": 0.5484937174030695, "learning_rate": 4.447039740470398e-05, "loss": 0.5869, "step": 6835 }, { "epoch": 0.19958541356456747, "grad_norm": 0.6036013163691424, "learning_rate": 4.446877534468776e-05, "loss": 0.6688, "step": 6836 }, { "epoch": 0.19961460979241483, "grad_norm": 0.6121495531812833, "learning_rate": 4.4467153284671536e-05, "loss": 0.6978, "step": 6837 }, { "epoch": 0.1996438060202622, "grad_norm": 0.5984250719264389, "learning_rate": 4.446553122465532e-05, "loss": 0.6651, "step": 6838 }, { "epoch": 0.19967300224810955, "grad_norm": 0.5460243097594166, "learning_rate": 4.446390916463909e-05, "loss": 0.6027, "step": 6839 }, { "epoch": 0.1997021984759569, "grad_norm": 0.5860520475132901, "learning_rate": 4.4462287104622874e-05, "loss": 0.6251, "step": 6840 }, { "epoch": 0.19973139470380427, "grad_norm": 0.6350154775668555, "learning_rate": 4.4460665044606656e-05, "loss": 0.6898, "step": 6841 }, { "epoch": 0.19976059093165163, "grad_norm": 0.574198682990051, "learning_rate": 4.445904298459043e-05, "loss": 0.6215, "step": 6842 }, { "epoch": 0.199789787159499, "grad_norm": 0.5818932224094985, "learning_rate": 4.445742092457421e-05, "loss": 0.647, "step": 6843 }, { "epoch": 0.19981898338734636, "grad_norm": 0.5717824573329804, "learning_rate": 4.445579886455799e-05, "loss": 0.6172, "step": 6844 }, { "epoch": 0.19984817961519372, "grad_norm": 0.5897592977408076, "learning_rate": 4.445417680454177e-05, "loss": 0.674, "step": 6845 }, { "epoch": 0.19987737584304108, "grad_norm": 0.5744600698936126, "learning_rate": 4.445255474452555e-05, "loss": 0.6567, "step": 6846 }, { "epoch": 0.19990657207088844, "grad_norm": 0.5523168052844588, "learning_rate": 4.4450932684509326e-05, "loss": 0.5915, "step": 6847 }, { "epoch": 0.1999357682987358, "grad_norm": 0.5531061417221907, "learning_rate": 4.444931062449311e-05, "loss": 0.633, "step": 6848 }, { "epoch": 0.19996496452658316, "grad_norm": 0.5580333549057814, "learning_rate": 4.444768856447688e-05, "loss": 0.5831, "step": 6849 }, { "epoch": 0.19999416075443052, "grad_norm": 0.5958482159036311, "learning_rate": 4.4446066504460665e-05, "loss": 0.6453, "step": 6850 }, { "epoch": 0.20002335698227788, "grad_norm": 0.578652848504145, "learning_rate": 4.4444444444444447e-05, "loss": 0.5984, "step": 6851 }, { "epoch": 0.20005255321012524, "grad_norm": 0.5531775444467051, "learning_rate": 4.444282238442822e-05, "loss": 0.5791, "step": 6852 }, { "epoch": 0.2000817494379726, "grad_norm": 0.63751145849726, "learning_rate": 4.444120032441201e-05, "loss": 0.7428, "step": 6853 }, { "epoch": 0.20011094566581997, "grad_norm": 0.6457153170567597, "learning_rate": 4.4439578264395785e-05, "loss": 0.7295, "step": 6854 }, { "epoch": 0.20014014189366733, "grad_norm": 0.6165439283477273, "learning_rate": 4.443795620437957e-05, "loss": 0.781, "step": 6855 }, { "epoch": 0.2001693381215147, "grad_norm": 0.6118210774124193, "learning_rate": 4.443633414436334e-05, "loss": 0.7364, "step": 6856 }, { "epoch": 0.20019853434936205, "grad_norm": 0.5514681064093614, "learning_rate": 4.4434712084347124e-05, "loss": 0.6189, "step": 6857 }, { "epoch": 0.2002277305772094, "grad_norm": 0.5929355516896703, "learning_rate": 4.4433090024330905e-05, "loss": 0.6722, "step": 6858 }, { "epoch": 0.20025692680505677, "grad_norm": 0.5929782154443332, "learning_rate": 4.443146796431468e-05, "loss": 0.6696, "step": 6859 }, { "epoch": 0.20028612303290416, "grad_norm": 0.6156743293903254, "learning_rate": 4.442984590429846e-05, "loss": 0.6189, "step": 6860 }, { "epoch": 0.20031531926075152, "grad_norm": 0.5854435906119992, "learning_rate": 4.4428223844282244e-05, "loss": 0.6851, "step": 6861 }, { "epoch": 0.20034451548859888, "grad_norm": 0.6117885182159141, "learning_rate": 4.442660178426602e-05, "loss": 0.6989, "step": 6862 }, { "epoch": 0.20037371171644625, "grad_norm": 0.5594873794980428, "learning_rate": 4.44249797242498e-05, "loss": 0.6353, "step": 6863 }, { "epoch": 0.2004029079442936, "grad_norm": 0.5625395616249853, "learning_rate": 4.4423357664233576e-05, "loss": 0.7047, "step": 6864 }, { "epoch": 0.20043210417214097, "grad_norm": 0.5652115161701375, "learning_rate": 4.442173560421736e-05, "loss": 0.5844, "step": 6865 }, { "epoch": 0.20046130039998833, "grad_norm": 0.5374119728964579, "learning_rate": 4.442011354420114e-05, "loss": 0.6059, "step": 6866 }, { "epoch": 0.2004904966278357, "grad_norm": 0.6159226584715489, "learning_rate": 4.4418491484184914e-05, "loss": 0.7251, "step": 6867 }, { "epoch": 0.20051969285568305, "grad_norm": 0.5753123088709095, "learning_rate": 4.4416869424168696e-05, "loss": 0.6589, "step": 6868 }, { "epoch": 0.2005488890835304, "grad_norm": 0.6044695245739357, "learning_rate": 4.441524736415247e-05, "loss": 0.7028, "step": 6869 }, { "epoch": 0.20057808531137777, "grad_norm": 0.54497254013782, "learning_rate": 4.441362530413625e-05, "loss": 0.6478, "step": 6870 }, { "epoch": 0.20060728153922514, "grad_norm": 0.5494200323117358, "learning_rate": 4.4412003244120034e-05, "loss": 0.6018, "step": 6871 }, { "epoch": 0.2006364777670725, "grad_norm": 0.5866580379458565, "learning_rate": 4.4410381184103816e-05, "loss": 0.6667, "step": 6872 }, { "epoch": 0.20066567399491986, "grad_norm": 0.571772328591562, "learning_rate": 4.44087591240876e-05, "loss": 0.6196, "step": 6873 }, { "epoch": 0.20069487022276722, "grad_norm": 0.5639810013011723, "learning_rate": 4.440713706407137e-05, "loss": 0.6191, "step": 6874 }, { "epoch": 0.20072406645061458, "grad_norm": 0.6100648133050274, "learning_rate": 4.4405515004055155e-05, "loss": 0.6953, "step": 6875 }, { "epoch": 0.20075326267846194, "grad_norm": 0.5678500385643407, "learning_rate": 4.440389294403893e-05, "loss": 0.6633, "step": 6876 }, { "epoch": 0.2007824589063093, "grad_norm": 0.5904799916712888, "learning_rate": 4.440227088402271e-05, "loss": 0.7082, "step": 6877 }, { "epoch": 0.20081165513415666, "grad_norm": 0.5898779563096965, "learning_rate": 4.440064882400649e-05, "loss": 0.6774, "step": 6878 }, { "epoch": 0.20084085136200402, "grad_norm": 0.6103736861211067, "learning_rate": 4.439902676399027e-05, "loss": 0.7671, "step": 6879 }, { "epoch": 0.20087004758985139, "grad_norm": 0.5646053739545103, "learning_rate": 4.439740470397405e-05, "loss": 0.6289, "step": 6880 }, { "epoch": 0.20089924381769875, "grad_norm": 0.5714065150283046, "learning_rate": 4.439578264395783e-05, "loss": 0.6013, "step": 6881 }, { "epoch": 0.2009284400455461, "grad_norm": 0.5768591398379351, "learning_rate": 4.439416058394161e-05, "loss": 0.683, "step": 6882 }, { "epoch": 0.20095763627339347, "grad_norm": 0.556981603751615, "learning_rate": 4.439253852392539e-05, "loss": 0.648, "step": 6883 }, { "epoch": 0.20098683250124083, "grad_norm": 0.5421948514718963, "learning_rate": 4.4390916463909163e-05, "loss": 0.6185, "step": 6884 }, { "epoch": 0.2010160287290882, "grad_norm": 0.6826611056621239, "learning_rate": 4.4389294403892945e-05, "loss": 0.7522, "step": 6885 }, { "epoch": 0.20104522495693555, "grad_norm": 0.5890236959419162, "learning_rate": 4.438767234387673e-05, "loss": 0.7218, "step": 6886 }, { "epoch": 0.20107442118478291, "grad_norm": 0.531202419794709, "learning_rate": 4.43860502838605e-05, "loss": 0.6035, "step": 6887 }, { "epoch": 0.20110361741263028, "grad_norm": 0.548618758141771, "learning_rate": 4.4384428223844284e-05, "loss": 0.5886, "step": 6888 }, { "epoch": 0.20113281364047764, "grad_norm": 0.5795416018572509, "learning_rate": 4.438280616382806e-05, "loss": 0.6503, "step": 6889 }, { "epoch": 0.20116200986832503, "grad_norm": 0.5788982700615498, "learning_rate": 4.438118410381185e-05, "loss": 0.6782, "step": 6890 }, { "epoch": 0.2011912060961724, "grad_norm": 0.5821353136425162, "learning_rate": 4.437956204379562e-05, "loss": 0.6199, "step": 6891 }, { "epoch": 0.20122040232401975, "grad_norm": 0.5665684679608368, "learning_rate": 4.4377939983779404e-05, "loss": 0.6807, "step": 6892 }, { "epoch": 0.2012495985518671, "grad_norm": 0.564091293326356, "learning_rate": 4.4376317923763186e-05, "loss": 0.6378, "step": 6893 }, { "epoch": 0.20127879477971447, "grad_norm": 0.6155684286320886, "learning_rate": 4.437469586374696e-05, "loss": 0.732, "step": 6894 }, { "epoch": 0.20130799100756183, "grad_norm": 0.5859525181085136, "learning_rate": 4.437307380373074e-05, "loss": 0.6536, "step": 6895 }, { "epoch": 0.2013371872354092, "grad_norm": 0.549376795479146, "learning_rate": 4.437145174371452e-05, "loss": 0.6012, "step": 6896 }, { "epoch": 0.20136638346325655, "grad_norm": 0.6207626282071416, "learning_rate": 4.43698296836983e-05, "loss": 0.7292, "step": 6897 }, { "epoch": 0.20139557969110392, "grad_norm": 0.5541425554855657, "learning_rate": 4.436820762368208e-05, "loss": 0.595, "step": 6898 }, { "epoch": 0.20142477591895128, "grad_norm": 0.576402013145005, "learning_rate": 4.4366585563665856e-05, "loss": 0.6396, "step": 6899 }, { "epoch": 0.20145397214679864, "grad_norm": 0.5315493067672264, "learning_rate": 4.436496350364964e-05, "loss": 0.5827, "step": 6900 }, { "epoch": 0.201483168374646, "grad_norm": 0.6054921751923439, "learning_rate": 4.436334144363341e-05, "loss": 0.694, "step": 6901 }, { "epoch": 0.20151236460249336, "grad_norm": 0.5797584551871927, "learning_rate": 4.4361719383617194e-05, "loss": 0.626, "step": 6902 }, { "epoch": 0.20154156083034072, "grad_norm": 0.5859273008971212, "learning_rate": 4.4360097323600976e-05, "loss": 0.6913, "step": 6903 }, { "epoch": 0.20157075705818808, "grad_norm": 0.6176344028961316, "learning_rate": 4.435847526358475e-05, "loss": 0.7098, "step": 6904 }, { "epoch": 0.20159995328603544, "grad_norm": 0.5512730658923717, "learning_rate": 4.435685320356853e-05, "loss": 0.591, "step": 6905 }, { "epoch": 0.2016291495138828, "grad_norm": 0.5523739240671262, "learning_rate": 4.4355231143552315e-05, "loss": 0.6186, "step": 6906 }, { "epoch": 0.20165834574173017, "grad_norm": 0.5546148676874034, "learning_rate": 4.435360908353609e-05, "loss": 0.6087, "step": 6907 }, { "epoch": 0.20168754196957753, "grad_norm": 0.7423489123616858, "learning_rate": 4.435198702351987e-05, "loss": 0.7528, "step": 6908 }, { "epoch": 0.2017167381974249, "grad_norm": 0.5883440927952236, "learning_rate": 4.435036496350365e-05, "loss": 0.7186, "step": 6909 }, { "epoch": 0.20174593442527225, "grad_norm": 0.6195684215260433, "learning_rate": 4.4348742903487435e-05, "loss": 0.7088, "step": 6910 }, { "epoch": 0.2017751306531196, "grad_norm": 0.5946159484712263, "learning_rate": 4.434712084347121e-05, "loss": 0.6887, "step": 6911 }, { "epoch": 0.20180432688096697, "grad_norm": 0.6313377683091628, "learning_rate": 4.434549878345499e-05, "loss": 0.7535, "step": 6912 }, { "epoch": 0.20183352310881433, "grad_norm": 0.601548259302404, "learning_rate": 4.4343876723438774e-05, "loss": 0.7098, "step": 6913 }, { "epoch": 0.2018627193366617, "grad_norm": 0.5904360548049588, "learning_rate": 4.434225466342255e-05, "loss": 0.6574, "step": 6914 }, { "epoch": 0.20189191556450906, "grad_norm": 0.6232531371770398, "learning_rate": 4.434063260340633e-05, "loss": 0.7484, "step": 6915 }, { "epoch": 0.20192111179235642, "grad_norm": 0.6047094172992389, "learning_rate": 4.4339010543390105e-05, "loss": 0.7239, "step": 6916 }, { "epoch": 0.20195030802020378, "grad_norm": 0.5938374672230393, "learning_rate": 4.433738848337389e-05, "loss": 0.6864, "step": 6917 }, { "epoch": 0.20197950424805114, "grad_norm": 0.5779954767894272, "learning_rate": 4.433576642335767e-05, "loss": 0.6775, "step": 6918 }, { "epoch": 0.2020087004758985, "grad_norm": 0.6015727039909379, "learning_rate": 4.4334144363341444e-05, "loss": 0.6757, "step": 6919 }, { "epoch": 0.2020378967037459, "grad_norm": 0.5660008100715836, "learning_rate": 4.4332522303325226e-05, "loss": 0.591, "step": 6920 }, { "epoch": 0.20206709293159325, "grad_norm": 0.5919175911446123, "learning_rate": 4.4330900243309e-05, "loss": 0.6087, "step": 6921 }, { "epoch": 0.2020962891594406, "grad_norm": 0.6204933329779148, "learning_rate": 4.432927818329278e-05, "loss": 0.6841, "step": 6922 }, { "epoch": 0.20212548538728797, "grad_norm": 0.5344288666013627, "learning_rate": 4.4327656123276564e-05, "loss": 0.5898, "step": 6923 }, { "epoch": 0.20215468161513533, "grad_norm": 0.5521718582940593, "learning_rate": 4.432603406326034e-05, "loss": 0.627, "step": 6924 }, { "epoch": 0.2021838778429827, "grad_norm": 0.5902395772348958, "learning_rate": 4.432441200324412e-05, "loss": 0.6372, "step": 6925 }, { "epoch": 0.20221307407083006, "grad_norm": 0.6106259722546397, "learning_rate": 4.43227899432279e-05, "loss": 0.6797, "step": 6926 }, { "epoch": 0.20224227029867742, "grad_norm": 0.6728551608711343, "learning_rate": 4.432116788321168e-05, "loss": 0.772, "step": 6927 }, { "epoch": 0.20227146652652478, "grad_norm": 0.5494272793964139, "learning_rate": 4.4319545823195466e-05, "loss": 0.6029, "step": 6928 }, { "epoch": 0.20230066275437214, "grad_norm": 0.6048754319484747, "learning_rate": 4.431792376317924e-05, "loss": 0.7296, "step": 6929 }, { "epoch": 0.2023298589822195, "grad_norm": 0.6392885949272935, "learning_rate": 4.431630170316302e-05, "loss": 0.7413, "step": 6930 }, { "epoch": 0.20235905521006686, "grad_norm": 0.5498184551665574, "learning_rate": 4.43146796431468e-05, "loss": 0.6464, "step": 6931 }, { "epoch": 0.20238825143791422, "grad_norm": 0.526986091747444, "learning_rate": 4.431305758313058e-05, "loss": 0.6008, "step": 6932 }, { "epoch": 0.20241744766576159, "grad_norm": 0.5512944435282742, "learning_rate": 4.431143552311436e-05, "loss": 0.6383, "step": 6933 }, { "epoch": 0.20244664389360895, "grad_norm": 0.5456182023933158, "learning_rate": 4.4309813463098136e-05, "loss": 0.6171, "step": 6934 }, { "epoch": 0.2024758401214563, "grad_norm": 0.6959915224656337, "learning_rate": 4.430819140308192e-05, "loss": 0.8809, "step": 6935 }, { "epoch": 0.20250503634930367, "grad_norm": 0.5424539088581455, "learning_rate": 4.430656934306569e-05, "loss": 0.6085, "step": 6936 }, { "epoch": 0.20253423257715103, "grad_norm": 0.5548564065212094, "learning_rate": 4.4304947283049475e-05, "loss": 0.5829, "step": 6937 }, { "epoch": 0.2025634288049984, "grad_norm": 0.6123085020985946, "learning_rate": 4.4303325223033257e-05, "loss": 0.6411, "step": 6938 }, { "epoch": 0.20259262503284575, "grad_norm": 0.587156214227023, "learning_rate": 4.430170316301703e-05, "loss": 0.6695, "step": 6939 }, { "epoch": 0.2026218212606931, "grad_norm": 0.5636110500713022, "learning_rate": 4.430008110300081e-05, "loss": 0.661, "step": 6940 }, { "epoch": 0.20265101748854047, "grad_norm": 0.5848054733443648, "learning_rate": 4.429845904298459e-05, "loss": 0.6916, "step": 6941 }, { "epoch": 0.20268021371638784, "grad_norm": 0.6249214675948008, "learning_rate": 4.429683698296837e-05, "loss": 0.6556, "step": 6942 }, { "epoch": 0.2027094099442352, "grad_norm": 0.5792587818250641, "learning_rate": 4.429521492295215e-05, "loss": 0.6716, "step": 6943 }, { "epoch": 0.20273860617208256, "grad_norm": 0.5801225486136357, "learning_rate": 4.429359286293593e-05, "loss": 0.6731, "step": 6944 }, { "epoch": 0.20276780239992992, "grad_norm": 0.6010343810893225, "learning_rate": 4.429197080291971e-05, "loss": 0.7019, "step": 6945 }, { "epoch": 0.20279699862777728, "grad_norm": 0.5519404494894874, "learning_rate": 4.4290348742903484e-05, "loss": 0.6247, "step": 6946 }, { "epoch": 0.20282619485562464, "grad_norm": 0.5430698240025951, "learning_rate": 4.428872668288727e-05, "loss": 0.6187, "step": 6947 }, { "epoch": 0.202855391083472, "grad_norm": 0.6705812119090284, "learning_rate": 4.4287104622871054e-05, "loss": 0.7054, "step": 6948 }, { "epoch": 0.20288458731131936, "grad_norm": 0.5384097519888004, "learning_rate": 4.428548256285483e-05, "loss": 0.649, "step": 6949 }, { "epoch": 0.20291378353916675, "grad_norm": 0.5545646957310459, "learning_rate": 4.428386050283861e-05, "loss": 0.6467, "step": 6950 }, { "epoch": 0.20294297976701411, "grad_norm": 0.5399772191504983, "learning_rate": 4.4282238442822386e-05, "loss": 0.5429, "step": 6951 }, { "epoch": 0.20297217599486148, "grad_norm": 0.5816182245167818, "learning_rate": 4.428061638280617e-05, "loss": 0.5809, "step": 6952 }, { "epoch": 0.20300137222270884, "grad_norm": 0.5486861662806309, "learning_rate": 4.427899432278995e-05, "loss": 0.567, "step": 6953 }, { "epoch": 0.2030305684505562, "grad_norm": 0.5947160882091846, "learning_rate": 4.4277372262773724e-05, "loss": 0.6309, "step": 6954 }, { "epoch": 0.20305976467840356, "grad_norm": 0.6382743870805401, "learning_rate": 4.4275750202757506e-05, "loss": 0.7758, "step": 6955 }, { "epoch": 0.20308896090625092, "grad_norm": 0.600031128673753, "learning_rate": 4.427412814274128e-05, "loss": 0.7316, "step": 6956 }, { "epoch": 0.20311815713409828, "grad_norm": 0.5642945412794489, "learning_rate": 4.427250608272506e-05, "loss": 0.674, "step": 6957 }, { "epoch": 0.20314735336194564, "grad_norm": 0.5591255602399715, "learning_rate": 4.4270884022708844e-05, "loss": 0.5921, "step": 6958 }, { "epoch": 0.203176549589793, "grad_norm": 0.5466804072702903, "learning_rate": 4.426926196269262e-05, "loss": 0.6001, "step": 6959 }, { "epoch": 0.20320574581764037, "grad_norm": 0.6277771658209284, "learning_rate": 4.42676399026764e-05, "loss": 0.6841, "step": 6960 }, { "epoch": 0.20323494204548773, "grad_norm": 0.643943797452862, "learning_rate": 4.4266017842660176e-05, "loss": 0.7334, "step": 6961 }, { "epoch": 0.2032641382733351, "grad_norm": 0.582995165615875, "learning_rate": 4.426439578264396e-05, "loss": 0.6691, "step": 6962 }, { "epoch": 0.20329333450118245, "grad_norm": 0.548297175587229, "learning_rate": 4.426277372262774e-05, "loss": 0.6477, "step": 6963 }, { "epoch": 0.2033225307290298, "grad_norm": 0.590801587949977, "learning_rate": 4.4261151662611515e-05, "loss": 0.6963, "step": 6964 }, { "epoch": 0.20335172695687717, "grad_norm": 0.6034407327272361, "learning_rate": 4.4259529602595296e-05, "loss": 0.6798, "step": 6965 }, { "epoch": 0.20338092318472453, "grad_norm": 0.5708647648273412, "learning_rate": 4.425790754257908e-05, "loss": 0.63, "step": 6966 }, { "epoch": 0.2034101194125719, "grad_norm": 0.6006353207335791, "learning_rate": 4.425628548256286e-05, "loss": 0.6584, "step": 6967 }, { "epoch": 0.20343931564041925, "grad_norm": 0.5879949138815838, "learning_rate": 4.425466342254664e-05, "loss": 0.6859, "step": 6968 }, { "epoch": 0.20346851186826662, "grad_norm": 0.6094137914217015, "learning_rate": 4.425304136253042e-05, "loss": 0.683, "step": 6969 }, { "epoch": 0.20349770809611398, "grad_norm": 0.499467259899169, "learning_rate": 4.42514193025142e-05, "loss": 0.567, "step": 6970 }, { "epoch": 0.20352690432396134, "grad_norm": 0.6530357066550678, "learning_rate": 4.4249797242497973e-05, "loss": 0.8002, "step": 6971 }, { "epoch": 0.2035561005518087, "grad_norm": 0.5937537621463549, "learning_rate": 4.4248175182481755e-05, "loss": 0.6796, "step": 6972 }, { "epoch": 0.20358529677965606, "grad_norm": 0.5658859990977029, "learning_rate": 4.424655312246554e-05, "loss": 0.6749, "step": 6973 }, { "epoch": 0.20361449300750342, "grad_norm": 0.538077956753418, "learning_rate": 4.424493106244931e-05, "loss": 0.5812, "step": 6974 }, { "epoch": 0.20364368923535078, "grad_norm": 0.5891031672532199, "learning_rate": 4.4243309002433094e-05, "loss": 0.6036, "step": 6975 }, { "epoch": 0.20367288546319814, "grad_norm": 0.525417667729278, "learning_rate": 4.424168694241687e-05, "loss": 0.582, "step": 6976 }, { "epoch": 0.2037020816910455, "grad_norm": 0.5346979475111651, "learning_rate": 4.424006488240065e-05, "loss": 0.6094, "step": 6977 }, { "epoch": 0.20373127791889287, "grad_norm": 0.5859323748861105, "learning_rate": 4.423844282238443e-05, "loss": 0.7211, "step": 6978 }, { "epoch": 0.20376047414674023, "grad_norm": 0.5715580322139657, "learning_rate": 4.423682076236821e-05, "loss": 0.6564, "step": 6979 }, { "epoch": 0.20378967037458762, "grad_norm": 0.5489031969533997, "learning_rate": 4.423519870235199e-05, "loss": 0.5738, "step": 6980 }, { "epoch": 0.20381886660243498, "grad_norm": 0.6505910093128511, "learning_rate": 4.4233576642335764e-05, "loss": 0.8015, "step": 6981 }, { "epoch": 0.20384806283028234, "grad_norm": 0.5627028942385642, "learning_rate": 4.4231954582319546e-05, "loss": 0.634, "step": 6982 }, { "epoch": 0.2038772590581297, "grad_norm": 0.5172848851298352, "learning_rate": 4.423033252230333e-05, "loss": 0.5699, "step": 6983 }, { "epoch": 0.20390645528597706, "grad_norm": 0.6096414624094232, "learning_rate": 4.42287104622871e-05, "loss": 0.6445, "step": 6984 }, { "epoch": 0.20393565151382442, "grad_norm": 0.6053595200115378, "learning_rate": 4.422708840227089e-05, "loss": 0.625, "step": 6985 }, { "epoch": 0.20396484774167178, "grad_norm": 0.5896165672039966, "learning_rate": 4.4225466342254666e-05, "loss": 0.6842, "step": 6986 }, { "epoch": 0.20399404396951915, "grad_norm": 0.5885131301184167, "learning_rate": 4.422384428223845e-05, "loss": 0.6805, "step": 6987 }, { "epoch": 0.2040232401973665, "grad_norm": 0.6472180195779987, "learning_rate": 4.422222222222222e-05, "loss": 0.6887, "step": 6988 }, { "epoch": 0.20405243642521387, "grad_norm": 0.6073359826066705, "learning_rate": 4.4220600162206004e-05, "loss": 0.6822, "step": 6989 }, { "epoch": 0.20408163265306123, "grad_norm": 0.540853429520301, "learning_rate": 4.4218978102189786e-05, "loss": 0.6071, "step": 6990 }, { "epoch": 0.2041108288809086, "grad_norm": 0.6044723252719314, "learning_rate": 4.421735604217356e-05, "loss": 0.6664, "step": 6991 }, { "epoch": 0.20414002510875595, "grad_norm": 0.5708413748050698, "learning_rate": 4.421573398215734e-05, "loss": 0.698, "step": 6992 }, { "epoch": 0.2041692213366033, "grad_norm": 0.5489834625197791, "learning_rate": 4.4214111922141125e-05, "loss": 0.5992, "step": 6993 }, { "epoch": 0.20419841756445067, "grad_norm": 0.6912251004268161, "learning_rate": 4.42124898621249e-05, "loss": 0.7413, "step": 6994 }, { "epoch": 0.20422761379229804, "grad_norm": 0.5641741268227343, "learning_rate": 4.421086780210868e-05, "loss": 0.6366, "step": 6995 }, { "epoch": 0.2042568100201454, "grad_norm": 0.5853664883636098, "learning_rate": 4.4209245742092456e-05, "loss": 0.6665, "step": 6996 }, { "epoch": 0.20428600624799276, "grad_norm": 0.5493046255815737, "learning_rate": 4.420762368207624e-05, "loss": 0.661, "step": 6997 }, { "epoch": 0.20431520247584012, "grad_norm": 0.5691595407543636, "learning_rate": 4.420600162206002e-05, "loss": 0.6601, "step": 6998 }, { "epoch": 0.20434439870368748, "grad_norm": 0.6123744616216594, "learning_rate": 4.4204379562043795e-05, "loss": 0.7289, "step": 6999 }, { "epoch": 0.20437359493153484, "grad_norm": 0.5762132748567245, "learning_rate": 4.420275750202758e-05, "loss": 0.6879, "step": 7000 }, { "epoch": 0.2044027911593822, "grad_norm": 0.5469957054744804, "learning_rate": 4.420113544201135e-05, "loss": 0.6429, "step": 7001 }, { "epoch": 0.20443198738722956, "grad_norm": 0.6060588656980812, "learning_rate": 4.4199513381995134e-05, "loss": 0.6475, "step": 7002 }, { "epoch": 0.20446118361507692, "grad_norm": 0.5875768493162545, "learning_rate": 4.4197891321978915e-05, "loss": 0.7329, "step": 7003 }, { "epoch": 0.20449037984292429, "grad_norm": 0.5504590472674825, "learning_rate": 4.41962692619627e-05, "loss": 0.6172, "step": 7004 }, { "epoch": 0.20451957607077165, "grad_norm": 0.631938984013702, "learning_rate": 4.419464720194648e-05, "loss": 0.7649, "step": 7005 }, { "epoch": 0.204548772298619, "grad_norm": 0.6078330389386686, "learning_rate": 4.4193025141930254e-05, "loss": 0.7189, "step": 7006 }, { "epoch": 0.20457796852646637, "grad_norm": 0.5573509562552975, "learning_rate": 4.4191403081914036e-05, "loss": 0.6742, "step": 7007 }, { "epoch": 0.20460716475431373, "grad_norm": 0.6778244725559539, "learning_rate": 4.418978102189781e-05, "loss": 0.663, "step": 7008 }, { "epoch": 0.2046363609821611, "grad_norm": 0.5566446752624967, "learning_rate": 4.418815896188159e-05, "loss": 0.6109, "step": 7009 }, { "epoch": 0.20466555721000848, "grad_norm": 0.5436052841107375, "learning_rate": 4.4186536901865374e-05, "loss": 0.6317, "step": 7010 }, { "epoch": 0.20469475343785584, "grad_norm": 0.548103255526114, "learning_rate": 4.418491484184915e-05, "loss": 0.6131, "step": 7011 }, { "epoch": 0.2047239496657032, "grad_norm": 0.5990198575192226, "learning_rate": 4.418329278183293e-05, "loss": 0.7861, "step": 7012 }, { "epoch": 0.20475314589355056, "grad_norm": 0.5798591210862012, "learning_rate": 4.418167072181671e-05, "loss": 0.652, "step": 7013 }, { "epoch": 0.20478234212139793, "grad_norm": 0.5912803561007824, "learning_rate": 4.418004866180049e-05, "loss": 0.6891, "step": 7014 }, { "epoch": 0.2048115383492453, "grad_norm": 0.5689613446638148, "learning_rate": 4.417842660178427e-05, "loss": 0.6686, "step": 7015 }, { "epoch": 0.20484073457709265, "grad_norm": 0.6002660790733865, "learning_rate": 4.4176804541768044e-05, "loss": 0.6932, "step": 7016 }, { "epoch": 0.20486993080494, "grad_norm": 0.6553614944196878, "learning_rate": 4.4175182481751826e-05, "loss": 0.7936, "step": 7017 }, { "epoch": 0.20489912703278737, "grad_norm": 0.614161247426905, "learning_rate": 4.417356042173561e-05, "loss": 0.7294, "step": 7018 }, { "epoch": 0.20492832326063473, "grad_norm": 0.5795900090542901, "learning_rate": 4.417193836171938e-05, "loss": 0.715, "step": 7019 }, { "epoch": 0.2049575194884821, "grad_norm": 0.8587525679053641, "learning_rate": 4.4170316301703165e-05, "loss": 0.7894, "step": 7020 }, { "epoch": 0.20498671571632945, "grad_norm": 0.554349985681754, "learning_rate": 4.416869424168694e-05, "loss": 0.6151, "step": 7021 }, { "epoch": 0.20501591194417682, "grad_norm": 0.6346481455804501, "learning_rate": 4.416707218167073e-05, "loss": 0.6896, "step": 7022 }, { "epoch": 0.20504510817202418, "grad_norm": 0.5931497085541763, "learning_rate": 4.41654501216545e-05, "loss": 0.6809, "step": 7023 }, { "epoch": 0.20507430439987154, "grad_norm": 0.6064818134552128, "learning_rate": 4.4163828061638285e-05, "loss": 0.678, "step": 7024 }, { "epoch": 0.2051035006277189, "grad_norm": 0.596948608739788, "learning_rate": 4.4162206001622067e-05, "loss": 0.6748, "step": 7025 }, { "epoch": 0.20513269685556626, "grad_norm": 0.5284949773025035, "learning_rate": 4.416058394160584e-05, "loss": 0.5527, "step": 7026 }, { "epoch": 0.20516189308341362, "grad_norm": 0.550738251689486, "learning_rate": 4.415896188158962e-05, "loss": 0.631, "step": 7027 }, { "epoch": 0.20519108931126098, "grad_norm": 0.6206269811231033, "learning_rate": 4.41573398215734e-05, "loss": 0.7266, "step": 7028 }, { "epoch": 0.20522028553910834, "grad_norm": 0.5646742359969026, "learning_rate": 4.415571776155718e-05, "loss": 0.6676, "step": 7029 }, { "epoch": 0.2052494817669557, "grad_norm": 0.5802097647838272, "learning_rate": 4.415409570154096e-05, "loss": 0.6627, "step": 7030 }, { "epoch": 0.20527867799480307, "grad_norm": 0.7296566183237717, "learning_rate": 4.415247364152474e-05, "loss": 0.6763, "step": 7031 }, { "epoch": 0.20530787422265043, "grad_norm": 0.5838513643669521, "learning_rate": 4.415085158150852e-05, "loss": 0.6366, "step": 7032 }, { "epoch": 0.2053370704504978, "grad_norm": 0.5597076214721661, "learning_rate": 4.4149229521492294e-05, "loss": 0.6752, "step": 7033 }, { "epoch": 0.20536626667834515, "grad_norm": 0.5499969126490022, "learning_rate": 4.4147607461476075e-05, "loss": 0.5963, "step": 7034 }, { "epoch": 0.2053954629061925, "grad_norm": 0.5237351976999083, "learning_rate": 4.414598540145986e-05, "loss": 0.5053, "step": 7035 }, { "epoch": 0.20542465913403987, "grad_norm": 0.5884805070213774, "learning_rate": 4.414436334144363e-05, "loss": 0.678, "step": 7036 }, { "epoch": 0.20545385536188723, "grad_norm": 0.5691401372082595, "learning_rate": 4.4142741281427414e-05, "loss": 0.6946, "step": 7037 }, { "epoch": 0.2054830515897346, "grad_norm": 0.572120405529579, "learning_rate": 4.4141119221411196e-05, "loss": 0.6556, "step": 7038 }, { "epoch": 0.20551224781758196, "grad_norm": 0.5689003052970827, "learning_rate": 4.413949716139497e-05, "loss": 0.6637, "step": 7039 }, { "epoch": 0.20554144404542932, "grad_norm": 0.5981330551659505, "learning_rate": 4.413787510137875e-05, "loss": 0.6424, "step": 7040 }, { "epoch": 0.2055706402732767, "grad_norm": 0.5539421428765633, "learning_rate": 4.4136253041362534e-05, "loss": 0.5813, "step": 7041 }, { "epoch": 0.20559983650112407, "grad_norm": 0.5384838738953658, "learning_rate": 4.4134630981346316e-05, "loss": 0.57, "step": 7042 }, { "epoch": 0.20562903272897143, "grad_norm": 0.5528852083453691, "learning_rate": 4.413300892133009e-05, "loss": 0.6726, "step": 7043 }, { "epoch": 0.2056582289568188, "grad_norm": 0.596446049770478, "learning_rate": 4.413138686131387e-05, "loss": 0.6198, "step": 7044 }, { "epoch": 0.20568742518466615, "grad_norm": 0.5646223671780078, "learning_rate": 4.4129764801297654e-05, "loss": 0.6965, "step": 7045 }, { "epoch": 0.2057166214125135, "grad_norm": 0.9100786327085618, "learning_rate": 4.412814274128143e-05, "loss": 0.7384, "step": 7046 }, { "epoch": 0.20574581764036087, "grad_norm": 0.5897217676421812, "learning_rate": 4.412652068126521e-05, "loss": 0.6961, "step": 7047 }, { "epoch": 0.20577501386820823, "grad_norm": 0.5779130463772559, "learning_rate": 4.4124898621248986e-05, "loss": 0.6795, "step": 7048 }, { "epoch": 0.2058042100960556, "grad_norm": 0.6057139741621332, "learning_rate": 4.412327656123277e-05, "loss": 0.6785, "step": 7049 }, { "epoch": 0.20583340632390296, "grad_norm": 0.5605029264784557, "learning_rate": 4.412165450121655e-05, "loss": 0.6255, "step": 7050 }, { "epoch": 0.20586260255175032, "grad_norm": 0.650459272945956, "learning_rate": 4.4120032441200325e-05, "loss": 0.7987, "step": 7051 }, { "epoch": 0.20589179877959768, "grad_norm": 0.6015949405073869, "learning_rate": 4.4118410381184106e-05, "loss": 0.6575, "step": 7052 }, { "epoch": 0.20592099500744504, "grad_norm": 0.5948232054078848, "learning_rate": 4.411678832116788e-05, "loss": 0.6674, "step": 7053 }, { "epoch": 0.2059501912352924, "grad_norm": 0.5256842100504925, "learning_rate": 4.411516626115166e-05, "loss": 0.5588, "step": 7054 }, { "epoch": 0.20597938746313976, "grad_norm": 0.5872252916621998, "learning_rate": 4.4113544201135445e-05, "loss": 0.6286, "step": 7055 }, { "epoch": 0.20600858369098712, "grad_norm": 0.5692291357320095, "learning_rate": 4.411192214111922e-05, "loss": 0.6801, "step": 7056 }, { "epoch": 0.20603777991883448, "grad_norm": 0.5903402041549147, "learning_rate": 4.4110300081103e-05, "loss": 0.7283, "step": 7057 }, { "epoch": 0.20606697614668185, "grad_norm": 0.6675451428848707, "learning_rate": 4.4108678021086783e-05, "loss": 0.7755, "step": 7058 }, { "epoch": 0.2060961723745292, "grad_norm": 0.5780080619080873, "learning_rate": 4.410705596107056e-05, "loss": 0.6916, "step": 7059 }, { "epoch": 0.20612536860237657, "grad_norm": 0.6148584965060035, "learning_rate": 4.410543390105435e-05, "loss": 0.6484, "step": 7060 }, { "epoch": 0.20615456483022393, "grad_norm": 0.5599143750162422, "learning_rate": 4.410381184103812e-05, "loss": 0.5595, "step": 7061 }, { "epoch": 0.2061837610580713, "grad_norm": 0.6555071150664472, "learning_rate": 4.4102189781021904e-05, "loss": 0.6737, "step": 7062 }, { "epoch": 0.20621295728591865, "grad_norm": 0.5392052402248282, "learning_rate": 4.410056772100568e-05, "loss": 0.6273, "step": 7063 }, { "epoch": 0.206242153513766, "grad_norm": 0.6122143002846631, "learning_rate": 4.409894566098946e-05, "loss": 0.6664, "step": 7064 }, { "epoch": 0.20627134974161337, "grad_norm": 0.651491894712396, "learning_rate": 4.409732360097324e-05, "loss": 0.697, "step": 7065 }, { "epoch": 0.20630054596946074, "grad_norm": 0.58477966880062, "learning_rate": 4.409570154095702e-05, "loss": 0.6583, "step": 7066 }, { "epoch": 0.2063297421973081, "grad_norm": 0.5459546015839511, "learning_rate": 4.40940794809408e-05, "loss": 0.6116, "step": 7067 }, { "epoch": 0.20635893842515546, "grad_norm": 0.61370462248836, "learning_rate": 4.4092457420924574e-05, "loss": 0.6792, "step": 7068 }, { "epoch": 0.20638813465300282, "grad_norm": 0.5784373065615919, "learning_rate": 4.4090835360908356e-05, "loss": 0.6767, "step": 7069 }, { "epoch": 0.20641733088085018, "grad_norm": 0.5808767321577399, "learning_rate": 4.408921330089214e-05, "loss": 0.6685, "step": 7070 }, { "epoch": 0.20644652710869757, "grad_norm": 0.7009003412302253, "learning_rate": 4.408759124087591e-05, "loss": 0.7538, "step": 7071 }, { "epoch": 0.20647572333654493, "grad_norm": 0.630404134749945, "learning_rate": 4.4085969180859694e-05, "loss": 0.7707, "step": 7072 }, { "epoch": 0.2065049195643923, "grad_norm": 0.5411301986241857, "learning_rate": 4.408434712084347e-05, "loss": 0.6137, "step": 7073 }, { "epoch": 0.20653411579223965, "grad_norm": 0.6121148157173744, "learning_rate": 4.408272506082725e-05, "loss": 0.6824, "step": 7074 }, { "epoch": 0.20656331202008701, "grad_norm": 0.5970588878794361, "learning_rate": 4.408110300081103e-05, "loss": 0.7082, "step": 7075 }, { "epoch": 0.20659250824793438, "grad_norm": 0.5822638116400748, "learning_rate": 4.407948094079481e-05, "loss": 0.6718, "step": 7076 }, { "epoch": 0.20662170447578174, "grad_norm": 0.585874672461212, "learning_rate": 4.407785888077859e-05, "loss": 0.6687, "step": 7077 }, { "epoch": 0.2066509007036291, "grad_norm": 0.5630672770270582, "learning_rate": 4.4076236820762364e-05, "loss": 0.643, "step": 7078 }, { "epoch": 0.20668009693147646, "grad_norm": 0.5545708166062767, "learning_rate": 4.407461476074615e-05, "loss": 0.6338, "step": 7079 }, { "epoch": 0.20670929315932382, "grad_norm": 0.5983104263864834, "learning_rate": 4.4072992700729935e-05, "loss": 0.6328, "step": 7080 }, { "epoch": 0.20673848938717118, "grad_norm": 0.5961607327016223, "learning_rate": 4.407137064071371e-05, "loss": 0.6959, "step": 7081 }, { "epoch": 0.20676768561501854, "grad_norm": 0.6561567603304866, "learning_rate": 4.406974858069749e-05, "loss": 0.7439, "step": 7082 }, { "epoch": 0.2067968818428659, "grad_norm": 0.694365181440929, "learning_rate": 4.4068126520681266e-05, "loss": 0.6411, "step": 7083 }, { "epoch": 0.20682607807071327, "grad_norm": 0.5891496094884109, "learning_rate": 4.406650446066505e-05, "loss": 0.7106, "step": 7084 }, { "epoch": 0.20685527429856063, "grad_norm": 0.6027827922811082, "learning_rate": 4.406488240064883e-05, "loss": 0.6873, "step": 7085 }, { "epoch": 0.206884470526408, "grad_norm": 0.5969772891849866, "learning_rate": 4.4063260340632605e-05, "loss": 0.7061, "step": 7086 }, { "epoch": 0.20691366675425535, "grad_norm": 0.651681920501541, "learning_rate": 4.406163828061639e-05, "loss": 0.6865, "step": 7087 }, { "epoch": 0.2069428629821027, "grad_norm": 0.5593334728053976, "learning_rate": 4.406001622060016e-05, "loss": 0.6074, "step": 7088 }, { "epoch": 0.20697205920995007, "grad_norm": 0.5920998033096071, "learning_rate": 4.4058394160583944e-05, "loss": 0.6449, "step": 7089 }, { "epoch": 0.20700125543779743, "grad_norm": 0.5897450319019908, "learning_rate": 4.4056772100567725e-05, "loss": 0.6614, "step": 7090 }, { "epoch": 0.2070304516656448, "grad_norm": 0.6189099935570876, "learning_rate": 4.40551500405515e-05, "loss": 0.7444, "step": 7091 }, { "epoch": 0.20705964789349215, "grad_norm": 0.6010862628212483, "learning_rate": 4.405352798053528e-05, "loss": 0.7138, "step": 7092 }, { "epoch": 0.20708884412133952, "grad_norm": 0.5582640615093577, "learning_rate": 4.405190592051906e-05, "loss": 0.6209, "step": 7093 }, { "epoch": 0.20711804034918688, "grad_norm": 0.5618681342974329, "learning_rate": 4.405028386050284e-05, "loss": 0.649, "step": 7094 }, { "epoch": 0.20714723657703424, "grad_norm": 0.600639432207826, "learning_rate": 4.404866180048662e-05, "loss": 0.7092, "step": 7095 }, { "epoch": 0.2071764328048816, "grad_norm": 0.5748266874979878, "learning_rate": 4.4047039740470396e-05, "loss": 0.7159, "step": 7096 }, { "epoch": 0.20720562903272896, "grad_norm": 0.6204523166529496, "learning_rate": 4.404541768045418e-05, "loss": 0.72, "step": 7097 }, { "epoch": 0.20723482526057632, "grad_norm": 0.5838381597482251, "learning_rate": 4.404379562043796e-05, "loss": 0.6649, "step": 7098 }, { "epoch": 0.20726402148842368, "grad_norm": 0.5869086234173712, "learning_rate": 4.404217356042174e-05, "loss": 0.6412, "step": 7099 }, { "epoch": 0.20729321771627104, "grad_norm": 0.6110679929824018, "learning_rate": 4.4040551500405516e-05, "loss": 0.6623, "step": 7100 }, { "epoch": 0.20732241394411843, "grad_norm": 0.6330221399913236, "learning_rate": 4.40389294403893e-05, "loss": 0.78, "step": 7101 }, { "epoch": 0.2073516101719658, "grad_norm": 0.6516000822678008, "learning_rate": 4.403730738037308e-05, "loss": 0.7053, "step": 7102 }, { "epoch": 0.20738080639981316, "grad_norm": 0.5729077632508363, "learning_rate": 4.4035685320356854e-05, "loss": 0.6494, "step": 7103 }, { "epoch": 0.20741000262766052, "grad_norm": 0.5506461999786523, "learning_rate": 4.4034063260340636e-05, "loss": 0.6213, "step": 7104 }, { "epoch": 0.20743919885550788, "grad_norm": 0.6256726543832224, "learning_rate": 4.403244120032442e-05, "loss": 0.7382, "step": 7105 }, { "epoch": 0.20746839508335524, "grad_norm": 0.5741240965323243, "learning_rate": 4.403081914030819e-05, "loss": 0.6608, "step": 7106 }, { "epoch": 0.2074975913112026, "grad_norm": 0.6115042535326792, "learning_rate": 4.4029197080291975e-05, "loss": 0.7178, "step": 7107 }, { "epoch": 0.20752678753904996, "grad_norm": 0.5570974939946656, "learning_rate": 4.402757502027575e-05, "loss": 0.581, "step": 7108 }, { "epoch": 0.20755598376689732, "grad_norm": 0.6001209196085991, "learning_rate": 4.402595296025953e-05, "loss": 0.7346, "step": 7109 }, { "epoch": 0.20758517999474468, "grad_norm": 0.5564007671538156, "learning_rate": 4.402433090024331e-05, "loss": 0.6454, "step": 7110 }, { "epoch": 0.20761437622259205, "grad_norm": 0.5938089477663548, "learning_rate": 4.402270884022709e-05, "loss": 0.6906, "step": 7111 }, { "epoch": 0.2076435724504394, "grad_norm": 0.5580200326980206, "learning_rate": 4.402108678021087e-05, "loss": 0.6329, "step": 7112 }, { "epoch": 0.20767276867828677, "grad_norm": 0.5553696009575034, "learning_rate": 4.4019464720194645e-05, "loss": 0.6161, "step": 7113 }, { "epoch": 0.20770196490613413, "grad_norm": 0.5521870028519911, "learning_rate": 4.4017842660178427e-05, "loss": 0.6539, "step": 7114 }, { "epoch": 0.2077311611339815, "grad_norm": 0.55567050659706, "learning_rate": 4.401622060016221e-05, "loss": 0.6426, "step": 7115 }, { "epoch": 0.20776035736182885, "grad_norm": 0.5804586090739636, "learning_rate": 4.401459854014598e-05, "loss": 0.6678, "step": 7116 }, { "epoch": 0.2077895535896762, "grad_norm": 0.5496471781750715, "learning_rate": 4.401297648012977e-05, "loss": 0.594, "step": 7117 }, { "epoch": 0.20781874981752357, "grad_norm": 0.5558881738527022, "learning_rate": 4.401135442011355e-05, "loss": 0.689, "step": 7118 }, { "epoch": 0.20784794604537093, "grad_norm": 0.5891029132578234, "learning_rate": 4.400973236009733e-05, "loss": 0.6637, "step": 7119 }, { "epoch": 0.2078771422732183, "grad_norm": 0.6059704734965883, "learning_rate": 4.4008110300081104e-05, "loss": 0.6731, "step": 7120 }, { "epoch": 0.20790633850106566, "grad_norm": 0.63503381521122, "learning_rate": 4.4006488240064885e-05, "loss": 0.7123, "step": 7121 }, { "epoch": 0.20793553472891302, "grad_norm": 0.5111335026631676, "learning_rate": 4.400486618004867e-05, "loss": 0.5695, "step": 7122 }, { "epoch": 0.20796473095676038, "grad_norm": 0.5786882266558967, "learning_rate": 4.400324412003244e-05, "loss": 0.7276, "step": 7123 }, { "epoch": 0.20799392718460774, "grad_norm": 0.6186202384223, "learning_rate": 4.4001622060016224e-05, "loss": 0.7048, "step": 7124 }, { "epoch": 0.2080231234124551, "grad_norm": 0.5596143703699621, "learning_rate": 4.4000000000000006e-05, "loss": 0.6574, "step": 7125 }, { "epoch": 0.20805231964030246, "grad_norm": 0.6713325645206971, "learning_rate": 4.399837793998378e-05, "loss": 0.8673, "step": 7126 }, { "epoch": 0.20808151586814982, "grad_norm": 0.6272420661132535, "learning_rate": 4.399675587996756e-05, "loss": 0.7383, "step": 7127 }, { "epoch": 0.20811071209599719, "grad_norm": 0.5685827127545185, "learning_rate": 4.399513381995134e-05, "loss": 0.6153, "step": 7128 }, { "epoch": 0.20813990832384455, "grad_norm": 0.5931972413863824, "learning_rate": 4.399351175993512e-05, "loss": 0.6934, "step": 7129 }, { "epoch": 0.2081691045516919, "grad_norm": 0.6915691955814688, "learning_rate": 4.39918896999189e-05, "loss": 0.7222, "step": 7130 }, { "epoch": 0.2081983007795393, "grad_norm": 0.5905856658404605, "learning_rate": 4.3990267639902676e-05, "loss": 0.7034, "step": 7131 }, { "epoch": 0.20822749700738666, "grad_norm": 0.5695716195492408, "learning_rate": 4.398864557988646e-05, "loss": 0.6743, "step": 7132 }, { "epoch": 0.20825669323523402, "grad_norm": 0.5504307502855802, "learning_rate": 4.398702351987023e-05, "loss": 0.6253, "step": 7133 }, { "epoch": 0.20828588946308138, "grad_norm": 0.5318828868192697, "learning_rate": 4.3985401459854014e-05, "loss": 0.6083, "step": 7134 }, { "epoch": 0.20831508569092874, "grad_norm": 0.5882762900995498, "learning_rate": 4.3983779399837796e-05, "loss": 0.6801, "step": 7135 }, { "epoch": 0.2083442819187761, "grad_norm": 0.5868746128856679, "learning_rate": 4.398215733982158e-05, "loss": 0.6941, "step": 7136 }, { "epoch": 0.20837347814662346, "grad_norm": 0.5782500791988049, "learning_rate": 4.398053527980536e-05, "loss": 0.6451, "step": 7137 }, { "epoch": 0.20840267437447083, "grad_norm": 0.5691021923916737, "learning_rate": 4.3978913219789135e-05, "loss": 0.6476, "step": 7138 }, { "epoch": 0.2084318706023182, "grad_norm": 0.5557593594025446, "learning_rate": 4.3977291159772916e-05, "loss": 0.6023, "step": 7139 }, { "epoch": 0.20846106683016555, "grad_norm": 0.5600053977451136, "learning_rate": 4.397566909975669e-05, "loss": 0.6233, "step": 7140 }, { "epoch": 0.2084902630580129, "grad_norm": 0.6059134235646145, "learning_rate": 4.397404703974047e-05, "loss": 0.7332, "step": 7141 }, { "epoch": 0.20851945928586027, "grad_norm": 0.5890156943152731, "learning_rate": 4.3972424979724255e-05, "loss": 0.6666, "step": 7142 }, { "epoch": 0.20854865551370763, "grad_norm": 0.5933948310223423, "learning_rate": 4.397080291970803e-05, "loss": 0.7039, "step": 7143 }, { "epoch": 0.208577851741555, "grad_norm": 0.5995139126097558, "learning_rate": 4.396918085969181e-05, "loss": 0.7009, "step": 7144 }, { "epoch": 0.20860704796940235, "grad_norm": 0.5468462629060593, "learning_rate": 4.396755879967559e-05, "loss": 0.6192, "step": 7145 }, { "epoch": 0.20863624419724972, "grad_norm": 0.5795236410027651, "learning_rate": 4.396593673965937e-05, "loss": 0.6494, "step": 7146 }, { "epoch": 0.20866544042509708, "grad_norm": 0.5989135655575033, "learning_rate": 4.396431467964315e-05, "loss": 0.665, "step": 7147 }, { "epoch": 0.20869463665294444, "grad_norm": 0.5384107351192686, "learning_rate": 4.3962692619626925e-05, "loss": 0.5819, "step": 7148 }, { "epoch": 0.2087238328807918, "grad_norm": 0.578509110015345, "learning_rate": 4.396107055961071e-05, "loss": 0.6211, "step": 7149 }, { "epoch": 0.20875302910863916, "grad_norm": 0.6232049010869523, "learning_rate": 4.395944849959449e-05, "loss": 0.7094, "step": 7150 }, { "epoch": 0.20878222533648652, "grad_norm": 0.6177044092961215, "learning_rate": 4.3957826439578264e-05, "loss": 0.7252, "step": 7151 }, { "epoch": 0.20881142156433388, "grad_norm": 0.5799159156175523, "learning_rate": 4.3956204379562045e-05, "loss": 0.6657, "step": 7152 }, { "epoch": 0.20884061779218124, "grad_norm": 0.5719599811886389, "learning_rate": 4.395458231954582e-05, "loss": 0.6247, "step": 7153 }, { "epoch": 0.2088698140200286, "grad_norm": 0.6542817467224203, "learning_rate": 4.39529602595296e-05, "loss": 0.7108, "step": 7154 }, { "epoch": 0.20889901024787597, "grad_norm": 0.5893213926365566, "learning_rate": 4.3951338199513384e-05, "loss": 0.6472, "step": 7155 }, { "epoch": 0.20892820647572333, "grad_norm": 0.5214572107616882, "learning_rate": 4.3949716139497166e-05, "loss": 0.5799, "step": 7156 }, { "epoch": 0.2089574027035707, "grad_norm": 0.596325711424158, "learning_rate": 4.394809407948095e-05, "loss": 0.6679, "step": 7157 }, { "epoch": 0.20898659893141805, "grad_norm": 0.5752289139466029, "learning_rate": 4.394647201946472e-05, "loss": 0.618, "step": 7158 }, { "epoch": 0.2090157951592654, "grad_norm": 0.5847908508092206, "learning_rate": 4.3944849959448504e-05, "loss": 0.6385, "step": 7159 }, { "epoch": 0.20904499138711277, "grad_norm": 0.58622729154778, "learning_rate": 4.394322789943228e-05, "loss": 0.6381, "step": 7160 }, { "epoch": 0.20907418761496016, "grad_norm": 0.5907345974349487, "learning_rate": 4.394160583941606e-05, "loss": 0.7386, "step": 7161 }, { "epoch": 0.20910338384280752, "grad_norm": 0.6584398121122625, "learning_rate": 4.393998377939984e-05, "loss": 0.7093, "step": 7162 }, { "epoch": 0.20913258007065488, "grad_norm": 0.6278506203758182, "learning_rate": 4.393836171938362e-05, "loss": 0.6733, "step": 7163 }, { "epoch": 0.20916177629850224, "grad_norm": 0.5713014948624096, "learning_rate": 4.39367396593674e-05, "loss": 0.6107, "step": 7164 }, { "epoch": 0.2091909725263496, "grad_norm": 0.5538636860294847, "learning_rate": 4.3935117599351174e-05, "loss": 0.6016, "step": 7165 }, { "epoch": 0.20922016875419697, "grad_norm": 0.5617588924433625, "learning_rate": 4.3933495539334956e-05, "loss": 0.6784, "step": 7166 }, { "epoch": 0.20924936498204433, "grad_norm": 0.5313483325185585, "learning_rate": 4.393187347931874e-05, "loss": 0.5718, "step": 7167 }, { "epoch": 0.2092785612098917, "grad_norm": 0.5882256739514191, "learning_rate": 4.393025141930251e-05, "loss": 0.6509, "step": 7168 }, { "epoch": 0.20930775743773905, "grad_norm": 0.5559520916609901, "learning_rate": 4.3928629359286295e-05, "loss": 0.6414, "step": 7169 }, { "epoch": 0.2093369536655864, "grad_norm": 0.5830217287830377, "learning_rate": 4.3927007299270077e-05, "loss": 0.6644, "step": 7170 }, { "epoch": 0.20936614989343377, "grad_norm": 0.6419051547179501, "learning_rate": 4.392538523925385e-05, "loss": 0.7344, "step": 7171 }, { "epoch": 0.20939534612128113, "grad_norm": 0.5597463258072998, "learning_rate": 4.392376317923763e-05, "loss": 0.6643, "step": 7172 }, { "epoch": 0.2094245423491285, "grad_norm": 0.6348597292269299, "learning_rate": 4.3922141119221415e-05, "loss": 0.7784, "step": 7173 }, { "epoch": 0.20945373857697586, "grad_norm": 0.6138971283469439, "learning_rate": 4.39205190592052e-05, "loss": 0.7708, "step": 7174 }, { "epoch": 0.20948293480482322, "grad_norm": 0.5693281467116539, "learning_rate": 4.391889699918897e-05, "loss": 0.6419, "step": 7175 }, { "epoch": 0.20951213103267058, "grad_norm": 0.5404063562611501, "learning_rate": 4.3917274939172754e-05, "loss": 0.6187, "step": 7176 }, { "epoch": 0.20954132726051794, "grad_norm": 0.5686128183920367, "learning_rate": 4.3915652879156535e-05, "loss": 0.6504, "step": 7177 }, { "epoch": 0.2095705234883653, "grad_norm": 0.5543007052377629, "learning_rate": 4.391403081914031e-05, "loss": 0.6185, "step": 7178 }, { "epoch": 0.20959971971621266, "grad_norm": 0.5548460625226501, "learning_rate": 4.391240875912409e-05, "loss": 0.6149, "step": 7179 }, { "epoch": 0.20962891594406002, "grad_norm": 0.5530007862838746, "learning_rate": 4.391078669910787e-05, "loss": 0.592, "step": 7180 }, { "epoch": 0.20965811217190738, "grad_norm": 0.5715318267677962, "learning_rate": 4.390916463909165e-05, "loss": 0.6288, "step": 7181 }, { "epoch": 0.20968730839975475, "grad_norm": 0.5663593525991342, "learning_rate": 4.390754257907543e-05, "loss": 0.6601, "step": 7182 }, { "epoch": 0.2097165046276021, "grad_norm": 0.5718114640873327, "learning_rate": 4.3905920519059206e-05, "loss": 0.6163, "step": 7183 }, { "epoch": 0.20974570085544947, "grad_norm": 0.6029223072818498, "learning_rate": 4.390429845904299e-05, "loss": 0.7048, "step": 7184 }, { "epoch": 0.20977489708329683, "grad_norm": 0.5407451878050925, "learning_rate": 4.390267639902676e-05, "loss": 0.5887, "step": 7185 }, { "epoch": 0.2098040933111442, "grad_norm": 0.551686472333074, "learning_rate": 4.3901054339010544e-05, "loss": 0.6667, "step": 7186 }, { "epoch": 0.20983328953899155, "grad_norm": 0.5723240287309602, "learning_rate": 4.3899432278994326e-05, "loss": 0.6609, "step": 7187 }, { "epoch": 0.2098624857668389, "grad_norm": 0.570000771078226, "learning_rate": 4.38978102189781e-05, "loss": 0.6193, "step": 7188 }, { "epoch": 0.20989168199468627, "grad_norm": 0.5987470198715826, "learning_rate": 4.389618815896188e-05, "loss": 0.741, "step": 7189 }, { "epoch": 0.20992087822253364, "grad_norm": 0.6062473812045941, "learning_rate": 4.389456609894566e-05, "loss": 0.6495, "step": 7190 }, { "epoch": 0.20995007445038102, "grad_norm": 0.5564016304849336, "learning_rate": 4.389294403892944e-05, "loss": 0.6602, "step": 7191 }, { "epoch": 0.20997927067822839, "grad_norm": 0.5752838559340578, "learning_rate": 4.389132197891323e-05, "loss": 0.6798, "step": 7192 }, { "epoch": 0.21000846690607575, "grad_norm": 0.6244335714895201, "learning_rate": 4.3889699918897e-05, "loss": 0.6428, "step": 7193 }, { "epoch": 0.2100376631339231, "grad_norm": 0.5839105116648441, "learning_rate": 4.3888077858880785e-05, "loss": 0.6487, "step": 7194 }, { "epoch": 0.21006685936177047, "grad_norm": 0.6411741290402538, "learning_rate": 4.388645579886456e-05, "loss": 0.7455, "step": 7195 }, { "epoch": 0.21009605558961783, "grad_norm": 0.582381321609071, "learning_rate": 4.388483373884834e-05, "loss": 0.6844, "step": 7196 }, { "epoch": 0.2101252518174652, "grad_norm": 0.6689779786527332, "learning_rate": 4.388321167883212e-05, "loss": 0.7096, "step": 7197 }, { "epoch": 0.21015444804531255, "grad_norm": 0.537731125987931, "learning_rate": 4.38815896188159e-05, "loss": 0.6075, "step": 7198 }, { "epoch": 0.21018364427315991, "grad_norm": 0.5252245554338962, "learning_rate": 4.387996755879968e-05, "loss": 0.5547, "step": 7199 }, { "epoch": 0.21021284050100728, "grad_norm": 0.5410522350730694, "learning_rate": 4.3878345498783455e-05, "loss": 0.5921, "step": 7200 }, { "epoch": 0.21024203672885464, "grad_norm": 0.5725576883366073, "learning_rate": 4.3876723438767237e-05, "loss": 0.6346, "step": 7201 }, { "epoch": 0.210271232956702, "grad_norm": 0.6420289835857278, "learning_rate": 4.387510137875102e-05, "loss": 0.7356, "step": 7202 }, { "epoch": 0.21030042918454936, "grad_norm": 0.6281047623791802, "learning_rate": 4.387347931873479e-05, "loss": 0.7345, "step": 7203 }, { "epoch": 0.21032962541239672, "grad_norm": 0.5921904827203652, "learning_rate": 4.3871857258718575e-05, "loss": 0.6383, "step": 7204 }, { "epoch": 0.21035882164024408, "grad_norm": 0.6040361234368923, "learning_rate": 4.387023519870235e-05, "loss": 0.7004, "step": 7205 }, { "epoch": 0.21038801786809144, "grad_norm": 0.6032832348867404, "learning_rate": 4.386861313868613e-05, "loss": 0.7514, "step": 7206 }, { "epoch": 0.2104172140959388, "grad_norm": 0.5784852802835377, "learning_rate": 4.3866991078669914e-05, "loss": 0.6663, "step": 7207 }, { "epoch": 0.21044641032378616, "grad_norm": 0.6001622057653121, "learning_rate": 4.386536901865369e-05, "loss": 0.6762, "step": 7208 }, { "epoch": 0.21047560655163353, "grad_norm": 0.5821559205311772, "learning_rate": 4.386374695863747e-05, "loss": 0.6578, "step": 7209 }, { "epoch": 0.2105048027794809, "grad_norm": 0.5745086322846511, "learning_rate": 4.3862124898621245e-05, "loss": 0.653, "step": 7210 }, { "epoch": 0.21053399900732825, "grad_norm": 0.6053366754602211, "learning_rate": 4.3860502838605034e-05, "loss": 0.6442, "step": 7211 }, { "epoch": 0.2105631952351756, "grad_norm": 0.5512301873715225, "learning_rate": 4.3858880778588816e-05, "loss": 0.6398, "step": 7212 }, { "epoch": 0.21059239146302297, "grad_norm": 0.5406012255389209, "learning_rate": 4.385725871857259e-05, "loss": 0.6343, "step": 7213 }, { "epoch": 0.21062158769087033, "grad_norm": 0.5579380132199595, "learning_rate": 4.385563665855637e-05, "loss": 0.6372, "step": 7214 }, { "epoch": 0.2106507839187177, "grad_norm": 0.6189164905007056, "learning_rate": 4.385401459854015e-05, "loss": 0.6967, "step": 7215 }, { "epoch": 0.21067998014656505, "grad_norm": 0.624680092956527, "learning_rate": 4.385239253852393e-05, "loss": 0.7048, "step": 7216 }, { "epoch": 0.21070917637441242, "grad_norm": 0.5759923279634355, "learning_rate": 4.385077047850771e-05, "loss": 0.6659, "step": 7217 }, { "epoch": 0.21073837260225978, "grad_norm": 0.572747542547795, "learning_rate": 4.3849148418491486e-05, "loss": 0.662, "step": 7218 }, { "epoch": 0.21076756883010714, "grad_norm": 0.6540682641537051, "learning_rate": 4.384752635847527e-05, "loss": 0.7439, "step": 7219 }, { "epoch": 0.2107967650579545, "grad_norm": 0.5535514870693765, "learning_rate": 4.384590429845904e-05, "loss": 0.5975, "step": 7220 }, { "epoch": 0.21082596128580186, "grad_norm": 0.556551157060811, "learning_rate": 4.3844282238442824e-05, "loss": 0.657, "step": 7221 }, { "epoch": 0.21085515751364925, "grad_norm": 0.5368450281995956, "learning_rate": 4.3842660178426606e-05, "loss": 0.568, "step": 7222 }, { "epoch": 0.2108843537414966, "grad_norm": 0.5656422066416724, "learning_rate": 4.384103811841038e-05, "loss": 0.6467, "step": 7223 }, { "epoch": 0.21091354996934397, "grad_norm": 0.5565475839604832, "learning_rate": 4.383941605839416e-05, "loss": 0.6331, "step": 7224 }, { "epoch": 0.21094274619719133, "grad_norm": 0.5703681504264327, "learning_rate": 4.383779399837794e-05, "loss": 0.6, "step": 7225 }, { "epoch": 0.2109719424250387, "grad_norm": 0.5893375077794121, "learning_rate": 4.383617193836172e-05, "loss": 0.6523, "step": 7226 }, { "epoch": 0.21100113865288606, "grad_norm": 0.5605699278834991, "learning_rate": 4.38345498783455e-05, "loss": 0.6153, "step": 7227 }, { "epoch": 0.21103033488073342, "grad_norm": 0.553419617821726, "learning_rate": 4.3832927818329276e-05, "loss": 0.5509, "step": 7228 }, { "epoch": 0.21105953110858078, "grad_norm": 0.5822501868761438, "learning_rate": 4.383130575831306e-05, "loss": 0.6906, "step": 7229 }, { "epoch": 0.21108872733642814, "grad_norm": 0.5462072848201702, "learning_rate": 4.382968369829684e-05, "loss": 0.6093, "step": 7230 }, { "epoch": 0.2111179235642755, "grad_norm": 0.5891119139871315, "learning_rate": 4.382806163828062e-05, "loss": 0.6699, "step": 7231 }, { "epoch": 0.21114711979212286, "grad_norm": 0.5447887597199503, "learning_rate": 4.38264395782644e-05, "loss": 0.5921, "step": 7232 }, { "epoch": 0.21117631601997022, "grad_norm": 0.626664384015501, "learning_rate": 4.382481751824818e-05, "loss": 0.6517, "step": 7233 }, { "epoch": 0.21120551224781758, "grad_norm": 0.5844161460075062, "learning_rate": 4.382319545823196e-05, "loss": 0.6574, "step": 7234 }, { "epoch": 0.21123470847566495, "grad_norm": 0.5309490943545963, "learning_rate": 4.3821573398215735e-05, "loss": 0.564, "step": 7235 }, { "epoch": 0.2112639047035123, "grad_norm": 0.5448261291473637, "learning_rate": 4.381995133819952e-05, "loss": 0.5943, "step": 7236 }, { "epoch": 0.21129310093135967, "grad_norm": 0.6237455816338568, "learning_rate": 4.38183292781833e-05, "loss": 0.6981, "step": 7237 }, { "epoch": 0.21132229715920703, "grad_norm": 0.6045590926609297, "learning_rate": 4.3816707218167074e-05, "loss": 0.6333, "step": 7238 }, { "epoch": 0.2113514933870544, "grad_norm": 0.6186011302449799, "learning_rate": 4.3815085158150855e-05, "loss": 0.7219, "step": 7239 }, { "epoch": 0.21138068961490175, "grad_norm": 0.6056907203068065, "learning_rate": 4.381346309813463e-05, "loss": 0.7154, "step": 7240 }, { "epoch": 0.2114098858427491, "grad_norm": 0.5661388278094911, "learning_rate": 4.381184103811841e-05, "loss": 0.6722, "step": 7241 }, { "epoch": 0.21143908207059647, "grad_norm": 0.548906305264002, "learning_rate": 4.3810218978102194e-05, "loss": 0.633, "step": 7242 }, { "epoch": 0.21146827829844383, "grad_norm": 0.5271549457742737, "learning_rate": 4.380859691808597e-05, "loss": 0.6031, "step": 7243 }, { "epoch": 0.2114974745262912, "grad_norm": 0.5969611568278788, "learning_rate": 4.380697485806975e-05, "loss": 0.6789, "step": 7244 }, { "epoch": 0.21152667075413856, "grad_norm": 0.5806472469715376, "learning_rate": 4.3805352798053526e-05, "loss": 0.6725, "step": 7245 }, { "epoch": 0.21155586698198592, "grad_norm": 0.5580400567304605, "learning_rate": 4.380373073803731e-05, "loss": 0.6534, "step": 7246 }, { "epoch": 0.21158506320983328, "grad_norm": 0.5051373097673892, "learning_rate": 4.380210867802109e-05, "loss": 0.5245, "step": 7247 }, { "epoch": 0.21161425943768064, "grad_norm": 0.6785883355092236, "learning_rate": 4.3800486618004864e-05, "loss": 0.812, "step": 7248 }, { "epoch": 0.211643455665528, "grad_norm": 0.6129544395092855, "learning_rate": 4.379886455798865e-05, "loss": 0.6951, "step": 7249 }, { "epoch": 0.21167265189337536, "grad_norm": 0.5684298615569747, "learning_rate": 4.379724249797243e-05, "loss": 0.6676, "step": 7250 }, { "epoch": 0.21170184812122272, "grad_norm": 0.5870402329502893, "learning_rate": 4.379562043795621e-05, "loss": 0.6745, "step": 7251 }, { "epoch": 0.2117310443490701, "grad_norm": 0.5861323913289608, "learning_rate": 4.3793998377939984e-05, "loss": 0.7431, "step": 7252 }, { "epoch": 0.21176024057691747, "grad_norm": 0.6002608650367226, "learning_rate": 4.3792376317923766e-05, "loss": 0.6818, "step": 7253 }, { "epoch": 0.21178943680476484, "grad_norm": 0.5538719686316802, "learning_rate": 4.379075425790755e-05, "loss": 0.5976, "step": 7254 }, { "epoch": 0.2118186330326122, "grad_norm": 0.6497281268224497, "learning_rate": 4.378913219789132e-05, "loss": 0.756, "step": 7255 }, { "epoch": 0.21184782926045956, "grad_norm": 0.5689789616946997, "learning_rate": 4.3787510137875105e-05, "loss": 0.6255, "step": 7256 }, { "epoch": 0.21187702548830692, "grad_norm": 0.6802761969761614, "learning_rate": 4.3785888077858887e-05, "loss": 0.7579, "step": 7257 }, { "epoch": 0.21190622171615428, "grad_norm": 0.6414744383340423, "learning_rate": 4.378426601784266e-05, "loss": 0.7039, "step": 7258 }, { "epoch": 0.21193541794400164, "grad_norm": 0.6041561207827016, "learning_rate": 4.378264395782644e-05, "loss": 0.685, "step": 7259 }, { "epoch": 0.211964614171849, "grad_norm": 0.5505146270068756, "learning_rate": 4.378102189781022e-05, "loss": 0.6284, "step": 7260 }, { "epoch": 0.21199381039969636, "grad_norm": 0.5613014269495787, "learning_rate": 4.3779399837794e-05, "loss": 0.6816, "step": 7261 }, { "epoch": 0.21202300662754373, "grad_norm": 0.5548538339134039, "learning_rate": 4.377777777777778e-05, "loss": 0.6738, "step": 7262 }, { "epoch": 0.2120522028553911, "grad_norm": 0.5652532784754857, "learning_rate": 4.377615571776156e-05, "loss": 0.6414, "step": 7263 }, { "epoch": 0.21208139908323845, "grad_norm": 0.5820642539048632, "learning_rate": 4.377453365774534e-05, "loss": 0.666, "step": 7264 }, { "epoch": 0.2121105953110858, "grad_norm": 0.5483835663313392, "learning_rate": 4.3772911597729114e-05, "loss": 0.6393, "step": 7265 }, { "epoch": 0.21213979153893317, "grad_norm": 0.5891159476685156, "learning_rate": 4.3771289537712895e-05, "loss": 0.6998, "step": 7266 }, { "epoch": 0.21216898776678053, "grad_norm": 0.5619823024700065, "learning_rate": 4.376966747769668e-05, "loss": 0.6758, "step": 7267 }, { "epoch": 0.2121981839946279, "grad_norm": 0.6377713319846433, "learning_rate": 4.376804541768046e-05, "loss": 0.6705, "step": 7268 }, { "epoch": 0.21222738022247525, "grad_norm": 0.6077512984287302, "learning_rate": 4.376642335766424e-05, "loss": 0.607, "step": 7269 }, { "epoch": 0.21225657645032261, "grad_norm": 0.5197815550202262, "learning_rate": 4.3764801297648016e-05, "loss": 0.5501, "step": 7270 }, { "epoch": 0.21228577267816998, "grad_norm": 0.5395406324276789, "learning_rate": 4.37631792376318e-05, "loss": 0.593, "step": 7271 }, { "epoch": 0.21231496890601734, "grad_norm": 0.5477630493888571, "learning_rate": 4.376155717761557e-05, "loss": 0.6004, "step": 7272 }, { "epoch": 0.2123441651338647, "grad_norm": 0.5556982610396279, "learning_rate": 4.3759935117599354e-05, "loss": 0.6497, "step": 7273 }, { "epoch": 0.21237336136171206, "grad_norm": 0.6067117971699804, "learning_rate": 4.3758313057583136e-05, "loss": 0.7339, "step": 7274 }, { "epoch": 0.21240255758955942, "grad_norm": 0.570168906864085, "learning_rate": 4.375669099756691e-05, "loss": 0.6585, "step": 7275 }, { "epoch": 0.21243175381740678, "grad_norm": 0.5596100108216072, "learning_rate": 4.375506893755069e-05, "loss": 0.6533, "step": 7276 }, { "epoch": 0.21246095004525414, "grad_norm": 0.5590587715579869, "learning_rate": 4.375344687753447e-05, "loss": 0.6725, "step": 7277 }, { "epoch": 0.2124901462731015, "grad_norm": 0.5328015700850034, "learning_rate": 4.375182481751825e-05, "loss": 0.6639, "step": 7278 }, { "epoch": 0.21251934250094887, "grad_norm": 0.6129585503770884, "learning_rate": 4.375020275750203e-05, "loss": 0.8161, "step": 7279 }, { "epoch": 0.21254853872879623, "grad_norm": 0.6090337046165343, "learning_rate": 4.3748580697485806e-05, "loss": 0.6682, "step": 7280 }, { "epoch": 0.2125777349566436, "grad_norm": 0.541610801475758, "learning_rate": 4.374695863746959e-05, "loss": 0.5966, "step": 7281 }, { "epoch": 0.21260693118449098, "grad_norm": 0.6346705247852034, "learning_rate": 4.374533657745337e-05, "loss": 0.7668, "step": 7282 }, { "epoch": 0.21263612741233834, "grad_norm": 0.6123621212529712, "learning_rate": 4.3743714517437145e-05, "loss": 0.6558, "step": 7283 }, { "epoch": 0.2126653236401857, "grad_norm": 0.5836981183014415, "learning_rate": 4.3742092457420926e-05, "loss": 0.6812, "step": 7284 }, { "epoch": 0.21269451986803306, "grad_norm": 0.6594113196331336, "learning_rate": 4.37404703974047e-05, "loss": 0.7858, "step": 7285 }, { "epoch": 0.21272371609588042, "grad_norm": 0.7160368211664344, "learning_rate": 4.373884833738848e-05, "loss": 0.6966, "step": 7286 }, { "epoch": 0.21275291232372778, "grad_norm": 0.5697571102444557, "learning_rate": 4.3737226277372265e-05, "loss": 0.6221, "step": 7287 }, { "epoch": 0.21278210855157514, "grad_norm": 0.5599009190350093, "learning_rate": 4.3735604217356047e-05, "loss": 0.6655, "step": 7288 }, { "epoch": 0.2128113047794225, "grad_norm": 0.6235495226877524, "learning_rate": 4.373398215733983e-05, "loss": 0.7389, "step": 7289 }, { "epoch": 0.21284050100726987, "grad_norm": 0.5560636682124145, "learning_rate": 4.37323600973236e-05, "loss": 0.564, "step": 7290 }, { "epoch": 0.21286969723511723, "grad_norm": 0.5774953304016448, "learning_rate": 4.3730738037307385e-05, "loss": 0.6792, "step": 7291 }, { "epoch": 0.2128988934629646, "grad_norm": 0.6082695625545385, "learning_rate": 4.372911597729116e-05, "loss": 0.7221, "step": 7292 }, { "epoch": 0.21292808969081195, "grad_norm": 0.624131148656581, "learning_rate": 4.372749391727494e-05, "loss": 0.7161, "step": 7293 }, { "epoch": 0.2129572859186593, "grad_norm": 0.5969329876303546, "learning_rate": 4.3725871857258724e-05, "loss": 0.6565, "step": 7294 }, { "epoch": 0.21298648214650667, "grad_norm": 0.6563577351250754, "learning_rate": 4.37242497972425e-05, "loss": 0.7151, "step": 7295 }, { "epoch": 0.21301567837435403, "grad_norm": 0.5602642608870332, "learning_rate": 4.372262773722628e-05, "loss": 0.6823, "step": 7296 }, { "epoch": 0.2130448746022014, "grad_norm": 0.5762734640684832, "learning_rate": 4.3721005677210055e-05, "loss": 0.6684, "step": 7297 }, { "epoch": 0.21307407083004876, "grad_norm": 0.5965887462000513, "learning_rate": 4.371938361719384e-05, "loss": 0.6904, "step": 7298 }, { "epoch": 0.21310326705789612, "grad_norm": 0.6088664687085809, "learning_rate": 4.371776155717762e-05, "loss": 0.698, "step": 7299 }, { "epoch": 0.21313246328574348, "grad_norm": 0.6072924202766006, "learning_rate": 4.3716139497161394e-05, "loss": 0.7067, "step": 7300 }, { "epoch": 0.21316165951359084, "grad_norm": 0.5599932785590427, "learning_rate": 4.3714517437145176e-05, "loss": 0.6551, "step": 7301 }, { "epoch": 0.2131908557414382, "grad_norm": 0.5730090914147304, "learning_rate": 4.371289537712895e-05, "loss": 0.687, "step": 7302 }, { "epoch": 0.21322005196928556, "grad_norm": 0.5420880605564089, "learning_rate": 4.371127331711273e-05, "loss": 0.581, "step": 7303 }, { "epoch": 0.21324924819713292, "grad_norm": 0.5837986115656788, "learning_rate": 4.3709651257096514e-05, "loss": 0.6605, "step": 7304 }, { "epoch": 0.21327844442498028, "grad_norm": 0.5922131466035407, "learning_rate": 4.370802919708029e-05, "loss": 0.729, "step": 7305 }, { "epoch": 0.21330764065282765, "grad_norm": 0.5678861965659733, "learning_rate": 4.370640713706408e-05, "loss": 0.664, "step": 7306 }, { "epoch": 0.213336836880675, "grad_norm": 0.5687354795522032, "learning_rate": 4.370478507704785e-05, "loss": 0.6485, "step": 7307 }, { "epoch": 0.21336603310852237, "grad_norm": 0.6467603850221749, "learning_rate": 4.3703163017031634e-05, "loss": 0.7626, "step": 7308 }, { "epoch": 0.21339522933636973, "grad_norm": 0.578892736904779, "learning_rate": 4.3701540957015416e-05, "loss": 0.6588, "step": 7309 }, { "epoch": 0.2134244255642171, "grad_norm": 0.5869715450603844, "learning_rate": 4.369991889699919e-05, "loss": 0.6673, "step": 7310 }, { "epoch": 0.21345362179206445, "grad_norm": 0.5388327113443078, "learning_rate": 4.369829683698297e-05, "loss": 0.6224, "step": 7311 }, { "epoch": 0.21348281801991184, "grad_norm": 0.6048929948007172, "learning_rate": 4.369667477696675e-05, "loss": 0.7159, "step": 7312 }, { "epoch": 0.2135120142477592, "grad_norm": 0.5418127856896869, "learning_rate": 4.369505271695053e-05, "loss": 0.587, "step": 7313 }, { "epoch": 0.21354121047560656, "grad_norm": 0.5850675405970069, "learning_rate": 4.369343065693431e-05, "loss": 0.632, "step": 7314 }, { "epoch": 0.21357040670345392, "grad_norm": 0.5664948089728327, "learning_rate": 4.3691808596918086e-05, "loss": 0.6223, "step": 7315 }, { "epoch": 0.21359960293130129, "grad_norm": 0.5871603685677697, "learning_rate": 4.369018653690187e-05, "loss": 0.682, "step": 7316 }, { "epoch": 0.21362879915914865, "grad_norm": 0.5879782799471939, "learning_rate": 4.368856447688564e-05, "loss": 0.6906, "step": 7317 }, { "epoch": 0.213657995386996, "grad_norm": 0.6037720046243576, "learning_rate": 4.3686942416869425e-05, "loss": 0.6932, "step": 7318 }, { "epoch": 0.21368719161484337, "grad_norm": 0.5537083671265987, "learning_rate": 4.368532035685321e-05, "loss": 0.6147, "step": 7319 }, { "epoch": 0.21371638784269073, "grad_norm": 0.5363607779807317, "learning_rate": 4.368369829683698e-05, "loss": 0.5992, "step": 7320 }, { "epoch": 0.2137455840705381, "grad_norm": 0.5219260338353232, "learning_rate": 4.3682076236820763e-05, "loss": 0.5793, "step": 7321 }, { "epoch": 0.21377478029838545, "grad_norm": 0.5660911434542099, "learning_rate": 4.368045417680454e-05, "loss": 0.6268, "step": 7322 }, { "epoch": 0.21380397652623281, "grad_norm": 0.5999919738631848, "learning_rate": 4.367883211678832e-05, "loss": 0.6495, "step": 7323 }, { "epoch": 0.21383317275408018, "grad_norm": 0.585012420147139, "learning_rate": 4.367721005677211e-05, "loss": 0.6499, "step": 7324 }, { "epoch": 0.21386236898192754, "grad_norm": 0.563489898546148, "learning_rate": 4.3675587996755884e-05, "loss": 0.6185, "step": 7325 }, { "epoch": 0.2138915652097749, "grad_norm": 0.6988764126298905, "learning_rate": 4.3673965936739665e-05, "loss": 0.7002, "step": 7326 }, { "epoch": 0.21392076143762226, "grad_norm": 0.5974695469733744, "learning_rate": 4.367234387672344e-05, "loss": 0.7474, "step": 7327 }, { "epoch": 0.21394995766546962, "grad_norm": 0.633567466099204, "learning_rate": 4.367072181670722e-05, "loss": 0.7402, "step": 7328 }, { "epoch": 0.21397915389331698, "grad_norm": 0.5829788341878137, "learning_rate": 4.3669099756691004e-05, "loss": 0.6655, "step": 7329 }, { "epoch": 0.21400835012116434, "grad_norm": 0.5451402427263661, "learning_rate": 4.366747769667478e-05, "loss": 0.6018, "step": 7330 }, { "epoch": 0.2140375463490117, "grad_norm": 0.5766577950246519, "learning_rate": 4.366585563665856e-05, "loss": 0.6751, "step": 7331 }, { "epoch": 0.21406674257685906, "grad_norm": 0.5660221452218476, "learning_rate": 4.3664233576642336e-05, "loss": 0.6527, "step": 7332 }, { "epoch": 0.21409593880470643, "grad_norm": 0.5727865859210205, "learning_rate": 4.366261151662612e-05, "loss": 0.6165, "step": 7333 }, { "epoch": 0.2141251350325538, "grad_norm": 0.5170876961507084, "learning_rate": 4.36609894566099e-05, "loss": 0.5964, "step": 7334 }, { "epoch": 0.21415433126040115, "grad_norm": 0.5544157004299255, "learning_rate": 4.3659367396593674e-05, "loss": 0.6171, "step": 7335 }, { "epoch": 0.2141835274882485, "grad_norm": 0.5211913481377991, "learning_rate": 4.3657745336577456e-05, "loss": 0.5887, "step": 7336 }, { "epoch": 0.21421272371609587, "grad_norm": 0.5808924972917168, "learning_rate": 4.365612327656123e-05, "loss": 0.6342, "step": 7337 }, { "epoch": 0.21424191994394323, "grad_norm": 0.5650605529418847, "learning_rate": 4.365450121654501e-05, "loss": 0.6638, "step": 7338 }, { "epoch": 0.2142711161717906, "grad_norm": 0.6068434552419293, "learning_rate": 4.3652879156528794e-05, "loss": 0.6142, "step": 7339 }, { "epoch": 0.21430031239963795, "grad_norm": 0.5529427417276574, "learning_rate": 4.365125709651257e-05, "loss": 0.6318, "step": 7340 }, { "epoch": 0.21432950862748532, "grad_norm": 0.5527875805580027, "learning_rate": 4.364963503649635e-05, "loss": 0.6224, "step": 7341 }, { "epoch": 0.2143587048553327, "grad_norm": 0.5935655918030679, "learning_rate": 4.3648012976480126e-05, "loss": 0.6778, "step": 7342 }, { "epoch": 0.21438790108318007, "grad_norm": 0.6341075966312197, "learning_rate": 4.3646390916463915e-05, "loss": 0.6806, "step": 7343 }, { "epoch": 0.21441709731102743, "grad_norm": 0.5486112859511203, "learning_rate": 4.364476885644769e-05, "loss": 0.6369, "step": 7344 }, { "epoch": 0.2144462935388748, "grad_norm": 0.5635969039400406, "learning_rate": 4.364314679643147e-05, "loss": 0.6547, "step": 7345 }, { "epoch": 0.21447548976672215, "grad_norm": 0.5473562223595342, "learning_rate": 4.364152473641525e-05, "loss": 0.5577, "step": 7346 }, { "epoch": 0.2145046859945695, "grad_norm": 0.5860656445738293, "learning_rate": 4.363990267639903e-05, "loss": 0.7052, "step": 7347 }, { "epoch": 0.21453388222241687, "grad_norm": 0.598400291060865, "learning_rate": 4.363828061638281e-05, "loss": 0.7057, "step": 7348 }, { "epoch": 0.21456307845026423, "grad_norm": 0.5422151768086512, "learning_rate": 4.363665855636659e-05, "loss": 0.5822, "step": 7349 }, { "epoch": 0.2145922746781116, "grad_norm": 0.556060793799686, "learning_rate": 4.363503649635037e-05, "loss": 0.6847, "step": 7350 }, { "epoch": 0.21462147090595896, "grad_norm": 0.5728509566326948, "learning_rate": 4.363341443633415e-05, "loss": 0.7005, "step": 7351 }, { "epoch": 0.21465066713380632, "grad_norm": 0.5700587655390794, "learning_rate": 4.3631792376317924e-05, "loss": 0.6874, "step": 7352 }, { "epoch": 0.21467986336165368, "grad_norm": 0.5744554416326553, "learning_rate": 4.3630170316301705e-05, "loss": 0.643, "step": 7353 }, { "epoch": 0.21470905958950104, "grad_norm": 0.5260865658513546, "learning_rate": 4.362854825628549e-05, "loss": 0.5849, "step": 7354 }, { "epoch": 0.2147382558173484, "grad_norm": 0.5722856616726344, "learning_rate": 4.362692619626926e-05, "loss": 0.6963, "step": 7355 }, { "epoch": 0.21476745204519576, "grad_norm": 0.5655649584505004, "learning_rate": 4.3625304136253044e-05, "loss": 0.637, "step": 7356 }, { "epoch": 0.21479664827304312, "grad_norm": 0.5446636649627482, "learning_rate": 4.362368207623682e-05, "loss": 0.6463, "step": 7357 }, { "epoch": 0.21482584450089048, "grad_norm": 0.7289236879690392, "learning_rate": 4.36220600162206e-05, "loss": 0.7651, "step": 7358 }, { "epoch": 0.21485504072873784, "grad_norm": 1.276789021753096, "learning_rate": 4.362043795620438e-05, "loss": 0.8007, "step": 7359 }, { "epoch": 0.2148842369565852, "grad_norm": 0.5367882172819951, "learning_rate": 4.361881589618816e-05, "loss": 0.624, "step": 7360 }, { "epoch": 0.21491343318443257, "grad_norm": 0.6062752135376129, "learning_rate": 4.361719383617194e-05, "loss": 0.6695, "step": 7361 }, { "epoch": 0.21494262941227993, "grad_norm": 0.6009304429914707, "learning_rate": 4.361557177615572e-05, "loss": 0.6916, "step": 7362 }, { "epoch": 0.2149718256401273, "grad_norm": 0.5650958135898185, "learning_rate": 4.36139497161395e-05, "loss": 0.6576, "step": 7363 }, { "epoch": 0.21500102186797465, "grad_norm": 0.5697714725590575, "learning_rate": 4.361232765612328e-05, "loss": 0.6864, "step": 7364 }, { "epoch": 0.215030218095822, "grad_norm": 0.5401969166606119, "learning_rate": 4.361070559610706e-05, "loss": 0.6037, "step": 7365 }, { "epoch": 0.21505941432366937, "grad_norm": 0.5543838042905673, "learning_rate": 4.360908353609084e-05, "loss": 0.6637, "step": 7366 }, { "epoch": 0.21508861055151673, "grad_norm": 0.5800471961046583, "learning_rate": 4.3607461476074616e-05, "loss": 0.7471, "step": 7367 }, { "epoch": 0.2151178067793641, "grad_norm": 0.5694860156353583, "learning_rate": 4.36058394160584e-05, "loss": 0.6638, "step": 7368 }, { "epoch": 0.21514700300721146, "grad_norm": 0.5832791045788042, "learning_rate": 4.360421735604218e-05, "loss": 0.6525, "step": 7369 }, { "epoch": 0.21517619923505882, "grad_norm": 0.5695227711716343, "learning_rate": 4.3602595296025955e-05, "loss": 0.6373, "step": 7370 }, { "epoch": 0.21520539546290618, "grad_norm": 0.5658571836821453, "learning_rate": 4.3600973236009736e-05, "loss": 0.6412, "step": 7371 }, { "epoch": 0.21523459169075357, "grad_norm": 0.591837124305192, "learning_rate": 4.359935117599351e-05, "loss": 0.7011, "step": 7372 }, { "epoch": 0.21526378791860093, "grad_norm": 0.5604972990316545, "learning_rate": 4.359772911597729e-05, "loss": 0.6407, "step": 7373 }, { "epoch": 0.2152929841464483, "grad_norm": 0.5901380964354811, "learning_rate": 4.3596107055961075e-05, "loss": 0.6126, "step": 7374 }, { "epoch": 0.21532218037429565, "grad_norm": 0.6321906163257108, "learning_rate": 4.359448499594485e-05, "loss": 0.7786, "step": 7375 }, { "epoch": 0.215351376602143, "grad_norm": 0.598824533602879, "learning_rate": 4.359286293592863e-05, "loss": 0.7454, "step": 7376 }, { "epoch": 0.21538057282999037, "grad_norm": 0.6776359638871701, "learning_rate": 4.3591240875912407e-05, "loss": 0.6517, "step": 7377 }, { "epoch": 0.21540976905783774, "grad_norm": 0.5493584329596594, "learning_rate": 4.358961881589619e-05, "loss": 0.5855, "step": 7378 }, { "epoch": 0.2154389652856851, "grad_norm": 0.5709500729335936, "learning_rate": 4.358799675587997e-05, "loss": 0.6747, "step": 7379 }, { "epoch": 0.21546816151353246, "grad_norm": 0.5579326649828494, "learning_rate": 4.3586374695863745e-05, "loss": 0.5973, "step": 7380 }, { "epoch": 0.21549735774137982, "grad_norm": 0.555435652998648, "learning_rate": 4.3584752635847534e-05, "loss": 0.5651, "step": 7381 }, { "epoch": 0.21552655396922718, "grad_norm": 0.5491908991055136, "learning_rate": 4.358313057583131e-05, "loss": 0.5938, "step": 7382 }, { "epoch": 0.21555575019707454, "grad_norm": 0.5582846078728384, "learning_rate": 4.358150851581509e-05, "loss": 0.7124, "step": 7383 }, { "epoch": 0.2155849464249219, "grad_norm": 0.5964589612852017, "learning_rate": 4.3579886455798865e-05, "loss": 0.708, "step": 7384 }, { "epoch": 0.21561414265276926, "grad_norm": 0.6300289307570243, "learning_rate": 4.357826439578265e-05, "loss": 0.7594, "step": 7385 }, { "epoch": 0.21564333888061663, "grad_norm": 0.5805953810770041, "learning_rate": 4.357664233576643e-05, "loss": 0.7124, "step": 7386 }, { "epoch": 0.215672535108464, "grad_norm": 0.5382100851153548, "learning_rate": 4.3575020275750204e-05, "loss": 0.5697, "step": 7387 }, { "epoch": 0.21570173133631135, "grad_norm": 0.5793733623683899, "learning_rate": 4.3573398215733986e-05, "loss": 0.6643, "step": 7388 }, { "epoch": 0.2157309275641587, "grad_norm": 0.8397576777834708, "learning_rate": 4.357177615571776e-05, "loss": 0.7452, "step": 7389 }, { "epoch": 0.21576012379200607, "grad_norm": 0.5714898523739756, "learning_rate": 4.357015409570154e-05, "loss": 0.6415, "step": 7390 }, { "epoch": 0.21578932001985343, "grad_norm": 0.556973935663038, "learning_rate": 4.3568532035685324e-05, "loss": 0.6287, "step": 7391 }, { "epoch": 0.2158185162477008, "grad_norm": 0.5828221383806886, "learning_rate": 4.35669099756691e-05, "loss": 0.6791, "step": 7392 }, { "epoch": 0.21584771247554815, "grad_norm": 0.7556902370251203, "learning_rate": 4.356528791565288e-05, "loss": 0.7156, "step": 7393 }, { "epoch": 0.21587690870339551, "grad_norm": 0.5521049444144457, "learning_rate": 4.356366585563666e-05, "loss": 0.6614, "step": 7394 }, { "epoch": 0.21590610493124288, "grad_norm": 0.5968201049050516, "learning_rate": 4.356204379562044e-05, "loss": 0.6717, "step": 7395 }, { "epoch": 0.21593530115909024, "grad_norm": 0.6481861283865843, "learning_rate": 4.356042173560422e-05, "loss": 0.7925, "step": 7396 }, { "epoch": 0.2159644973869376, "grad_norm": 0.5401617818396413, "learning_rate": 4.3558799675587994e-05, "loss": 0.5819, "step": 7397 }, { "epoch": 0.21599369361478496, "grad_norm": 0.5635289654281118, "learning_rate": 4.3557177615571776e-05, "loss": 0.6102, "step": 7398 }, { "epoch": 0.21602288984263232, "grad_norm": 0.5662032558825099, "learning_rate": 4.355555555555556e-05, "loss": 0.6589, "step": 7399 }, { "epoch": 0.21605208607047968, "grad_norm": 0.5611444149388275, "learning_rate": 4.355393349553934e-05, "loss": 0.6559, "step": 7400 }, { "epoch": 0.21608128229832704, "grad_norm": 0.5110112950873374, "learning_rate": 4.355231143552312e-05, "loss": 0.5406, "step": 7401 }, { "epoch": 0.21611047852617443, "grad_norm": 0.5223841500379951, "learning_rate": 4.3550689375506896e-05, "loss": 0.5574, "step": 7402 }, { "epoch": 0.2161396747540218, "grad_norm": 0.5416381579535888, "learning_rate": 4.354906731549068e-05, "loss": 0.6261, "step": 7403 }, { "epoch": 0.21616887098186915, "grad_norm": 0.6087192481345463, "learning_rate": 4.354744525547445e-05, "loss": 0.6974, "step": 7404 }, { "epoch": 0.21619806720971652, "grad_norm": 0.5395401269735832, "learning_rate": 4.3545823195458235e-05, "loss": 0.5717, "step": 7405 }, { "epoch": 0.21622726343756388, "grad_norm": 0.5574691119348831, "learning_rate": 4.354420113544202e-05, "loss": 0.5899, "step": 7406 }, { "epoch": 0.21625645966541124, "grad_norm": 0.6606293751683656, "learning_rate": 4.354257907542579e-05, "loss": 0.7062, "step": 7407 }, { "epoch": 0.2162856558932586, "grad_norm": 0.5964881557492716, "learning_rate": 4.3540957015409573e-05, "loss": 0.6873, "step": 7408 }, { "epoch": 0.21631485212110596, "grad_norm": 0.5508519430283134, "learning_rate": 4.353933495539335e-05, "loss": 0.6551, "step": 7409 }, { "epoch": 0.21634404834895332, "grad_norm": 0.5627261666204375, "learning_rate": 4.353771289537713e-05, "loss": 0.6204, "step": 7410 }, { "epoch": 0.21637324457680068, "grad_norm": 0.5575691148108349, "learning_rate": 4.353609083536091e-05, "loss": 0.6746, "step": 7411 }, { "epoch": 0.21640244080464804, "grad_norm": 0.6138041804378064, "learning_rate": 4.353446877534469e-05, "loss": 0.7312, "step": 7412 }, { "epoch": 0.2164316370324954, "grad_norm": 0.5951816544967001, "learning_rate": 4.353284671532847e-05, "loss": 0.6461, "step": 7413 }, { "epoch": 0.21646083326034277, "grad_norm": 0.5212597347375643, "learning_rate": 4.353122465531225e-05, "loss": 0.6097, "step": 7414 }, { "epoch": 0.21649002948819013, "grad_norm": 0.6348271484875734, "learning_rate": 4.3529602595296025e-05, "loss": 0.7338, "step": 7415 }, { "epoch": 0.2165192257160375, "grad_norm": 0.6194809517190708, "learning_rate": 4.352798053527981e-05, "loss": 0.7238, "step": 7416 }, { "epoch": 0.21654842194388485, "grad_norm": 0.5911905557583756, "learning_rate": 4.352635847526358e-05, "loss": 0.7087, "step": 7417 }, { "epoch": 0.2165776181717322, "grad_norm": 0.6105932651867084, "learning_rate": 4.3524736415247364e-05, "loss": 0.7222, "step": 7418 }, { "epoch": 0.21660681439957957, "grad_norm": 0.600443435859003, "learning_rate": 4.3523114355231146e-05, "loss": 0.6681, "step": 7419 }, { "epoch": 0.21663601062742693, "grad_norm": 0.5915054466041847, "learning_rate": 4.352149229521493e-05, "loss": 0.6912, "step": 7420 }, { "epoch": 0.2166652068552743, "grad_norm": 0.5107911453733301, "learning_rate": 4.351987023519871e-05, "loss": 0.5749, "step": 7421 }, { "epoch": 0.21669440308312166, "grad_norm": 0.6377745844431959, "learning_rate": 4.3518248175182484e-05, "loss": 0.7455, "step": 7422 }, { "epoch": 0.21672359931096902, "grad_norm": 0.5518663542008847, "learning_rate": 4.3516626115166266e-05, "loss": 0.6245, "step": 7423 }, { "epoch": 0.21675279553881638, "grad_norm": 0.5764876499381484, "learning_rate": 4.351500405515004e-05, "loss": 0.6921, "step": 7424 }, { "epoch": 0.21678199176666374, "grad_norm": 0.5482786268941952, "learning_rate": 4.351338199513382e-05, "loss": 0.5927, "step": 7425 }, { "epoch": 0.2168111879945111, "grad_norm": 0.5653243634368084, "learning_rate": 4.3511759935117605e-05, "loss": 0.6704, "step": 7426 }, { "epoch": 0.21684038422235846, "grad_norm": 0.6055507415956325, "learning_rate": 4.351013787510138e-05, "loss": 0.6631, "step": 7427 }, { "epoch": 0.21686958045020582, "grad_norm": 0.5168844678556532, "learning_rate": 4.350851581508516e-05, "loss": 0.5758, "step": 7428 }, { "epoch": 0.21689877667805318, "grad_norm": 0.5591336462633841, "learning_rate": 4.3506893755068936e-05, "loss": 0.6588, "step": 7429 }, { "epoch": 0.21692797290590055, "grad_norm": 0.5957374387381443, "learning_rate": 4.350527169505272e-05, "loss": 0.7143, "step": 7430 }, { "epoch": 0.2169571691337479, "grad_norm": 0.6044726071842992, "learning_rate": 4.35036496350365e-05, "loss": 0.6758, "step": 7431 }, { "epoch": 0.21698636536159527, "grad_norm": 0.5848268073048803, "learning_rate": 4.3502027575020275e-05, "loss": 0.6717, "step": 7432 }, { "epoch": 0.21701556158944266, "grad_norm": 0.5581658773601361, "learning_rate": 4.3500405515004057e-05, "loss": 0.5913, "step": 7433 }, { "epoch": 0.21704475781729002, "grad_norm": 0.6289444922389182, "learning_rate": 4.349878345498783e-05, "loss": 0.7878, "step": 7434 }, { "epoch": 0.21707395404513738, "grad_norm": 0.5462896466792817, "learning_rate": 4.349716139497161e-05, "loss": 0.554, "step": 7435 }, { "epoch": 0.21710315027298474, "grad_norm": 0.5750958685183511, "learning_rate": 4.3495539334955395e-05, "loss": 0.6599, "step": 7436 }, { "epoch": 0.2171323465008321, "grad_norm": 0.5689509937763466, "learning_rate": 4.349391727493917e-05, "loss": 0.6707, "step": 7437 }, { "epoch": 0.21716154272867946, "grad_norm": 0.6597786921823147, "learning_rate": 4.349229521492296e-05, "loss": 0.7991, "step": 7438 }, { "epoch": 0.21719073895652682, "grad_norm": 0.5803406628352723, "learning_rate": 4.3490673154906734e-05, "loss": 0.7486, "step": 7439 }, { "epoch": 0.21721993518437419, "grad_norm": 0.5528223125247065, "learning_rate": 4.3489051094890515e-05, "loss": 0.6536, "step": 7440 }, { "epoch": 0.21724913141222155, "grad_norm": 0.5475454259341421, "learning_rate": 4.34874290348743e-05, "loss": 0.6452, "step": 7441 }, { "epoch": 0.2172783276400689, "grad_norm": 0.5064297119886425, "learning_rate": 4.348580697485807e-05, "loss": 0.5108, "step": 7442 }, { "epoch": 0.21730752386791627, "grad_norm": 0.5946434698962766, "learning_rate": 4.3484184914841854e-05, "loss": 0.6022, "step": 7443 }, { "epoch": 0.21733672009576363, "grad_norm": 0.6213050774945242, "learning_rate": 4.348256285482563e-05, "loss": 0.7312, "step": 7444 }, { "epoch": 0.217365916323611, "grad_norm": 0.5757379732877305, "learning_rate": 4.348094079480941e-05, "loss": 0.6891, "step": 7445 }, { "epoch": 0.21739511255145835, "grad_norm": 0.5291409835465345, "learning_rate": 4.347931873479319e-05, "loss": 0.6085, "step": 7446 }, { "epoch": 0.2174243087793057, "grad_norm": 0.5981120870529762, "learning_rate": 4.347769667477697e-05, "loss": 0.6678, "step": 7447 }, { "epoch": 0.21745350500715308, "grad_norm": 0.5807403542959885, "learning_rate": 4.347607461476075e-05, "loss": 0.7033, "step": 7448 }, { "epoch": 0.21748270123500044, "grad_norm": 0.5279181711891623, "learning_rate": 4.3474452554744524e-05, "loss": 0.5463, "step": 7449 }, { "epoch": 0.2175118974628478, "grad_norm": 0.5199153399552117, "learning_rate": 4.3472830494728306e-05, "loss": 0.5256, "step": 7450 }, { "epoch": 0.21754109369069516, "grad_norm": 0.5792597297509637, "learning_rate": 4.347120843471209e-05, "loss": 0.6859, "step": 7451 }, { "epoch": 0.21757028991854252, "grad_norm": 0.5241485002942997, "learning_rate": 4.346958637469586e-05, "loss": 0.5664, "step": 7452 }, { "epoch": 0.21759948614638988, "grad_norm": 0.5793335834721234, "learning_rate": 4.3467964314679644e-05, "loss": 0.6702, "step": 7453 }, { "epoch": 0.21762868237423724, "grad_norm": 0.5640241282734858, "learning_rate": 4.346634225466342e-05, "loss": 0.6645, "step": 7454 }, { "epoch": 0.2176578786020846, "grad_norm": 0.5977359736741389, "learning_rate": 4.34647201946472e-05, "loss": 0.6712, "step": 7455 }, { "epoch": 0.21768707482993196, "grad_norm": 0.5848765300853073, "learning_rate": 4.346309813463098e-05, "loss": 0.6223, "step": 7456 }, { "epoch": 0.21771627105777933, "grad_norm": 0.6247174659441608, "learning_rate": 4.3461476074614765e-05, "loss": 0.7092, "step": 7457 }, { "epoch": 0.2177454672856267, "grad_norm": 0.5529296970420565, "learning_rate": 4.3459854014598546e-05, "loss": 0.6255, "step": 7458 }, { "epoch": 0.21777466351347405, "grad_norm": 0.5832075056122692, "learning_rate": 4.345823195458232e-05, "loss": 0.6755, "step": 7459 }, { "epoch": 0.2178038597413214, "grad_norm": 0.629612561369645, "learning_rate": 4.34566098945661e-05, "loss": 0.622, "step": 7460 }, { "epoch": 0.21783305596916877, "grad_norm": 0.542354758453828, "learning_rate": 4.3454987834549885e-05, "loss": 0.6354, "step": 7461 }, { "epoch": 0.21786225219701613, "grad_norm": 0.5639477827180913, "learning_rate": 4.345336577453366e-05, "loss": 0.6724, "step": 7462 }, { "epoch": 0.21789144842486352, "grad_norm": 0.5807979148145135, "learning_rate": 4.345174371451744e-05, "loss": 0.6595, "step": 7463 }, { "epoch": 0.21792064465271088, "grad_norm": 0.5630419114715906, "learning_rate": 4.3450121654501217e-05, "loss": 0.6331, "step": 7464 }, { "epoch": 0.21794984088055824, "grad_norm": 0.5976369611768032, "learning_rate": 4.3448499594485e-05, "loss": 0.7958, "step": 7465 }, { "epoch": 0.2179790371084056, "grad_norm": 0.6335640603637152, "learning_rate": 4.344687753446878e-05, "loss": 0.7159, "step": 7466 }, { "epoch": 0.21800823333625297, "grad_norm": 0.5562415745025432, "learning_rate": 4.3445255474452555e-05, "loss": 0.6547, "step": 7467 }, { "epoch": 0.21803742956410033, "grad_norm": 0.6158216446323709, "learning_rate": 4.344363341443634e-05, "loss": 0.653, "step": 7468 }, { "epoch": 0.2180666257919477, "grad_norm": 0.5081788313799764, "learning_rate": 4.344201135442011e-05, "loss": 0.5636, "step": 7469 }, { "epoch": 0.21809582201979505, "grad_norm": 0.6075089271521358, "learning_rate": 4.3440389294403894e-05, "loss": 0.64, "step": 7470 }, { "epoch": 0.2181250182476424, "grad_norm": 0.5141649668010922, "learning_rate": 4.3438767234387675e-05, "loss": 0.5992, "step": 7471 }, { "epoch": 0.21815421447548977, "grad_norm": 0.5508426796024793, "learning_rate": 4.343714517437145e-05, "loss": 0.6303, "step": 7472 }, { "epoch": 0.21818341070333713, "grad_norm": 0.6507056474540724, "learning_rate": 4.343552311435523e-05, "loss": 0.7578, "step": 7473 }, { "epoch": 0.2182126069311845, "grad_norm": 0.5948552180950295, "learning_rate": 4.343390105433901e-05, "loss": 0.7105, "step": 7474 }, { "epoch": 0.21824180315903186, "grad_norm": 0.5551412174135614, "learning_rate": 4.3432278994322796e-05, "loss": 0.6295, "step": 7475 }, { "epoch": 0.21827099938687922, "grad_norm": 0.5342821172092008, "learning_rate": 4.343065693430657e-05, "loss": 0.5668, "step": 7476 }, { "epoch": 0.21830019561472658, "grad_norm": 0.616490827743134, "learning_rate": 4.342903487429035e-05, "loss": 0.6515, "step": 7477 }, { "epoch": 0.21832939184257394, "grad_norm": 0.5651269401875522, "learning_rate": 4.3427412814274134e-05, "loss": 0.6409, "step": 7478 }, { "epoch": 0.2183585880704213, "grad_norm": 0.5498699341386171, "learning_rate": 4.342579075425791e-05, "loss": 0.5928, "step": 7479 }, { "epoch": 0.21838778429826866, "grad_norm": 0.5504714962532999, "learning_rate": 4.342416869424169e-05, "loss": 0.6398, "step": 7480 }, { "epoch": 0.21841698052611602, "grad_norm": 0.5410890206908692, "learning_rate": 4.342254663422547e-05, "loss": 0.5527, "step": 7481 }, { "epoch": 0.21844617675396338, "grad_norm": 0.558343959918404, "learning_rate": 4.342092457420925e-05, "loss": 0.5735, "step": 7482 }, { "epoch": 0.21847537298181074, "grad_norm": 0.5224854805259098, "learning_rate": 4.341930251419303e-05, "loss": 0.5462, "step": 7483 }, { "epoch": 0.2185045692096581, "grad_norm": 0.6016900958735011, "learning_rate": 4.3417680454176804e-05, "loss": 0.7213, "step": 7484 }, { "epoch": 0.21853376543750547, "grad_norm": 0.5758537886662973, "learning_rate": 4.3416058394160586e-05, "loss": 0.6518, "step": 7485 }, { "epoch": 0.21856296166535283, "grad_norm": 0.5474787080224596, "learning_rate": 4.341443633414437e-05, "loss": 0.6484, "step": 7486 }, { "epoch": 0.2185921578932002, "grad_norm": 0.5579679471578638, "learning_rate": 4.341281427412814e-05, "loss": 0.62, "step": 7487 }, { "epoch": 0.21862135412104755, "grad_norm": 0.655011825516273, "learning_rate": 4.3411192214111925e-05, "loss": 0.6869, "step": 7488 }, { "epoch": 0.2186505503488949, "grad_norm": 0.575422712733017, "learning_rate": 4.34095701540957e-05, "loss": 0.6571, "step": 7489 }, { "epoch": 0.21867974657674227, "grad_norm": 0.5716857551911654, "learning_rate": 4.340794809407948e-05, "loss": 0.6607, "step": 7490 }, { "epoch": 0.21870894280458963, "grad_norm": 0.5919950195093702, "learning_rate": 4.340632603406326e-05, "loss": 0.6612, "step": 7491 }, { "epoch": 0.218738139032437, "grad_norm": 0.5395403355550543, "learning_rate": 4.340470397404704e-05, "loss": 0.5771, "step": 7492 }, { "epoch": 0.21876733526028438, "grad_norm": 0.5402111988001401, "learning_rate": 4.340308191403082e-05, "loss": 0.6364, "step": 7493 }, { "epoch": 0.21879653148813175, "grad_norm": 0.6108897119128239, "learning_rate": 4.34014598540146e-05, "loss": 0.6844, "step": 7494 }, { "epoch": 0.2188257277159791, "grad_norm": 0.5540668433035396, "learning_rate": 4.3399837793998383e-05, "loss": 0.6099, "step": 7495 }, { "epoch": 0.21885492394382647, "grad_norm": 0.5960958677487961, "learning_rate": 4.339821573398216e-05, "loss": 0.6417, "step": 7496 }, { "epoch": 0.21888412017167383, "grad_norm": 0.57685360951407, "learning_rate": 4.339659367396594e-05, "loss": 0.6645, "step": 7497 }, { "epoch": 0.2189133163995212, "grad_norm": 0.5775264763196075, "learning_rate": 4.339497161394972e-05, "loss": 0.6498, "step": 7498 }, { "epoch": 0.21894251262736855, "grad_norm": 0.5246203291789368, "learning_rate": 4.33933495539335e-05, "loss": 0.6041, "step": 7499 }, { "epoch": 0.2189717088552159, "grad_norm": 0.5878296291118211, "learning_rate": 4.339172749391728e-05, "loss": 0.6268, "step": 7500 }, { "epoch": 0.21900090508306327, "grad_norm": 0.5488720310005053, "learning_rate": 4.339010543390106e-05, "loss": 0.6355, "step": 7501 }, { "epoch": 0.21903010131091064, "grad_norm": 0.7157646241734115, "learning_rate": 4.3388483373884835e-05, "loss": 0.645, "step": 7502 }, { "epoch": 0.219059297538758, "grad_norm": 0.6007498110719289, "learning_rate": 4.338686131386862e-05, "loss": 0.6674, "step": 7503 }, { "epoch": 0.21908849376660536, "grad_norm": 0.57431040777447, "learning_rate": 4.338523925385239e-05, "loss": 0.6707, "step": 7504 }, { "epoch": 0.21911768999445272, "grad_norm": 0.5300104408022049, "learning_rate": 4.3383617193836174e-05, "loss": 0.5869, "step": 7505 }, { "epoch": 0.21914688622230008, "grad_norm": 0.6071465009335466, "learning_rate": 4.3381995133819956e-05, "loss": 0.6795, "step": 7506 }, { "epoch": 0.21917608245014744, "grad_norm": 0.5521599811572951, "learning_rate": 4.338037307380373e-05, "loss": 0.6027, "step": 7507 }, { "epoch": 0.2192052786779948, "grad_norm": 0.5448688787617055, "learning_rate": 4.337875101378751e-05, "loss": 0.6378, "step": 7508 }, { "epoch": 0.21923447490584216, "grad_norm": 0.5755360930879765, "learning_rate": 4.337712895377129e-05, "loss": 0.6655, "step": 7509 }, { "epoch": 0.21926367113368952, "grad_norm": 0.5855717653688962, "learning_rate": 4.337550689375507e-05, "loss": 0.6689, "step": 7510 }, { "epoch": 0.2192928673615369, "grad_norm": 0.5649936491422796, "learning_rate": 4.337388483373885e-05, "loss": 0.6427, "step": 7511 }, { "epoch": 0.21932206358938425, "grad_norm": 0.6423778277327407, "learning_rate": 4.3372262773722626e-05, "loss": 0.7379, "step": 7512 }, { "epoch": 0.2193512598172316, "grad_norm": 0.6151935808814043, "learning_rate": 4.3370640713706415e-05, "loss": 0.7385, "step": 7513 }, { "epoch": 0.21938045604507897, "grad_norm": 0.5876345721476426, "learning_rate": 4.336901865369019e-05, "loss": 0.7252, "step": 7514 }, { "epoch": 0.21940965227292633, "grad_norm": 0.615378616660098, "learning_rate": 4.336739659367397e-05, "loss": 0.7356, "step": 7515 }, { "epoch": 0.2194388485007737, "grad_norm": 0.7026172070955988, "learning_rate": 4.3365774533657746e-05, "loss": 0.7071, "step": 7516 }, { "epoch": 0.21946804472862105, "grad_norm": 0.5510807048326686, "learning_rate": 4.336415247364153e-05, "loss": 0.6007, "step": 7517 }, { "epoch": 0.21949724095646841, "grad_norm": 0.5323377466553529, "learning_rate": 4.336253041362531e-05, "loss": 0.5695, "step": 7518 }, { "epoch": 0.21952643718431578, "grad_norm": 0.5953392468430163, "learning_rate": 4.3360908353609085e-05, "loss": 0.6398, "step": 7519 }, { "epoch": 0.21955563341216314, "grad_norm": 0.6245671357357863, "learning_rate": 4.3359286293592867e-05, "loss": 0.7503, "step": 7520 }, { "epoch": 0.2195848296400105, "grad_norm": 0.5502709232000895, "learning_rate": 4.335766423357664e-05, "loss": 0.6923, "step": 7521 }, { "epoch": 0.21961402586785786, "grad_norm": 0.5560164605186495, "learning_rate": 4.335604217356042e-05, "loss": 0.6125, "step": 7522 }, { "epoch": 0.21964322209570525, "grad_norm": 0.550011245806123, "learning_rate": 4.3354420113544205e-05, "loss": 0.6438, "step": 7523 }, { "epoch": 0.2196724183235526, "grad_norm": 0.5631875949116438, "learning_rate": 4.335279805352798e-05, "loss": 0.6382, "step": 7524 }, { "epoch": 0.21970161455139997, "grad_norm": 0.6249399700557122, "learning_rate": 4.335117599351176e-05, "loss": 0.73, "step": 7525 }, { "epoch": 0.21973081077924733, "grad_norm": 0.5708398767896383, "learning_rate": 4.3349553933495544e-05, "loss": 0.6462, "step": 7526 }, { "epoch": 0.2197600070070947, "grad_norm": 0.5340823159833031, "learning_rate": 4.334793187347932e-05, "loss": 0.6216, "step": 7527 }, { "epoch": 0.21978920323494205, "grad_norm": 0.5664542168234614, "learning_rate": 4.33463098134631e-05, "loss": 0.581, "step": 7528 }, { "epoch": 0.21981839946278942, "grad_norm": 0.5701806990714945, "learning_rate": 4.3344687753446875e-05, "loss": 0.6663, "step": 7529 }, { "epoch": 0.21984759569063678, "grad_norm": 0.5407890627587766, "learning_rate": 4.334306569343066e-05, "loss": 0.5957, "step": 7530 }, { "epoch": 0.21987679191848414, "grad_norm": 0.5743116890215328, "learning_rate": 4.334144363341444e-05, "loss": 0.7025, "step": 7531 }, { "epoch": 0.2199059881463315, "grad_norm": 0.6045844865292151, "learning_rate": 4.333982157339822e-05, "loss": 0.7272, "step": 7532 }, { "epoch": 0.21993518437417886, "grad_norm": 0.5688021670812546, "learning_rate": 4.3338199513382e-05, "loss": 0.6601, "step": 7533 }, { "epoch": 0.21996438060202622, "grad_norm": 0.6024512666119095, "learning_rate": 4.333657745336578e-05, "loss": 0.6368, "step": 7534 }, { "epoch": 0.21999357682987358, "grad_norm": 0.5735508530717759, "learning_rate": 4.333495539334956e-05, "loss": 0.6386, "step": 7535 }, { "epoch": 0.22002277305772094, "grad_norm": 0.5070000605168623, "learning_rate": 4.3333333333333334e-05, "loss": 0.5357, "step": 7536 }, { "epoch": 0.2200519692855683, "grad_norm": 0.4958036325432106, "learning_rate": 4.3331711273317116e-05, "loss": 0.5189, "step": 7537 }, { "epoch": 0.22008116551341567, "grad_norm": 0.5489788615996221, "learning_rate": 4.33300892133009e-05, "loss": 0.6357, "step": 7538 }, { "epoch": 0.22011036174126303, "grad_norm": 0.5481783131788571, "learning_rate": 4.332846715328467e-05, "loss": 0.608, "step": 7539 }, { "epoch": 0.2201395579691104, "grad_norm": 0.5833221293720563, "learning_rate": 4.3326845093268454e-05, "loss": 0.6554, "step": 7540 }, { "epoch": 0.22016875419695775, "grad_norm": 0.5284987926122797, "learning_rate": 4.332522303325223e-05, "loss": 0.5973, "step": 7541 }, { "epoch": 0.2201979504248051, "grad_norm": 0.6109427814397342, "learning_rate": 4.332360097323601e-05, "loss": 0.6442, "step": 7542 }, { "epoch": 0.22022714665265247, "grad_norm": 0.5671785173744475, "learning_rate": 4.332197891321979e-05, "loss": 0.5659, "step": 7543 }, { "epoch": 0.22025634288049983, "grad_norm": 0.6498552686556589, "learning_rate": 4.332035685320357e-05, "loss": 0.6942, "step": 7544 }, { "epoch": 0.2202855391083472, "grad_norm": 0.5967651006678192, "learning_rate": 4.331873479318735e-05, "loss": 0.6687, "step": 7545 }, { "epoch": 0.22031473533619456, "grad_norm": 0.6309874475264279, "learning_rate": 4.3317112733171125e-05, "loss": 0.7826, "step": 7546 }, { "epoch": 0.22034393156404192, "grad_norm": 0.5978217244617622, "learning_rate": 4.3315490673154906e-05, "loss": 0.6505, "step": 7547 }, { "epoch": 0.22037312779188928, "grad_norm": 0.5589567021125788, "learning_rate": 4.331386861313869e-05, "loss": 0.6933, "step": 7548 }, { "epoch": 0.22040232401973664, "grad_norm": 0.5921331279378429, "learning_rate": 4.331224655312246e-05, "loss": 0.6481, "step": 7549 }, { "epoch": 0.220431520247584, "grad_norm": 0.6159927043995201, "learning_rate": 4.3310624493106245e-05, "loss": 0.742, "step": 7550 }, { "epoch": 0.22046071647543136, "grad_norm": 0.5494746482197522, "learning_rate": 4.3309002433090027e-05, "loss": 0.6247, "step": 7551 }, { "epoch": 0.22048991270327872, "grad_norm": 0.4889539037233573, "learning_rate": 4.330738037307381e-05, "loss": 0.544, "step": 7552 }, { "epoch": 0.2205191089311261, "grad_norm": 0.5477087938533224, "learning_rate": 4.330575831305759e-05, "loss": 0.6402, "step": 7553 }, { "epoch": 0.22054830515897347, "grad_norm": 0.5811545645477716, "learning_rate": 4.3304136253041365e-05, "loss": 0.6367, "step": 7554 }, { "epoch": 0.22057750138682083, "grad_norm": 0.5600878173956078, "learning_rate": 4.330251419302515e-05, "loss": 0.6158, "step": 7555 }, { "epoch": 0.2206066976146682, "grad_norm": 0.7301923642308392, "learning_rate": 4.330089213300892e-05, "loss": 0.7444, "step": 7556 }, { "epoch": 0.22063589384251556, "grad_norm": 0.5603802946730192, "learning_rate": 4.3299270072992704e-05, "loss": 0.652, "step": 7557 }, { "epoch": 0.22066509007036292, "grad_norm": 0.547626581854312, "learning_rate": 4.3297648012976485e-05, "loss": 0.6247, "step": 7558 }, { "epoch": 0.22069428629821028, "grad_norm": 0.6020195972594158, "learning_rate": 4.329602595296026e-05, "loss": 0.6826, "step": 7559 }, { "epoch": 0.22072348252605764, "grad_norm": 0.7121356566070943, "learning_rate": 4.329440389294404e-05, "loss": 0.7142, "step": 7560 }, { "epoch": 0.220752678753905, "grad_norm": 0.6089814546967057, "learning_rate": 4.329278183292782e-05, "loss": 0.7211, "step": 7561 }, { "epoch": 0.22078187498175236, "grad_norm": 0.5484582572204308, "learning_rate": 4.32911597729116e-05, "loss": 0.637, "step": 7562 }, { "epoch": 0.22081107120959972, "grad_norm": 0.5717645091266378, "learning_rate": 4.328953771289538e-05, "loss": 0.6962, "step": 7563 }, { "epoch": 0.22084026743744709, "grad_norm": 0.6102405958724739, "learning_rate": 4.3287915652879156e-05, "loss": 0.6747, "step": 7564 }, { "epoch": 0.22086946366529445, "grad_norm": 0.5561801757831469, "learning_rate": 4.328629359286294e-05, "loss": 0.6811, "step": 7565 }, { "epoch": 0.2208986598931418, "grad_norm": 0.6015220909950265, "learning_rate": 4.328467153284671e-05, "loss": 0.6738, "step": 7566 }, { "epoch": 0.22092785612098917, "grad_norm": 0.5536786869302318, "learning_rate": 4.3283049472830494e-05, "loss": 0.6383, "step": 7567 }, { "epoch": 0.22095705234883653, "grad_norm": 0.5474719138683335, "learning_rate": 4.3281427412814276e-05, "loss": 0.6472, "step": 7568 }, { "epoch": 0.2209862485766839, "grad_norm": 0.622956098144502, "learning_rate": 4.327980535279805e-05, "loss": 0.702, "step": 7569 }, { "epoch": 0.22101544480453125, "grad_norm": 0.5622834877675962, "learning_rate": 4.327818329278184e-05, "loss": 0.6716, "step": 7570 }, { "epoch": 0.2210446410323786, "grad_norm": 0.5242789700828303, "learning_rate": 4.3276561232765614e-05, "loss": 0.5512, "step": 7571 }, { "epoch": 0.22107383726022597, "grad_norm": 0.5686339076179817, "learning_rate": 4.3274939172749396e-05, "loss": 0.6691, "step": 7572 }, { "epoch": 0.22110303348807334, "grad_norm": 0.6068812202450734, "learning_rate": 4.327331711273318e-05, "loss": 0.7472, "step": 7573 }, { "epoch": 0.2211322297159207, "grad_norm": 0.5708737529733351, "learning_rate": 4.327169505271695e-05, "loss": 0.6608, "step": 7574 }, { "epoch": 0.22116142594376806, "grad_norm": 0.5843079641768446, "learning_rate": 4.3270072992700735e-05, "loss": 0.7146, "step": 7575 }, { "epoch": 0.22119062217161542, "grad_norm": 0.5458940339170768, "learning_rate": 4.326845093268451e-05, "loss": 0.6444, "step": 7576 }, { "epoch": 0.22121981839946278, "grad_norm": 0.5938588219308139, "learning_rate": 4.326682887266829e-05, "loss": 0.605, "step": 7577 }, { "epoch": 0.22124901462731014, "grad_norm": 0.5564194417642162, "learning_rate": 4.326520681265207e-05, "loss": 0.6622, "step": 7578 }, { "epoch": 0.2212782108551575, "grad_norm": 0.6402939752080326, "learning_rate": 4.326358475263585e-05, "loss": 0.7642, "step": 7579 }, { "epoch": 0.22130740708300486, "grad_norm": 0.569539047917829, "learning_rate": 4.326196269261963e-05, "loss": 0.6633, "step": 7580 }, { "epoch": 0.22133660331085223, "grad_norm": 0.609430411666854, "learning_rate": 4.3260340632603405e-05, "loss": 0.6773, "step": 7581 }, { "epoch": 0.2213657995386996, "grad_norm": 0.5757181936641278, "learning_rate": 4.325871857258719e-05, "loss": 0.6671, "step": 7582 }, { "epoch": 0.22139499576654698, "grad_norm": 0.5703091828784501, "learning_rate": 4.325709651257097e-05, "loss": 0.7337, "step": 7583 }, { "epoch": 0.22142419199439434, "grad_norm": 0.5598836213046613, "learning_rate": 4.3255474452554743e-05, "loss": 0.6864, "step": 7584 }, { "epoch": 0.2214533882222417, "grad_norm": 0.615136807803768, "learning_rate": 4.3253852392538525e-05, "loss": 0.7188, "step": 7585 }, { "epoch": 0.22148258445008906, "grad_norm": 0.5481600656193514, "learning_rate": 4.32522303325223e-05, "loss": 0.6305, "step": 7586 }, { "epoch": 0.22151178067793642, "grad_norm": 0.6028550288051334, "learning_rate": 4.325060827250608e-05, "loss": 0.7346, "step": 7587 }, { "epoch": 0.22154097690578378, "grad_norm": 0.5727076460731245, "learning_rate": 4.3248986212489864e-05, "loss": 0.6982, "step": 7588 }, { "epoch": 0.22157017313363114, "grad_norm": 0.583065485869921, "learning_rate": 4.3247364152473645e-05, "loss": 0.6842, "step": 7589 }, { "epoch": 0.2215993693614785, "grad_norm": 0.5428644631696365, "learning_rate": 4.324574209245743e-05, "loss": 0.6448, "step": 7590 }, { "epoch": 0.22162856558932587, "grad_norm": 0.5427222254266413, "learning_rate": 4.32441200324412e-05, "loss": 0.6293, "step": 7591 }, { "epoch": 0.22165776181717323, "grad_norm": 0.5549964181558908, "learning_rate": 4.3242497972424984e-05, "loss": 0.642, "step": 7592 }, { "epoch": 0.2216869580450206, "grad_norm": 0.6294336064040122, "learning_rate": 4.3240875912408766e-05, "loss": 0.7268, "step": 7593 }, { "epoch": 0.22171615427286795, "grad_norm": 0.6109144551349416, "learning_rate": 4.323925385239254e-05, "loss": 0.7221, "step": 7594 }, { "epoch": 0.2217453505007153, "grad_norm": 0.6455318472740965, "learning_rate": 4.323763179237632e-05, "loss": 0.7953, "step": 7595 }, { "epoch": 0.22177454672856267, "grad_norm": 0.5602220460396707, "learning_rate": 4.32360097323601e-05, "loss": 0.6492, "step": 7596 }, { "epoch": 0.22180374295641003, "grad_norm": 0.5619733554058521, "learning_rate": 4.323438767234388e-05, "loss": 0.6002, "step": 7597 }, { "epoch": 0.2218329391842574, "grad_norm": 0.5840065612772255, "learning_rate": 4.323276561232766e-05, "loss": 0.6792, "step": 7598 }, { "epoch": 0.22186213541210476, "grad_norm": 0.6146182215699478, "learning_rate": 4.3231143552311436e-05, "loss": 0.7552, "step": 7599 }, { "epoch": 0.22189133163995212, "grad_norm": 0.5386701047678262, "learning_rate": 4.322952149229522e-05, "loss": 0.6041, "step": 7600 }, { "epoch": 0.22192052786779948, "grad_norm": 0.5696091626812455, "learning_rate": 4.322789943227899e-05, "loss": 0.6441, "step": 7601 }, { "epoch": 0.22194972409564684, "grad_norm": 0.5589484460977114, "learning_rate": 4.3226277372262774e-05, "loss": 0.6253, "step": 7602 }, { "epoch": 0.2219789203234942, "grad_norm": 0.6005438058013163, "learning_rate": 4.3224655312246556e-05, "loss": 0.646, "step": 7603 }, { "epoch": 0.22200811655134156, "grad_norm": 0.556462483537417, "learning_rate": 4.322303325223033e-05, "loss": 0.6153, "step": 7604 }, { "epoch": 0.22203731277918892, "grad_norm": 0.5543931838447262, "learning_rate": 4.322141119221411e-05, "loss": 0.6298, "step": 7605 }, { "epoch": 0.22206650900703628, "grad_norm": 0.5841341051610939, "learning_rate": 4.321978913219789e-05, "loss": 0.6826, "step": 7606 }, { "epoch": 0.22209570523488364, "grad_norm": 0.6415415977362182, "learning_rate": 4.321816707218167e-05, "loss": 0.79, "step": 7607 }, { "epoch": 0.222124901462731, "grad_norm": 0.5632270111363787, "learning_rate": 4.321654501216545e-05, "loss": 0.6506, "step": 7608 }, { "epoch": 0.22215409769057837, "grad_norm": 0.5289568671213076, "learning_rate": 4.321492295214923e-05, "loss": 0.5933, "step": 7609 }, { "epoch": 0.22218329391842573, "grad_norm": 0.5563437367520525, "learning_rate": 4.3213300892133015e-05, "loss": 0.6467, "step": 7610 }, { "epoch": 0.2222124901462731, "grad_norm": 0.5376440863143376, "learning_rate": 4.321167883211679e-05, "loss": 0.5814, "step": 7611 }, { "epoch": 0.22224168637412045, "grad_norm": 0.574880489171682, "learning_rate": 4.321005677210057e-05, "loss": 0.7219, "step": 7612 }, { "epoch": 0.2222708826019678, "grad_norm": 0.5832424827524862, "learning_rate": 4.3208434712084354e-05, "loss": 0.6873, "step": 7613 }, { "epoch": 0.2223000788298152, "grad_norm": 0.6232032027768328, "learning_rate": 4.320681265206813e-05, "loss": 0.7009, "step": 7614 }, { "epoch": 0.22232927505766256, "grad_norm": 0.5540935935458571, "learning_rate": 4.320519059205191e-05, "loss": 0.6694, "step": 7615 }, { "epoch": 0.22235847128550992, "grad_norm": 0.5271787690824858, "learning_rate": 4.3203568532035685e-05, "loss": 0.6393, "step": 7616 }, { "epoch": 0.22238766751335728, "grad_norm": 0.4937890880223569, "learning_rate": 4.320194647201947e-05, "loss": 0.5394, "step": 7617 }, { "epoch": 0.22241686374120465, "grad_norm": 0.5817490897216566, "learning_rate": 4.320032441200325e-05, "loss": 0.6778, "step": 7618 }, { "epoch": 0.222446059969052, "grad_norm": 0.5256819316246018, "learning_rate": 4.3198702351987024e-05, "loss": 0.586, "step": 7619 }, { "epoch": 0.22247525619689937, "grad_norm": 0.5175614353321213, "learning_rate": 4.3197080291970806e-05, "loss": 0.5888, "step": 7620 }, { "epoch": 0.22250445242474673, "grad_norm": 0.5347739589649907, "learning_rate": 4.319545823195458e-05, "loss": 0.6147, "step": 7621 }, { "epoch": 0.2225336486525941, "grad_norm": 0.5587230507266573, "learning_rate": 4.319383617193836e-05, "loss": 0.6955, "step": 7622 }, { "epoch": 0.22256284488044145, "grad_norm": 0.5635826855828251, "learning_rate": 4.3192214111922144e-05, "loss": 0.6667, "step": 7623 }, { "epoch": 0.2225920411082888, "grad_norm": 0.5590233737778997, "learning_rate": 4.319059205190592e-05, "loss": 0.6838, "step": 7624 }, { "epoch": 0.22262123733613617, "grad_norm": 0.5963236916951851, "learning_rate": 4.31889699918897e-05, "loss": 0.7356, "step": 7625 }, { "epoch": 0.22265043356398354, "grad_norm": 0.6808551544804795, "learning_rate": 4.318734793187348e-05, "loss": 0.6543, "step": 7626 }, { "epoch": 0.2226796297918309, "grad_norm": 0.6177523932443333, "learning_rate": 4.3185725871857264e-05, "loss": 0.6823, "step": 7627 }, { "epoch": 0.22270882601967826, "grad_norm": 0.6034722472896774, "learning_rate": 4.318410381184104e-05, "loss": 0.6752, "step": 7628 }, { "epoch": 0.22273802224752562, "grad_norm": 0.61092066119556, "learning_rate": 4.318248175182482e-05, "loss": 0.6446, "step": 7629 }, { "epoch": 0.22276721847537298, "grad_norm": 0.5656396249905127, "learning_rate": 4.31808596918086e-05, "loss": 0.6408, "step": 7630 }, { "epoch": 0.22279641470322034, "grad_norm": 0.5834658804882423, "learning_rate": 4.317923763179238e-05, "loss": 0.6664, "step": 7631 }, { "epoch": 0.2228256109310677, "grad_norm": 0.5797517972482855, "learning_rate": 4.317761557177616e-05, "loss": 0.7144, "step": 7632 }, { "epoch": 0.22285480715891506, "grad_norm": 0.5460072204009683, "learning_rate": 4.3175993511759935e-05, "loss": 0.5993, "step": 7633 }, { "epoch": 0.22288400338676242, "grad_norm": 0.5868051010503692, "learning_rate": 4.3174371451743716e-05, "loss": 0.7113, "step": 7634 }, { "epoch": 0.22291319961460979, "grad_norm": 0.5495001591090769, "learning_rate": 4.31727493917275e-05, "loss": 0.6592, "step": 7635 }, { "epoch": 0.22294239584245715, "grad_norm": 0.7247699308221243, "learning_rate": 4.317112733171127e-05, "loss": 0.8827, "step": 7636 }, { "epoch": 0.2229715920703045, "grad_norm": 0.5925577696048444, "learning_rate": 4.3169505271695055e-05, "loss": 0.6901, "step": 7637 }, { "epoch": 0.22300078829815187, "grad_norm": 0.6151011347257287, "learning_rate": 4.316788321167884e-05, "loss": 0.7971, "step": 7638 }, { "epoch": 0.22302998452599923, "grad_norm": 0.5715172861223735, "learning_rate": 4.316626115166261e-05, "loss": 0.7162, "step": 7639 }, { "epoch": 0.2230591807538466, "grad_norm": 0.5813787494305367, "learning_rate": 4.316463909164639e-05, "loss": 0.6777, "step": 7640 }, { "epoch": 0.22308837698169395, "grad_norm": 0.5144755761819381, "learning_rate": 4.316301703163017e-05, "loss": 0.6051, "step": 7641 }, { "epoch": 0.22311757320954131, "grad_norm": 0.5742574056989951, "learning_rate": 4.316139497161395e-05, "loss": 0.6424, "step": 7642 }, { "epoch": 0.22314676943738868, "grad_norm": 0.5695401574059537, "learning_rate": 4.315977291159773e-05, "loss": 0.6631, "step": 7643 }, { "epoch": 0.22317596566523606, "grad_norm": 0.5468406402603178, "learning_rate": 4.315815085158151e-05, "loss": 0.6072, "step": 7644 }, { "epoch": 0.22320516189308343, "grad_norm": 0.7114551973176655, "learning_rate": 4.3156528791565295e-05, "loss": 0.7476, "step": 7645 }, { "epoch": 0.2232343581209308, "grad_norm": 0.5726552683627987, "learning_rate": 4.315490673154907e-05, "loss": 0.6778, "step": 7646 }, { "epoch": 0.22326355434877815, "grad_norm": 0.5502839747686804, "learning_rate": 4.315328467153285e-05, "loss": 0.6401, "step": 7647 }, { "epoch": 0.2232927505766255, "grad_norm": 0.5386902033401241, "learning_rate": 4.315166261151663e-05, "loss": 0.5878, "step": 7648 }, { "epoch": 0.22332194680447287, "grad_norm": 0.5955272484506507, "learning_rate": 4.315004055150041e-05, "loss": 0.639, "step": 7649 }, { "epoch": 0.22335114303232023, "grad_norm": 0.5931496913838605, "learning_rate": 4.314841849148419e-05, "loss": 0.6005, "step": 7650 }, { "epoch": 0.2233803392601676, "grad_norm": 0.5733332737537981, "learning_rate": 4.3146796431467966e-05, "loss": 0.7095, "step": 7651 }, { "epoch": 0.22340953548801495, "grad_norm": 0.5577852373433594, "learning_rate": 4.314517437145175e-05, "loss": 0.6438, "step": 7652 }, { "epoch": 0.22343873171586232, "grad_norm": 0.5537309807968714, "learning_rate": 4.314355231143552e-05, "loss": 0.6271, "step": 7653 }, { "epoch": 0.22346792794370968, "grad_norm": 0.5859813863510823, "learning_rate": 4.3141930251419304e-05, "loss": 0.6011, "step": 7654 }, { "epoch": 0.22349712417155704, "grad_norm": 0.627986389722684, "learning_rate": 4.3140308191403086e-05, "loss": 0.7049, "step": 7655 }, { "epoch": 0.2235263203994044, "grad_norm": 0.5810228836514331, "learning_rate": 4.313868613138686e-05, "loss": 0.6875, "step": 7656 }, { "epoch": 0.22355551662725176, "grad_norm": 0.531420243328868, "learning_rate": 4.313706407137064e-05, "loss": 0.6132, "step": 7657 }, { "epoch": 0.22358471285509912, "grad_norm": 0.555493717356983, "learning_rate": 4.3135442011354424e-05, "loss": 0.6831, "step": 7658 }, { "epoch": 0.22361390908294648, "grad_norm": 0.5743976411542334, "learning_rate": 4.31338199513382e-05, "loss": 0.6683, "step": 7659 }, { "epoch": 0.22364310531079384, "grad_norm": 0.6443714163958265, "learning_rate": 4.313219789132198e-05, "loss": 0.7841, "step": 7660 }, { "epoch": 0.2236723015386412, "grad_norm": 0.5554556472372496, "learning_rate": 4.3130575831305756e-05, "loss": 0.6405, "step": 7661 }, { "epoch": 0.22370149776648857, "grad_norm": 0.6184027826637161, "learning_rate": 4.312895377128954e-05, "loss": 0.714, "step": 7662 }, { "epoch": 0.22373069399433593, "grad_norm": 0.5603657403571466, "learning_rate": 4.312733171127332e-05, "loss": 0.6566, "step": 7663 }, { "epoch": 0.2237598902221833, "grad_norm": 0.5889715908227473, "learning_rate": 4.31257096512571e-05, "loss": 0.7231, "step": 7664 }, { "epoch": 0.22378908645003065, "grad_norm": 0.5634765682767778, "learning_rate": 4.312408759124088e-05, "loss": 0.6928, "step": 7665 }, { "epoch": 0.223818282677878, "grad_norm": 0.5474321349197657, "learning_rate": 4.312246553122466e-05, "loss": 0.593, "step": 7666 }, { "epoch": 0.22384747890572537, "grad_norm": 0.6654919677821574, "learning_rate": 4.312084347120844e-05, "loss": 0.8324, "step": 7667 }, { "epoch": 0.22387667513357273, "grad_norm": 0.560620310924917, "learning_rate": 4.3119221411192215e-05, "loss": 0.6299, "step": 7668 }, { "epoch": 0.2239058713614201, "grad_norm": 0.5937793962488894, "learning_rate": 4.3117599351176e-05, "loss": 0.6388, "step": 7669 }, { "epoch": 0.22393506758926746, "grad_norm": 0.5488480983803307, "learning_rate": 4.311597729115978e-05, "loss": 0.6628, "step": 7670 }, { "epoch": 0.22396426381711482, "grad_norm": 0.5567777048330438, "learning_rate": 4.3114355231143553e-05, "loss": 0.6688, "step": 7671 }, { "epoch": 0.22399346004496218, "grad_norm": 0.5185591957230069, "learning_rate": 4.3112733171127335e-05, "loss": 0.5602, "step": 7672 }, { "epoch": 0.22402265627280954, "grad_norm": 0.5581269900898334, "learning_rate": 4.311111111111111e-05, "loss": 0.6386, "step": 7673 }, { "epoch": 0.22405185250065693, "grad_norm": 0.5368050953803605, "learning_rate": 4.310948905109489e-05, "loss": 0.5678, "step": 7674 }, { "epoch": 0.2240810487285043, "grad_norm": 0.5707361052887838, "learning_rate": 4.3107866991078674e-05, "loss": 0.7002, "step": 7675 }, { "epoch": 0.22411024495635165, "grad_norm": 0.5740301254049802, "learning_rate": 4.310624493106245e-05, "loss": 0.6825, "step": 7676 }, { "epoch": 0.224139441184199, "grad_norm": 0.5374942930457244, "learning_rate": 4.310462287104623e-05, "loss": 0.5739, "step": 7677 }, { "epoch": 0.22416863741204637, "grad_norm": 0.5890539315153921, "learning_rate": 4.3103000811030005e-05, "loss": 0.7155, "step": 7678 }, { "epoch": 0.22419783363989373, "grad_norm": 0.5625698210571245, "learning_rate": 4.310137875101379e-05, "loss": 0.6578, "step": 7679 }, { "epoch": 0.2242270298677411, "grad_norm": 0.9016695329099238, "learning_rate": 4.309975669099757e-05, "loss": 0.6618, "step": 7680 }, { "epoch": 0.22425622609558846, "grad_norm": 0.5692685482510352, "learning_rate": 4.3098134630981344e-05, "loss": 0.6768, "step": 7681 }, { "epoch": 0.22428542232343582, "grad_norm": 0.5478219365962673, "learning_rate": 4.3096512570965126e-05, "loss": 0.628, "step": 7682 }, { "epoch": 0.22431461855128318, "grad_norm": 0.5528802700280483, "learning_rate": 4.309489051094891e-05, "loss": 0.6372, "step": 7683 }, { "epoch": 0.22434381477913054, "grad_norm": 0.5681202332389316, "learning_rate": 4.309326845093269e-05, "loss": 0.6559, "step": 7684 }, { "epoch": 0.2243730110069779, "grad_norm": 0.5773163135026146, "learning_rate": 4.309164639091647e-05, "loss": 0.6414, "step": 7685 }, { "epoch": 0.22440220723482526, "grad_norm": 0.5352971063363232, "learning_rate": 4.3090024330900246e-05, "loss": 0.628, "step": 7686 }, { "epoch": 0.22443140346267262, "grad_norm": 0.6003799533932905, "learning_rate": 4.308840227088403e-05, "loss": 0.7344, "step": 7687 }, { "epoch": 0.22446059969051999, "grad_norm": 0.5795000566453893, "learning_rate": 4.30867802108678e-05, "loss": 0.6798, "step": 7688 }, { "epoch": 0.22448979591836735, "grad_norm": 0.5920188090309347, "learning_rate": 4.3085158150851585e-05, "loss": 0.7237, "step": 7689 }, { "epoch": 0.2245189921462147, "grad_norm": 0.7294764591927765, "learning_rate": 4.3083536090835366e-05, "loss": 0.7396, "step": 7690 }, { "epoch": 0.22454818837406207, "grad_norm": 0.539131464629863, "learning_rate": 4.308191403081914e-05, "loss": 0.6659, "step": 7691 }, { "epoch": 0.22457738460190943, "grad_norm": 0.5618547256156748, "learning_rate": 4.308029197080292e-05, "loss": 0.6386, "step": 7692 }, { "epoch": 0.2246065808297568, "grad_norm": 0.5432120807266518, "learning_rate": 4.30786699107867e-05, "loss": 0.5582, "step": 7693 }, { "epoch": 0.22463577705760415, "grad_norm": 0.6818331201776772, "learning_rate": 4.307704785077048e-05, "loss": 0.6781, "step": 7694 }, { "epoch": 0.2246649732854515, "grad_norm": 0.5313956743048132, "learning_rate": 4.307542579075426e-05, "loss": 0.5592, "step": 7695 }, { "epoch": 0.22469416951329887, "grad_norm": 0.577384665248545, "learning_rate": 4.3073803730738037e-05, "loss": 0.6575, "step": 7696 }, { "epoch": 0.22472336574114624, "grad_norm": 0.5413427103201016, "learning_rate": 4.307218167072182e-05, "loss": 0.5923, "step": 7697 }, { "epoch": 0.2247525619689936, "grad_norm": 0.6248703038039181, "learning_rate": 4.307055961070559e-05, "loss": 0.63, "step": 7698 }, { "epoch": 0.22478175819684096, "grad_norm": 0.6233760422312741, "learning_rate": 4.3068937550689375e-05, "loss": 0.7504, "step": 7699 }, { "epoch": 0.22481095442468832, "grad_norm": 0.5526055006784996, "learning_rate": 4.306731549067316e-05, "loss": 0.6378, "step": 7700 }, { "epoch": 0.22484015065253568, "grad_norm": 0.5709636242145258, "learning_rate": 4.306569343065693e-05, "loss": 0.7044, "step": 7701 }, { "epoch": 0.22486934688038304, "grad_norm": 0.6199785321123698, "learning_rate": 4.306407137064072e-05, "loss": 0.7024, "step": 7702 }, { "epoch": 0.2248985431082304, "grad_norm": 0.591465938445241, "learning_rate": 4.3062449310624495e-05, "loss": 0.6512, "step": 7703 }, { "epoch": 0.2249277393360778, "grad_norm": 0.5669480904747304, "learning_rate": 4.306082725060828e-05, "loss": 0.6743, "step": 7704 }, { "epoch": 0.22495693556392515, "grad_norm": 0.6123961777872675, "learning_rate": 4.305920519059206e-05, "loss": 0.6873, "step": 7705 }, { "epoch": 0.22498613179177251, "grad_norm": 0.6062976486726591, "learning_rate": 4.3057583130575834e-05, "loss": 0.7222, "step": 7706 }, { "epoch": 0.22501532801961988, "grad_norm": 0.5948597736119173, "learning_rate": 4.3055961070559616e-05, "loss": 0.6994, "step": 7707 }, { "epoch": 0.22504452424746724, "grad_norm": 0.5746335057919837, "learning_rate": 4.305433901054339e-05, "loss": 0.6217, "step": 7708 }, { "epoch": 0.2250737204753146, "grad_norm": 0.5528419740366456, "learning_rate": 4.305271695052717e-05, "loss": 0.6565, "step": 7709 }, { "epoch": 0.22510291670316196, "grad_norm": 0.5686325855240767, "learning_rate": 4.3051094890510954e-05, "loss": 0.7094, "step": 7710 }, { "epoch": 0.22513211293100932, "grad_norm": 0.5330813350812683, "learning_rate": 4.304947283049473e-05, "loss": 0.6185, "step": 7711 }, { "epoch": 0.22516130915885668, "grad_norm": 0.5512867918192886, "learning_rate": 4.304785077047851e-05, "loss": 0.5919, "step": 7712 }, { "epoch": 0.22519050538670404, "grad_norm": 0.5748607196090222, "learning_rate": 4.3046228710462286e-05, "loss": 0.6493, "step": 7713 }, { "epoch": 0.2252197016145514, "grad_norm": 0.560775368537426, "learning_rate": 4.304460665044607e-05, "loss": 0.6434, "step": 7714 }, { "epoch": 0.22524889784239877, "grad_norm": 0.6166556310254045, "learning_rate": 4.304298459042985e-05, "loss": 0.7182, "step": 7715 }, { "epoch": 0.22527809407024613, "grad_norm": 0.5542835900040121, "learning_rate": 4.3041362530413624e-05, "loss": 0.5985, "step": 7716 }, { "epoch": 0.2253072902980935, "grad_norm": 0.597377056863269, "learning_rate": 4.3039740470397406e-05, "loss": 0.741, "step": 7717 }, { "epoch": 0.22533648652594085, "grad_norm": 0.5952993807557513, "learning_rate": 4.303811841038118e-05, "loss": 0.7202, "step": 7718 }, { "epoch": 0.2253656827537882, "grad_norm": 0.5984550596984587, "learning_rate": 4.303649635036496e-05, "loss": 0.6719, "step": 7719 }, { "epoch": 0.22539487898163557, "grad_norm": 0.5726943232726873, "learning_rate": 4.3034874290348745e-05, "loss": 0.6909, "step": 7720 }, { "epoch": 0.22542407520948293, "grad_norm": 0.588036992465429, "learning_rate": 4.3033252230332526e-05, "loss": 0.6617, "step": 7721 }, { "epoch": 0.2254532714373303, "grad_norm": 0.6309475784377861, "learning_rate": 4.303163017031631e-05, "loss": 0.7587, "step": 7722 }, { "epoch": 0.22548246766517765, "grad_norm": 0.5545481397156239, "learning_rate": 4.303000811030008e-05, "loss": 0.6272, "step": 7723 }, { "epoch": 0.22551166389302502, "grad_norm": 0.5778821030643448, "learning_rate": 4.3028386050283865e-05, "loss": 0.6709, "step": 7724 }, { "epoch": 0.22554086012087238, "grad_norm": 0.5639346585115914, "learning_rate": 4.302676399026765e-05, "loss": 0.658, "step": 7725 }, { "epoch": 0.22557005634871974, "grad_norm": 0.6002271102952615, "learning_rate": 4.302514193025142e-05, "loss": 0.646, "step": 7726 }, { "epoch": 0.2255992525765671, "grad_norm": 0.5012793650941272, "learning_rate": 4.30235198702352e-05, "loss": 0.5378, "step": 7727 }, { "epoch": 0.22562844880441446, "grad_norm": 0.5451202563030029, "learning_rate": 4.302189781021898e-05, "loss": 0.5892, "step": 7728 }, { "epoch": 0.22565764503226182, "grad_norm": 0.5593051213790419, "learning_rate": 4.302027575020276e-05, "loss": 0.6528, "step": 7729 }, { "epoch": 0.22568684126010918, "grad_norm": 0.5445889523724405, "learning_rate": 4.301865369018654e-05, "loss": 0.6469, "step": 7730 }, { "epoch": 0.22571603748795654, "grad_norm": 0.5718960729613752, "learning_rate": 4.301703163017032e-05, "loss": 0.6567, "step": 7731 }, { "epoch": 0.2257452337158039, "grad_norm": 0.560244828511294, "learning_rate": 4.30154095701541e-05, "loss": 0.5934, "step": 7732 }, { "epoch": 0.22577442994365127, "grad_norm": 0.5905013344989701, "learning_rate": 4.3013787510137874e-05, "loss": 0.6548, "step": 7733 }, { "epoch": 0.22580362617149866, "grad_norm": 0.5559683599468429, "learning_rate": 4.3012165450121655e-05, "loss": 0.6366, "step": 7734 }, { "epoch": 0.22583282239934602, "grad_norm": 0.5689153913913751, "learning_rate": 4.301054339010544e-05, "loss": 0.6554, "step": 7735 }, { "epoch": 0.22586201862719338, "grad_norm": 0.5536301696094446, "learning_rate": 4.300892133008921e-05, "loss": 0.6761, "step": 7736 }, { "epoch": 0.22589121485504074, "grad_norm": 0.5434156196666609, "learning_rate": 4.3007299270072994e-05, "loss": 0.6158, "step": 7737 }, { "epoch": 0.2259204110828881, "grad_norm": 0.5873532712361155, "learning_rate": 4.300567721005677e-05, "loss": 0.6266, "step": 7738 }, { "epoch": 0.22594960731073546, "grad_norm": 0.552516861619675, "learning_rate": 4.300405515004055e-05, "loss": 0.6511, "step": 7739 }, { "epoch": 0.22597880353858282, "grad_norm": 0.5636645768863713, "learning_rate": 4.300243309002433e-05, "loss": 0.6202, "step": 7740 }, { "epoch": 0.22600799976643018, "grad_norm": 0.5870021180599893, "learning_rate": 4.3000811030008114e-05, "loss": 0.6748, "step": 7741 }, { "epoch": 0.22603719599427755, "grad_norm": 0.5705938812066115, "learning_rate": 4.2999188969991896e-05, "loss": 0.596, "step": 7742 }, { "epoch": 0.2260663922221249, "grad_norm": 0.6209504351940407, "learning_rate": 4.299756690997567e-05, "loss": 0.648, "step": 7743 }, { "epoch": 0.22609558844997227, "grad_norm": 0.6615021815939089, "learning_rate": 4.299594484995945e-05, "loss": 0.7159, "step": 7744 }, { "epoch": 0.22612478467781963, "grad_norm": 0.6275001672423043, "learning_rate": 4.299432278994323e-05, "loss": 0.7072, "step": 7745 }, { "epoch": 0.226153980905667, "grad_norm": 0.6345532985474011, "learning_rate": 4.299270072992701e-05, "loss": 0.7101, "step": 7746 }, { "epoch": 0.22618317713351435, "grad_norm": 0.6439967153352857, "learning_rate": 4.299107866991079e-05, "loss": 0.7636, "step": 7747 }, { "epoch": 0.2262123733613617, "grad_norm": 0.5753695645644368, "learning_rate": 4.2989456609894566e-05, "loss": 0.6038, "step": 7748 }, { "epoch": 0.22624156958920907, "grad_norm": 0.5490045957834848, "learning_rate": 4.298783454987835e-05, "loss": 0.6057, "step": 7749 }, { "epoch": 0.22627076581705644, "grad_norm": 1.0665247350014322, "learning_rate": 4.298621248986213e-05, "loss": 0.7191, "step": 7750 }, { "epoch": 0.2262999620449038, "grad_norm": 0.5840489174842466, "learning_rate": 4.2984590429845905e-05, "loss": 0.7296, "step": 7751 }, { "epoch": 0.22632915827275116, "grad_norm": 0.5678282250310343, "learning_rate": 4.2982968369829686e-05, "loss": 0.6338, "step": 7752 }, { "epoch": 0.22635835450059852, "grad_norm": 0.6222224118816573, "learning_rate": 4.298134630981346e-05, "loss": 0.6879, "step": 7753 }, { "epoch": 0.22638755072844588, "grad_norm": 0.609756090079003, "learning_rate": 4.297972424979724e-05, "loss": 0.691, "step": 7754 }, { "epoch": 0.22641674695629324, "grad_norm": 0.5365946117714419, "learning_rate": 4.2978102189781025e-05, "loss": 0.6212, "step": 7755 }, { "epoch": 0.2264459431841406, "grad_norm": 0.5755503979102077, "learning_rate": 4.29764801297648e-05, "loss": 0.6313, "step": 7756 }, { "epoch": 0.22647513941198796, "grad_norm": 0.5269268854359181, "learning_rate": 4.297485806974858e-05, "loss": 0.6139, "step": 7757 }, { "epoch": 0.22650433563983532, "grad_norm": 0.5494159199119942, "learning_rate": 4.297323600973236e-05, "loss": 0.6387, "step": 7758 }, { "epoch": 0.22653353186768269, "grad_norm": 0.6152666080672542, "learning_rate": 4.2971613949716145e-05, "loss": 0.7358, "step": 7759 }, { "epoch": 0.22656272809553005, "grad_norm": 0.5711922864891983, "learning_rate": 4.296999188969992e-05, "loss": 0.662, "step": 7760 }, { "epoch": 0.2265919243233774, "grad_norm": 0.5582829969302727, "learning_rate": 4.29683698296837e-05, "loss": 0.6437, "step": 7761 }, { "epoch": 0.22662112055122477, "grad_norm": 0.590325052435645, "learning_rate": 4.2966747769667484e-05, "loss": 0.7265, "step": 7762 }, { "epoch": 0.22665031677907213, "grad_norm": 0.5675537264752355, "learning_rate": 4.296512570965126e-05, "loss": 0.6396, "step": 7763 }, { "epoch": 0.22667951300691952, "grad_norm": 0.5652108260385655, "learning_rate": 4.296350364963504e-05, "loss": 0.6061, "step": 7764 }, { "epoch": 0.22670870923476688, "grad_norm": 0.507845289934292, "learning_rate": 4.2961881589618815e-05, "loss": 0.5684, "step": 7765 }, { "epoch": 0.22673790546261424, "grad_norm": 0.5650778112571635, "learning_rate": 4.29602595296026e-05, "loss": 0.6252, "step": 7766 }, { "epoch": 0.2267671016904616, "grad_norm": 0.5425085163148796, "learning_rate": 4.295863746958638e-05, "loss": 0.6138, "step": 7767 }, { "epoch": 0.22679629791830896, "grad_norm": 0.5820075527408991, "learning_rate": 4.2957015409570154e-05, "loss": 0.6807, "step": 7768 }, { "epoch": 0.22682549414615633, "grad_norm": 0.49554640597071964, "learning_rate": 4.2955393349553936e-05, "loss": 0.5096, "step": 7769 }, { "epoch": 0.2268546903740037, "grad_norm": 0.5953711402984234, "learning_rate": 4.295377128953772e-05, "loss": 0.675, "step": 7770 }, { "epoch": 0.22688388660185105, "grad_norm": 0.5695606402029062, "learning_rate": 4.295214922952149e-05, "loss": 0.6647, "step": 7771 }, { "epoch": 0.2269130828296984, "grad_norm": 0.5561743682702307, "learning_rate": 4.2950527169505274e-05, "loss": 0.6467, "step": 7772 }, { "epoch": 0.22694227905754577, "grad_norm": 0.5306645388730207, "learning_rate": 4.294890510948905e-05, "loss": 0.6282, "step": 7773 }, { "epoch": 0.22697147528539313, "grad_norm": 0.6181615782592875, "learning_rate": 4.294728304947283e-05, "loss": 0.7603, "step": 7774 }, { "epoch": 0.2270006715132405, "grad_norm": 0.5852744133644258, "learning_rate": 4.294566098945661e-05, "loss": 0.6352, "step": 7775 }, { "epoch": 0.22702986774108785, "grad_norm": 0.5896058467011193, "learning_rate": 4.294403892944039e-05, "loss": 0.6385, "step": 7776 }, { "epoch": 0.22705906396893522, "grad_norm": 0.5522674155058307, "learning_rate": 4.2942416869424176e-05, "loss": 0.6028, "step": 7777 }, { "epoch": 0.22708826019678258, "grad_norm": 0.547532751680217, "learning_rate": 4.294079480940795e-05, "loss": 0.628, "step": 7778 }, { "epoch": 0.22711745642462994, "grad_norm": 0.5250173906452735, "learning_rate": 4.293917274939173e-05, "loss": 0.6006, "step": 7779 }, { "epoch": 0.2271466526524773, "grad_norm": 0.5429984322054102, "learning_rate": 4.293755068937551e-05, "loss": 0.6255, "step": 7780 }, { "epoch": 0.22717584888032466, "grad_norm": 0.8621062526978741, "learning_rate": 4.293592862935929e-05, "loss": 0.6814, "step": 7781 }, { "epoch": 0.22720504510817202, "grad_norm": 0.5947778187718396, "learning_rate": 4.293430656934307e-05, "loss": 0.7238, "step": 7782 }, { "epoch": 0.22723424133601938, "grad_norm": 0.5353982376136426, "learning_rate": 4.2932684509326847e-05, "loss": 0.5964, "step": 7783 }, { "epoch": 0.22726343756386674, "grad_norm": 0.53534399136919, "learning_rate": 4.293106244931063e-05, "loss": 0.5664, "step": 7784 }, { "epoch": 0.2272926337917141, "grad_norm": 0.5568580600869424, "learning_rate": 4.29294403892944e-05, "loss": 0.641, "step": 7785 }, { "epoch": 0.22732183001956147, "grad_norm": 0.5397718665966382, "learning_rate": 4.2927818329278185e-05, "loss": 0.6091, "step": 7786 }, { "epoch": 0.22735102624740883, "grad_norm": 0.5123703651317384, "learning_rate": 4.292619626926197e-05, "loss": 0.587, "step": 7787 }, { "epoch": 0.2273802224752562, "grad_norm": 0.5738277446306944, "learning_rate": 4.292457420924574e-05, "loss": 0.6576, "step": 7788 }, { "epoch": 0.22740941870310355, "grad_norm": 0.5772253651754966, "learning_rate": 4.2922952149229524e-05, "loss": 0.6648, "step": 7789 }, { "epoch": 0.2274386149309509, "grad_norm": 0.5593287907385054, "learning_rate": 4.29213300892133e-05, "loss": 0.6502, "step": 7790 }, { "epoch": 0.22746781115879827, "grad_norm": 0.5750882777717585, "learning_rate": 4.291970802919708e-05, "loss": 0.6559, "step": 7791 }, { "epoch": 0.22749700738664563, "grad_norm": 0.6059618390757366, "learning_rate": 4.291808596918086e-05, "loss": 0.6699, "step": 7792 }, { "epoch": 0.227526203614493, "grad_norm": 0.5763659751930887, "learning_rate": 4.291646390916464e-05, "loss": 0.7032, "step": 7793 }, { "epoch": 0.22755539984234038, "grad_norm": 0.5641400271485945, "learning_rate": 4.291484184914842e-05, "loss": 0.6304, "step": 7794 }, { "epoch": 0.22758459607018774, "grad_norm": 0.5655229316384366, "learning_rate": 4.29132197891322e-05, "loss": 0.6428, "step": 7795 }, { "epoch": 0.2276137922980351, "grad_norm": 0.5690303239237874, "learning_rate": 4.291159772911598e-05, "loss": 0.6601, "step": 7796 }, { "epoch": 0.22764298852588247, "grad_norm": 0.6431757138400321, "learning_rate": 4.2909975669099764e-05, "loss": 0.751, "step": 7797 }, { "epoch": 0.22767218475372983, "grad_norm": 0.5611910321254278, "learning_rate": 4.290835360908354e-05, "loss": 0.6485, "step": 7798 }, { "epoch": 0.2277013809815772, "grad_norm": 0.520552342273407, "learning_rate": 4.290673154906732e-05, "loss": 0.6028, "step": 7799 }, { "epoch": 0.22773057720942455, "grad_norm": 0.5843034529372433, "learning_rate": 4.2905109489051096e-05, "loss": 0.6158, "step": 7800 }, { "epoch": 0.2277597734372719, "grad_norm": 0.6259084281882616, "learning_rate": 4.290348742903488e-05, "loss": 0.6183, "step": 7801 }, { "epoch": 0.22778896966511927, "grad_norm": 0.6622361068311875, "learning_rate": 4.290186536901866e-05, "loss": 0.6822, "step": 7802 }, { "epoch": 0.22781816589296663, "grad_norm": 0.5525698179439713, "learning_rate": 4.2900243309002434e-05, "loss": 0.6145, "step": 7803 }, { "epoch": 0.227847362120814, "grad_norm": 0.5523912130414823, "learning_rate": 4.2898621248986216e-05, "loss": 0.67, "step": 7804 }, { "epoch": 0.22787655834866136, "grad_norm": 0.5384290720084871, "learning_rate": 4.289699918896999e-05, "loss": 0.545, "step": 7805 }, { "epoch": 0.22790575457650872, "grad_norm": 0.5729164979057929, "learning_rate": 4.289537712895377e-05, "loss": 0.7057, "step": 7806 }, { "epoch": 0.22793495080435608, "grad_norm": 0.5176125391361741, "learning_rate": 4.2893755068937555e-05, "loss": 0.5764, "step": 7807 }, { "epoch": 0.22796414703220344, "grad_norm": 0.5654754288095459, "learning_rate": 4.289213300892133e-05, "loss": 0.7043, "step": 7808 }, { "epoch": 0.2279933432600508, "grad_norm": 0.5535226552375325, "learning_rate": 4.289051094890511e-05, "loss": 0.6533, "step": 7809 }, { "epoch": 0.22802253948789816, "grad_norm": 0.6330365345887744, "learning_rate": 4.2888888888888886e-05, "loss": 0.6786, "step": 7810 }, { "epoch": 0.22805173571574552, "grad_norm": 0.5849535554524548, "learning_rate": 4.288726682887267e-05, "loss": 0.7002, "step": 7811 }, { "epoch": 0.22808093194359289, "grad_norm": 0.5274942087384566, "learning_rate": 4.288564476885645e-05, "loss": 0.5595, "step": 7812 }, { "epoch": 0.22811012817144025, "grad_norm": 0.5772652557164544, "learning_rate": 4.2884022708840225e-05, "loss": 0.6838, "step": 7813 }, { "epoch": 0.2281393243992876, "grad_norm": 0.6112767410238109, "learning_rate": 4.2882400648824007e-05, "loss": 0.6547, "step": 7814 }, { "epoch": 0.22816852062713497, "grad_norm": 0.5479695355839854, "learning_rate": 4.288077858880779e-05, "loss": 0.6317, "step": 7815 }, { "epoch": 0.22819771685498233, "grad_norm": 0.527971451780363, "learning_rate": 4.287915652879157e-05, "loss": 0.6033, "step": 7816 }, { "epoch": 0.2282269130828297, "grad_norm": 0.5831558393813819, "learning_rate": 4.287753446877535e-05, "loss": 0.6575, "step": 7817 }, { "epoch": 0.22825610931067705, "grad_norm": 0.6083842741851747, "learning_rate": 4.287591240875913e-05, "loss": 0.6947, "step": 7818 }, { "epoch": 0.2282853055385244, "grad_norm": 0.6281196166927361, "learning_rate": 4.287429034874291e-05, "loss": 0.7156, "step": 7819 }, { "epoch": 0.22831450176637177, "grad_norm": 0.52414337295354, "learning_rate": 4.2872668288726684e-05, "loss": 0.6124, "step": 7820 }, { "epoch": 0.22834369799421914, "grad_norm": 0.5560155113211772, "learning_rate": 4.2871046228710465e-05, "loss": 0.6263, "step": 7821 }, { "epoch": 0.2283728942220665, "grad_norm": 0.669343749328765, "learning_rate": 4.286942416869425e-05, "loss": 0.5974, "step": 7822 }, { "epoch": 0.22840209044991386, "grad_norm": 0.5693367491774931, "learning_rate": 4.286780210867802e-05, "loss": 0.6934, "step": 7823 }, { "epoch": 0.22843128667776122, "grad_norm": 0.5248831993235097, "learning_rate": 4.2866180048661804e-05, "loss": 0.6008, "step": 7824 }, { "epoch": 0.2284604829056086, "grad_norm": 0.5442807359182145, "learning_rate": 4.286455798864558e-05, "loss": 0.6383, "step": 7825 }, { "epoch": 0.22848967913345597, "grad_norm": 0.5647812072524975, "learning_rate": 4.286293592862936e-05, "loss": 0.6223, "step": 7826 }, { "epoch": 0.22851887536130333, "grad_norm": 0.4970119341177806, "learning_rate": 4.286131386861314e-05, "loss": 0.5292, "step": 7827 }, { "epoch": 0.2285480715891507, "grad_norm": 0.5086961450365097, "learning_rate": 4.285969180859692e-05, "loss": 0.5173, "step": 7828 }, { "epoch": 0.22857726781699805, "grad_norm": 0.5429759464937214, "learning_rate": 4.28580697485807e-05, "loss": 0.6115, "step": 7829 }, { "epoch": 0.22860646404484541, "grad_norm": 0.5911493319478711, "learning_rate": 4.2856447688564474e-05, "loss": 0.6114, "step": 7830 }, { "epoch": 0.22863566027269278, "grad_norm": 0.561904327888998, "learning_rate": 4.2854825628548256e-05, "loss": 0.6273, "step": 7831 }, { "epoch": 0.22866485650054014, "grad_norm": 0.5989847058004582, "learning_rate": 4.285320356853204e-05, "loss": 0.6865, "step": 7832 }, { "epoch": 0.2286940527283875, "grad_norm": 0.6032025056874817, "learning_rate": 4.285158150851581e-05, "loss": 0.7299, "step": 7833 }, { "epoch": 0.22872324895623486, "grad_norm": 0.6159270298972667, "learning_rate": 4.28499594484996e-05, "loss": 0.7491, "step": 7834 }, { "epoch": 0.22875244518408222, "grad_norm": 0.5622707164589277, "learning_rate": 4.2848337388483376e-05, "loss": 0.6398, "step": 7835 }, { "epoch": 0.22878164141192958, "grad_norm": 0.5636505291560864, "learning_rate": 4.284671532846716e-05, "loss": 0.6388, "step": 7836 }, { "epoch": 0.22881083763977694, "grad_norm": 0.5356191587039457, "learning_rate": 4.284509326845094e-05, "loss": 0.5988, "step": 7837 }, { "epoch": 0.2288400338676243, "grad_norm": 0.6170232518221251, "learning_rate": 4.2843471208434715e-05, "loss": 0.7521, "step": 7838 }, { "epoch": 0.22886923009547167, "grad_norm": 0.5685805695926395, "learning_rate": 4.2841849148418496e-05, "loss": 0.6553, "step": 7839 }, { "epoch": 0.22889842632331903, "grad_norm": 0.5576409851141151, "learning_rate": 4.284022708840227e-05, "loss": 0.6774, "step": 7840 }, { "epoch": 0.2289276225511664, "grad_norm": 0.6173805720160497, "learning_rate": 4.283860502838605e-05, "loss": 0.6932, "step": 7841 }, { "epoch": 0.22895681877901375, "grad_norm": 0.5504955674288391, "learning_rate": 4.2836982968369835e-05, "loss": 0.6203, "step": 7842 }, { "epoch": 0.2289860150068611, "grad_norm": 0.48710810752393796, "learning_rate": 4.283536090835361e-05, "loss": 0.5244, "step": 7843 }, { "epoch": 0.22901521123470847, "grad_norm": 0.5957212660160898, "learning_rate": 4.283373884833739e-05, "loss": 0.7028, "step": 7844 }, { "epoch": 0.22904440746255583, "grad_norm": 0.5191969991656995, "learning_rate": 4.283211678832117e-05, "loss": 0.5706, "step": 7845 }, { "epoch": 0.2290736036904032, "grad_norm": 0.5385368884413957, "learning_rate": 4.283049472830495e-05, "loss": 0.6257, "step": 7846 }, { "epoch": 0.22910279991825055, "grad_norm": 0.5638513935726086, "learning_rate": 4.282887266828873e-05, "loss": 0.6328, "step": 7847 }, { "epoch": 0.22913199614609792, "grad_norm": 0.5687190059229167, "learning_rate": 4.2827250608272505e-05, "loss": 0.6617, "step": 7848 }, { "epoch": 0.22916119237394528, "grad_norm": 3.6332825683063374, "learning_rate": 4.282562854825629e-05, "loss": 0.7373, "step": 7849 }, { "epoch": 0.22919038860179264, "grad_norm": 0.6042065396316783, "learning_rate": 4.282400648824006e-05, "loss": 0.7367, "step": 7850 }, { "epoch": 0.22921958482964, "grad_norm": 0.5673894234335298, "learning_rate": 4.2822384428223844e-05, "loss": 0.614, "step": 7851 }, { "epoch": 0.22924878105748736, "grad_norm": 0.5160370738314263, "learning_rate": 4.2820762368207625e-05, "loss": 0.5728, "step": 7852 }, { "epoch": 0.22927797728533472, "grad_norm": 0.559808177738249, "learning_rate": 4.281914030819141e-05, "loss": 0.6386, "step": 7853 }, { "epoch": 0.22930717351318208, "grad_norm": 0.5886773685828247, "learning_rate": 4.281751824817519e-05, "loss": 0.6615, "step": 7854 }, { "epoch": 0.22933636974102947, "grad_norm": 0.5969845170119618, "learning_rate": 4.2815896188158964e-05, "loss": 0.7043, "step": 7855 }, { "epoch": 0.22936556596887683, "grad_norm": 0.6346135400077192, "learning_rate": 4.2814274128142746e-05, "loss": 0.7733, "step": 7856 }, { "epoch": 0.2293947621967242, "grad_norm": 0.5821433047153134, "learning_rate": 4.281265206812653e-05, "loss": 0.6344, "step": 7857 }, { "epoch": 0.22942395842457156, "grad_norm": 0.6039357130005086, "learning_rate": 4.28110300081103e-05, "loss": 0.6838, "step": 7858 }, { "epoch": 0.22945315465241892, "grad_norm": 0.5395018499615635, "learning_rate": 4.2809407948094084e-05, "loss": 0.5311, "step": 7859 }, { "epoch": 0.22948235088026628, "grad_norm": 0.5571284923043077, "learning_rate": 4.280778588807786e-05, "loss": 0.6441, "step": 7860 }, { "epoch": 0.22951154710811364, "grad_norm": 0.5554558100974677, "learning_rate": 4.280616382806164e-05, "loss": 0.6337, "step": 7861 }, { "epoch": 0.229540743335961, "grad_norm": 0.5773746824487779, "learning_rate": 4.280454176804542e-05, "loss": 0.7027, "step": 7862 }, { "epoch": 0.22956993956380836, "grad_norm": 0.609874810270197, "learning_rate": 4.28029197080292e-05, "loss": 0.7585, "step": 7863 }, { "epoch": 0.22959913579165572, "grad_norm": 0.6052608100315233, "learning_rate": 4.280129764801298e-05, "loss": 0.739, "step": 7864 }, { "epoch": 0.22962833201950308, "grad_norm": 0.5505181439695709, "learning_rate": 4.2799675587996755e-05, "loss": 0.6059, "step": 7865 }, { "epoch": 0.22965752824735045, "grad_norm": 0.5447976950155214, "learning_rate": 4.2798053527980536e-05, "loss": 0.6552, "step": 7866 }, { "epoch": 0.2296867244751978, "grad_norm": 0.5407495443573475, "learning_rate": 4.279643146796432e-05, "loss": 0.6278, "step": 7867 }, { "epoch": 0.22971592070304517, "grad_norm": 0.5682656641137828, "learning_rate": 4.279480940794809e-05, "loss": 0.6882, "step": 7868 }, { "epoch": 0.22974511693089253, "grad_norm": 0.5258675509264648, "learning_rate": 4.2793187347931875e-05, "loss": 0.6151, "step": 7869 }, { "epoch": 0.2297743131587399, "grad_norm": 0.5772761447505721, "learning_rate": 4.279156528791565e-05, "loss": 0.7058, "step": 7870 }, { "epoch": 0.22980350938658725, "grad_norm": 0.5634902467774258, "learning_rate": 4.278994322789943e-05, "loss": 0.6329, "step": 7871 }, { "epoch": 0.2298327056144346, "grad_norm": 0.562569067385632, "learning_rate": 4.278832116788321e-05, "loss": 0.6827, "step": 7872 }, { "epoch": 0.22986190184228197, "grad_norm": 0.5570189767879136, "learning_rate": 4.2786699107866995e-05, "loss": 0.6417, "step": 7873 }, { "epoch": 0.22989109807012933, "grad_norm": 0.5787698274330549, "learning_rate": 4.278507704785078e-05, "loss": 0.6835, "step": 7874 }, { "epoch": 0.2299202942979767, "grad_norm": 0.5788234938926805, "learning_rate": 4.278345498783455e-05, "loss": 0.678, "step": 7875 }, { "epoch": 0.22994949052582406, "grad_norm": 0.5888403271219846, "learning_rate": 4.2781832927818334e-05, "loss": 0.702, "step": 7876 }, { "epoch": 0.22997868675367142, "grad_norm": 0.5307465070571685, "learning_rate": 4.278021086780211e-05, "loss": 0.5885, "step": 7877 }, { "epoch": 0.23000788298151878, "grad_norm": 0.5661535123985125, "learning_rate": 4.277858880778589e-05, "loss": 0.6526, "step": 7878 }, { "epoch": 0.23003707920936614, "grad_norm": 0.5925965527904784, "learning_rate": 4.277696674776967e-05, "loss": 0.7047, "step": 7879 }, { "epoch": 0.2300662754372135, "grad_norm": 0.5802756324757148, "learning_rate": 4.277534468775345e-05, "loss": 0.6836, "step": 7880 }, { "epoch": 0.23009547166506086, "grad_norm": 0.5930920095675113, "learning_rate": 4.277372262773723e-05, "loss": 0.7158, "step": 7881 }, { "epoch": 0.23012466789290822, "grad_norm": 0.6003723619430762, "learning_rate": 4.277210056772101e-05, "loss": 0.739, "step": 7882 }, { "epoch": 0.23015386412075559, "grad_norm": 0.5394771358103684, "learning_rate": 4.2770478507704786e-05, "loss": 0.5825, "step": 7883 }, { "epoch": 0.23018306034860295, "grad_norm": 0.5515579632783392, "learning_rate": 4.276885644768857e-05, "loss": 0.6276, "step": 7884 }, { "epoch": 0.23021225657645034, "grad_norm": 0.5025543700246804, "learning_rate": 4.276723438767234e-05, "loss": 0.5394, "step": 7885 }, { "epoch": 0.2302414528042977, "grad_norm": 0.5619012607050621, "learning_rate": 4.2765612327656124e-05, "loss": 0.6379, "step": 7886 }, { "epoch": 0.23027064903214506, "grad_norm": 0.5573193944843682, "learning_rate": 4.2763990267639906e-05, "loss": 0.6158, "step": 7887 }, { "epoch": 0.23029984525999242, "grad_norm": 0.5727793600853927, "learning_rate": 4.276236820762368e-05, "loss": 0.6676, "step": 7888 }, { "epoch": 0.23032904148783978, "grad_norm": 0.5520651667166053, "learning_rate": 4.276074614760746e-05, "loss": 0.6976, "step": 7889 }, { "epoch": 0.23035823771568714, "grad_norm": 0.5851694052277733, "learning_rate": 4.275912408759124e-05, "loss": 0.6929, "step": 7890 }, { "epoch": 0.2303874339435345, "grad_norm": 0.5989831761164544, "learning_rate": 4.2757502027575026e-05, "loss": 0.7166, "step": 7891 }, { "epoch": 0.23041663017138186, "grad_norm": 0.5582735371335421, "learning_rate": 4.27558799675588e-05, "loss": 0.6661, "step": 7892 }, { "epoch": 0.23044582639922923, "grad_norm": 0.5801017064536825, "learning_rate": 4.275425790754258e-05, "loss": 0.6438, "step": 7893 }, { "epoch": 0.2304750226270766, "grad_norm": 0.6035425267318028, "learning_rate": 4.2752635847526365e-05, "loss": 0.7168, "step": 7894 }, { "epoch": 0.23050421885492395, "grad_norm": 0.5442906719875469, "learning_rate": 4.275101378751014e-05, "loss": 0.6075, "step": 7895 }, { "epoch": 0.2305334150827713, "grad_norm": 0.5338884830719064, "learning_rate": 4.274939172749392e-05, "loss": 0.6172, "step": 7896 }, { "epoch": 0.23056261131061867, "grad_norm": 0.5632229951021152, "learning_rate": 4.2747769667477696e-05, "loss": 0.6015, "step": 7897 }, { "epoch": 0.23059180753846603, "grad_norm": 0.5882601375663822, "learning_rate": 4.274614760746148e-05, "loss": 0.6208, "step": 7898 }, { "epoch": 0.2306210037663134, "grad_norm": 0.7829399645876621, "learning_rate": 4.274452554744526e-05, "loss": 0.801, "step": 7899 }, { "epoch": 0.23065019999416075, "grad_norm": 0.5560864245351624, "learning_rate": 4.2742903487429035e-05, "loss": 0.6495, "step": 7900 }, { "epoch": 0.23067939622200812, "grad_norm": 0.5977724450968411, "learning_rate": 4.274128142741282e-05, "loss": 0.7075, "step": 7901 }, { "epoch": 0.23070859244985548, "grad_norm": 0.5755556214418599, "learning_rate": 4.27396593673966e-05, "loss": 0.6681, "step": 7902 }, { "epoch": 0.23073778867770284, "grad_norm": 0.5621464590627261, "learning_rate": 4.273803730738037e-05, "loss": 0.625, "step": 7903 }, { "epoch": 0.2307669849055502, "grad_norm": 0.5740208172489694, "learning_rate": 4.2736415247364155e-05, "loss": 0.6841, "step": 7904 }, { "epoch": 0.23079618113339756, "grad_norm": 0.631059035979976, "learning_rate": 4.273479318734793e-05, "loss": 0.7933, "step": 7905 }, { "epoch": 0.23082537736124492, "grad_norm": 0.5602296164253051, "learning_rate": 4.273317112733171e-05, "loss": 0.5493, "step": 7906 }, { "epoch": 0.23085457358909228, "grad_norm": 0.55726294378539, "learning_rate": 4.2731549067315494e-05, "loss": 0.6544, "step": 7907 }, { "epoch": 0.23088376981693964, "grad_norm": 0.6064421141459778, "learning_rate": 4.272992700729927e-05, "loss": 0.7215, "step": 7908 }, { "epoch": 0.230912966044787, "grad_norm": 0.5444776359230075, "learning_rate": 4.272830494728305e-05, "loss": 0.6318, "step": 7909 }, { "epoch": 0.23094216227263437, "grad_norm": 0.6194955877704239, "learning_rate": 4.272668288726683e-05, "loss": 0.6915, "step": 7910 }, { "epoch": 0.23097135850048173, "grad_norm": 0.5473156665232413, "learning_rate": 4.2725060827250614e-05, "loss": 0.5974, "step": 7911 }, { "epoch": 0.2310005547283291, "grad_norm": 0.5554950932113192, "learning_rate": 4.272343876723439e-05, "loss": 0.6008, "step": 7912 }, { "epoch": 0.23102975095617645, "grad_norm": 0.6221896530099955, "learning_rate": 4.272181670721817e-05, "loss": 0.6572, "step": 7913 }, { "epoch": 0.2310589471840238, "grad_norm": 0.5523968175813438, "learning_rate": 4.272019464720195e-05, "loss": 0.6253, "step": 7914 }, { "epoch": 0.2310881434118712, "grad_norm": 0.5421681263104821, "learning_rate": 4.271857258718573e-05, "loss": 0.5598, "step": 7915 }, { "epoch": 0.23111733963971856, "grad_norm": 0.5691946527132148, "learning_rate": 4.271695052716951e-05, "loss": 0.7076, "step": 7916 }, { "epoch": 0.23114653586756592, "grad_norm": 0.5931595027741402, "learning_rate": 4.2715328467153284e-05, "loss": 0.7237, "step": 7917 }, { "epoch": 0.23117573209541328, "grad_norm": 0.5703872185875841, "learning_rate": 4.2713706407137066e-05, "loss": 0.6321, "step": 7918 }, { "epoch": 0.23120492832326064, "grad_norm": 0.5304166513790673, "learning_rate": 4.271208434712085e-05, "loss": 0.6042, "step": 7919 }, { "epoch": 0.231234124551108, "grad_norm": 0.5631266239757535, "learning_rate": 4.271046228710462e-05, "loss": 0.6713, "step": 7920 }, { "epoch": 0.23126332077895537, "grad_norm": 0.6467223757642163, "learning_rate": 4.2708840227088404e-05, "loss": 0.7943, "step": 7921 }, { "epoch": 0.23129251700680273, "grad_norm": 0.5953236003594209, "learning_rate": 4.270721816707218e-05, "loss": 0.6969, "step": 7922 }, { "epoch": 0.2313217132346501, "grad_norm": 0.5911796751912453, "learning_rate": 4.270559610705596e-05, "loss": 0.6847, "step": 7923 }, { "epoch": 0.23135090946249745, "grad_norm": 0.5357670848212405, "learning_rate": 4.270397404703974e-05, "loss": 0.5718, "step": 7924 }, { "epoch": 0.2313801056903448, "grad_norm": 0.551569918926652, "learning_rate": 4.270235198702352e-05, "loss": 0.5551, "step": 7925 }, { "epoch": 0.23140930191819217, "grad_norm": 0.5608625278786867, "learning_rate": 4.27007299270073e-05, "loss": 0.6479, "step": 7926 }, { "epoch": 0.23143849814603953, "grad_norm": 0.6321290258581538, "learning_rate": 4.269910786699108e-05, "loss": 0.6081, "step": 7927 }, { "epoch": 0.2314676943738869, "grad_norm": 0.6077851417082791, "learning_rate": 4.269748580697486e-05, "loss": 0.6697, "step": 7928 }, { "epoch": 0.23149689060173426, "grad_norm": 0.5754575974540731, "learning_rate": 4.2695863746958645e-05, "loss": 0.6979, "step": 7929 }, { "epoch": 0.23152608682958162, "grad_norm": 0.582353041446698, "learning_rate": 4.269424168694242e-05, "loss": 0.662, "step": 7930 }, { "epoch": 0.23155528305742898, "grad_norm": 0.6167798338809892, "learning_rate": 4.26926196269262e-05, "loss": 0.7347, "step": 7931 }, { "epoch": 0.23158447928527634, "grad_norm": 0.5233683712217971, "learning_rate": 4.269099756690998e-05, "loss": 0.5578, "step": 7932 }, { "epoch": 0.2316136755131237, "grad_norm": 0.6508753264262703, "learning_rate": 4.268937550689376e-05, "loss": 0.7547, "step": 7933 }, { "epoch": 0.23164287174097106, "grad_norm": 0.5530763865831508, "learning_rate": 4.268775344687754e-05, "loss": 0.5924, "step": 7934 }, { "epoch": 0.23167206796881842, "grad_norm": 0.591178983359922, "learning_rate": 4.2686131386861315e-05, "loss": 0.6421, "step": 7935 }, { "epoch": 0.23170126419666578, "grad_norm": 0.5809188360656787, "learning_rate": 4.26845093268451e-05, "loss": 0.7, "step": 7936 }, { "epoch": 0.23173046042451315, "grad_norm": 0.5435073864934729, "learning_rate": 4.268288726682887e-05, "loss": 0.6187, "step": 7937 }, { "epoch": 0.2317596566523605, "grad_norm": 0.5610151759303713, "learning_rate": 4.2681265206812654e-05, "loss": 0.6719, "step": 7938 }, { "epoch": 0.23178885288020787, "grad_norm": 0.5838912231670166, "learning_rate": 4.2679643146796435e-05, "loss": 0.7073, "step": 7939 }, { "epoch": 0.23181804910805523, "grad_norm": 0.5074175280662528, "learning_rate": 4.267802108678021e-05, "loss": 0.5327, "step": 7940 }, { "epoch": 0.2318472453359026, "grad_norm": 0.5735853282832634, "learning_rate": 4.267639902676399e-05, "loss": 0.6648, "step": 7941 }, { "epoch": 0.23187644156374995, "grad_norm": 0.5078929443052388, "learning_rate": 4.267477696674777e-05, "loss": 0.5537, "step": 7942 }, { "epoch": 0.2319056377915973, "grad_norm": 0.6030993755542561, "learning_rate": 4.267315490673155e-05, "loss": 0.703, "step": 7943 }, { "epoch": 0.23193483401944467, "grad_norm": 0.5538027890703926, "learning_rate": 4.267153284671533e-05, "loss": 0.6134, "step": 7944 }, { "epoch": 0.23196403024729206, "grad_norm": 0.5907509444382497, "learning_rate": 4.2669910786699106e-05, "loss": 0.7411, "step": 7945 }, { "epoch": 0.23199322647513942, "grad_norm": 0.5262593938577862, "learning_rate": 4.266828872668289e-05, "loss": 0.6023, "step": 7946 }, { "epoch": 0.23202242270298679, "grad_norm": 0.5237314072651699, "learning_rate": 4.266666666666667e-05, "loss": 0.585, "step": 7947 }, { "epoch": 0.23205161893083415, "grad_norm": 0.5914518593741757, "learning_rate": 4.266504460665045e-05, "loss": 0.6574, "step": 7948 }, { "epoch": 0.2320808151586815, "grad_norm": 0.5405681592260153, "learning_rate": 4.266342254663423e-05, "loss": 0.5886, "step": 7949 }, { "epoch": 0.23211001138652887, "grad_norm": 0.6033938918881604, "learning_rate": 4.266180048661801e-05, "loss": 0.6319, "step": 7950 }, { "epoch": 0.23213920761437623, "grad_norm": 0.5477914327726836, "learning_rate": 4.266017842660179e-05, "loss": 0.6639, "step": 7951 }, { "epoch": 0.2321684038422236, "grad_norm": 0.5011486240166697, "learning_rate": 4.2658556366585565e-05, "loss": 0.5487, "step": 7952 }, { "epoch": 0.23219760007007095, "grad_norm": 0.5466775468186211, "learning_rate": 4.2656934306569346e-05, "loss": 0.6393, "step": 7953 }, { "epoch": 0.23222679629791831, "grad_norm": 0.6431969712212247, "learning_rate": 4.265531224655313e-05, "loss": 0.7163, "step": 7954 }, { "epoch": 0.23225599252576568, "grad_norm": 0.5656602610911016, "learning_rate": 4.26536901865369e-05, "loss": 0.6307, "step": 7955 }, { "epoch": 0.23228518875361304, "grad_norm": 0.6296617014081934, "learning_rate": 4.2652068126520685e-05, "loss": 0.7397, "step": 7956 }, { "epoch": 0.2323143849814604, "grad_norm": 0.585088639042204, "learning_rate": 4.265044606650446e-05, "loss": 0.7031, "step": 7957 }, { "epoch": 0.23234358120930776, "grad_norm": 0.5928204637590854, "learning_rate": 4.264882400648824e-05, "loss": 0.7444, "step": 7958 }, { "epoch": 0.23237277743715512, "grad_norm": 0.552232705045077, "learning_rate": 4.264720194647202e-05, "loss": 0.664, "step": 7959 }, { "epoch": 0.23240197366500248, "grad_norm": 0.6086907043197414, "learning_rate": 4.26455798864558e-05, "loss": 0.6478, "step": 7960 }, { "epoch": 0.23243116989284984, "grad_norm": 0.5779911840608261, "learning_rate": 4.264395782643958e-05, "loss": 0.6534, "step": 7961 }, { "epoch": 0.2324603661206972, "grad_norm": 0.5479195894933084, "learning_rate": 4.2642335766423355e-05, "loss": 0.6374, "step": 7962 }, { "epoch": 0.23248956234854457, "grad_norm": 0.5569445460531368, "learning_rate": 4.264071370640714e-05, "loss": 0.635, "step": 7963 }, { "epoch": 0.23251875857639193, "grad_norm": 0.5833310376693632, "learning_rate": 4.263909164639092e-05, "loss": 0.6696, "step": 7964 }, { "epoch": 0.2325479548042393, "grad_norm": 0.5635538525918148, "learning_rate": 4.2637469586374694e-05, "loss": 0.6878, "step": 7965 }, { "epoch": 0.23257715103208665, "grad_norm": 0.5641719564813613, "learning_rate": 4.263584752635848e-05, "loss": 0.6491, "step": 7966 }, { "epoch": 0.232606347259934, "grad_norm": 0.5196631823399227, "learning_rate": 4.263422546634226e-05, "loss": 0.5774, "step": 7967 }, { "epoch": 0.23263554348778137, "grad_norm": 0.5894567725007183, "learning_rate": 4.263260340632604e-05, "loss": 0.6531, "step": 7968 }, { "epoch": 0.23266473971562873, "grad_norm": 0.5391990755402502, "learning_rate": 4.263098134630982e-05, "loss": 0.6481, "step": 7969 }, { "epoch": 0.2326939359434761, "grad_norm": 0.5762778264448156, "learning_rate": 4.2629359286293596e-05, "loss": 0.7475, "step": 7970 }, { "epoch": 0.23272313217132345, "grad_norm": 0.5828380447673662, "learning_rate": 4.262773722627738e-05, "loss": 0.7075, "step": 7971 }, { "epoch": 0.23275232839917082, "grad_norm": 0.5879346678110843, "learning_rate": 4.262611516626115e-05, "loss": 0.7019, "step": 7972 }, { "epoch": 0.23278152462701818, "grad_norm": 0.5011382765168984, "learning_rate": 4.2624493106244934e-05, "loss": 0.5519, "step": 7973 }, { "epoch": 0.23281072085486554, "grad_norm": 0.566573026758382, "learning_rate": 4.2622871046228716e-05, "loss": 0.6633, "step": 7974 }, { "epoch": 0.23283991708271293, "grad_norm": 0.5479395791637737, "learning_rate": 4.262124898621249e-05, "loss": 0.6184, "step": 7975 }, { "epoch": 0.2328691133105603, "grad_norm": 0.5923548085890346, "learning_rate": 4.261962692619627e-05, "loss": 0.6882, "step": 7976 }, { "epoch": 0.23289830953840765, "grad_norm": 0.5843826329975833, "learning_rate": 4.261800486618005e-05, "loss": 0.6811, "step": 7977 }, { "epoch": 0.232927505766255, "grad_norm": 0.5419040049822652, "learning_rate": 4.261638280616383e-05, "loss": 0.5916, "step": 7978 }, { "epoch": 0.23295670199410237, "grad_norm": 0.5956943224745042, "learning_rate": 4.261476074614761e-05, "loss": 0.6796, "step": 7979 }, { "epoch": 0.23298589822194973, "grad_norm": 0.5430567801382549, "learning_rate": 4.2613138686131386e-05, "loss": 0.6213, "step": 7980 }, { "epoch": 0.2330150944497971, "grad_norm": 0.5482653775039376, "learning_rate": 4.261151662611517e-05, "loss": 0.623, "step": 7981 }, { "epoch": 0.23304429067764446, "grad_norm": 0.8112119499948078, "learning_rate": 4.260989456609894e-05, "loss": 0.671, "step": 7982 }, { "epoch": 0.23307348690549182, "grad_norm": 0.5806994162504017, "learning_rate": 4.2608272506082725e-05, "loss": 0.6554, "step": 7983 }, { "epoch": 0.23310268313333918, "grad_norm": 0.5995338795532464, "learning_rate": 4.2606650446066506e-05, "loss": 0.6998, "step": 7984 }, { "epoch": 0.23313187936118654, "grad_norm": 0.6104275538293193, "learning_rate": 4.260502838605029e-05, "loss": 0.7586, "step": 7985 }, { "epoch": 0.2331610755890339, "grad_norm": 0.5467731127319577, "learning_rate": 4.260340632603407e-05, "loss": 0.6205, "step": 7986 }, { "epoch": 0.23319027181688126, "grad_norm": 0.5506527049719182, "learning_rate": 4.2601784266017845e-05, "loss": 0.6698, "step": 7987 }, { "epoch": 0.23321946804472862, "grad_norm": 0.5416629818147995, "learning_rate": 4.260016220600163e-05, "loss": 0.641, "step": 7988 }, { "epoch": 0.23324866427257598, "grad_norm": 0.5604548255029267, "learning_rate": 4.25985401459854e-05, "loss": 0.6908, "step": 7989 }, { "epoch": 0.23327786050042335, "grad_norm": 0.5482937358967027, "learning_rate": 4.259691808596918e-05, "loss": 0.5945, "step": 7990 }, { "epoch": 0.2333070567282707, "grad_norm": 0.6181381328624539, "learning_rate": 4.2595296025952965e-05, "loss": 0.6713, "step": 7991 }, { "epoch": 0.23333625295611807, "grad_norm": 0.6034307054233985, "learning_rate": 4.259367396593674e-05, "loss": 0.7539, "step": 7992 }, { "epoch": 0.23336544918396543, "grad_norm": 0.5427800641254219, "learning_rate": 4.259205190592052e-05, "loss": 0.595, "step": 7993 }, { "epoch": 0.2333946454118128, "grad_norm": 0.5346953948098908, "learning_rate": 4.2590429845904304e-05, "loss": 0.5479, "step": 7994 }, { "epoch": 0.23342384163966015, "grad_norm": 0.5446881226273033, "learning_rate": 4.258880778588808e-05, "loss": 0.6346, "step": 7995 }, { "epoch": 0.2334530378675075, "grad_norm": 0.5473254393730544, "learning_rate": 4.258718572587186e-05, "loss": 0.5925, "step": 7996 }, { "epoch": 0.23348223409535487, "grad_norm": 0.5159975799693005, "learning_rate": 4.2585563665855635e-05, "loss": 0.5639, "step": 7997 }, { "epoch": 0.23351143032320223, "grad_norm": 0.5802277219644246, "learning_rate": 4.258394160583942e-05, "loss": 0.7347, "step": 7998 }, { "epoch": 0.2335406265510496, "grad_norm": 0.5329241991077537, "learning_rate": 4.25823195458232e-05, "loss": 0.5841, "step": 7999 }, { "epoch": 0.23356982277889696, "grad_norm": 0.5930565150814247, "learning_rate": 4.2580697485806974e-05, "loss": 0.6418, "step": 8000 }, { "epoch": 0.23359901900674432, "grad_norm": 0.5673616947426517, "learning_rate": 4.2579075425790756e-05, "loss": 0.6469, "step": 8001 }, { "epoch": 0.23362821523459168, "grad_norm": 0.5863897313193129, "learning_rate": 4.257745336577453e-05, "loss": 0.6265, "step": 8002 }, { "epoch": 0.23365741146243904, "grad_norm": 0.6033171983566002, "learning_rate": 4.257583130575831e-05, "loss": 0.6651, "step": 8003 }, { "epoch": 0.2336866076902864, "grad_norm": 0.5899900203175107, "learning_rate": 4.2574209245742094e-05, "loss": 0.6902, "step": 8004 }, { "epoch": 0.2337158039181338, "grad_norm": 0.5962087296433948, "learning_rate": 4.2572587185725876e-05, "loss": 0.7243, "step": 8005 }, { "epoch": 0.23374500014598115, "grad_norm": 0.5776034397649322, "learning_rate": 4.257096512570966e-05, "loss": 0.6373, "step": 8006 }, { "epoch": 0.2337741963738285, "grad_norm": 0.9008651582323094, "learning_rate": 4.256934306569343e-05, "loss": 0.6525, "step": 8007 }, { "epoch": 0.23380339260167587, "grad_norm": 0.570036626673528, "learning_rate": 4.2567721005677214e-05, "loss": 0.6818, "step": 8008 }, { "epoch": 0.23383258882952324, "grad_norm": 0.5825623330143116, "learning_rate": 4.256609894566099e-05, "loss": 0.6895, "step": 8009 }, { "epoch": 0.2338617850573706, "grad_norm": 0.5377667663423474, "learning_rate": 4.256447688564477e-05, "loss": 0.5747, "step": 8010 }, { "epoch": 0.23389098128521796, "grad_norm": 0.5708590138904511, "learning_rate": 4.256285482562855e-05, "loss": 0.6999, "step": 8011 }, { "epoch": 0.23392017751306532, "grad_norm": 0.534139775786271, "learning_rate": 4.256123276561233e-05, "loss": 0.5988, "step": 8012 }, { "epoch": 0.23394937374091268, "grad_norm": 0.5124054188719362, "learning_rate": 4.255961070559611e-05, "loss": 0.5386, "step": 8013 }, { "epoch": 0.23397856996876004, "grad_norm": 0.5890259796654013, "learning_rate": 4.255798864557989e-05, "loss": 0.6677, "step": 8014 }, { "epoch": 0.2340077661966074, "grad_norm": 0.5938572336785305, "learning_rate": 4.2556366585563666e-05, "loss": 0.6952, "step": 8015 }, { "epoch": 0.23403696242445476, "grad_norm": 0.561164807008794, "learning_rate": 4.255474452554745e-05, "loss": 0.5986, "step": 8016 }, { "epoch": 0.23406615865230213, "grad_norm": 0.6206884110902308, "learning_rate": 4.255312246553122e-05, "loss": 0.6864, "step": 8017 }, { "epoch": 0.2340953548801495, "grad_norm": 0.5405515407013817, "learning_rate": 4.2551500405515005e-05, "loss": 0.6454, "step": 8018 }, { "epoch": 0.23412455110799685, "grad_norm": 0.5262575924699536, "learning_rate": 4.254987834549879e-05, "loss": 0.5983, "step": 8019 }, { "epoch": 0.2341537473358442, "grad_norm": 0.6326959563055089, "learning_rate": 4.254825628548256e-05, "loss": 0.7026, "step": 8020 }, { "epoch": 0.23418294356369157, "grad_norm": 0.5957192227969466, "learning_rate": 4.2546634225466343e-05, "loss": 0.6988, "step": 8021 }, { "epoch": 0.23421213979153893, "grad_norm": 0.6214853934654604, "learning_rate": 4.254501216545012e-05, "loss": 0.7429, "step": 8022 }, { "epoch": 0.2342413360193863, "grad_norm": 0.5562342620025236, "learning_rate": 4.254339010543391e-05, "loss": 0.6555, "step": 8023 }, { "epoch": 0.23427053224723365, "grad_norm": 0.5896439767327848, "learning_rate": 4.254176804541768e-05, "loss": 0.6478, "step": 8024 }, { "epoch": 0.23429972847508101, "grad_norm": 0.5791142395278045, "learning_rate": 4.2540145985401464e-05, "loss": 0.6578, "step": 8025 }, { "epoch": 0.23432892470292838, "grad_norm": 0.5286431001504273, "learning_rate": 4.2538523925385245e-05, "loss": 0.5617, "step": 8026 }, { "epoch": 0.23435812093077574, "grad_norm": 0.6827600192429045, "learning_rate": 4.253690186536902e-05, "loss": 0.6558, "step": 8027 }, { "epoch": 0.2343873171586231, "grad_norm": 0.5623326496883323, "learning_rate": 4.25352798053528e-05, "loss": 0.6825, "step": 8028 }, { "epoch": 0.23441651338647046, "grad_norm": 0.6843649146889424, "learning_rate": 4.253365774533658e-05, "loss": 0.6868, "step": 8029 }, { "epoch": 0.23444570961431782, "grad_norm": 0.6118310557385881, "learning_rate": 4.253203568532036e-05, "loss": 0.6762, "step": 8030 }, { "epoch": 0.23447490584216518, "grad_norm": 0.6133685044039566, "learning_rate": 4.253041362530414e-05, "loss": 0.7017, "step": 8031 }, { "epoch": 0.23450410207001254, "grad_norm": 0.5364302655247501, "learning_rate": 4.2528791565287916e-05, "loss": 0.6026, "step": 8032 }, { "epoch": 0.2345332982978599, "grad_norm": 0.531295993699194, "learning_rate": 4.25271695052717e-05, "loss": 0.5841, "step": 8033 }, { "epoch": 0.23456249452570727, "grad_norm": 0.5368423833710029, "learning_rate": 4.252554744525547e-05, "loss": 0.5681, "step": 8034 }, { "epoch": 0.23459169075355463, "grad_norm": 0.5925140751128636, "learning_rate": 4.2523925385239254e-05, "loss": 0.6702, "step": 8035 }, { "epoch": 0.23462088698140202, "grad_norm": 0.5202344376595308, "learning_rate": 4.2522303325223036e-05, "loss": 0.53, "step": 8036 }, { "epoch": 0.23465008320924938, "grad_norm": 0.5944622175614751, "learning_rate": 4.252068126520681e-05, "loss": 0.6548, "step": 8037 }, { "epoch": 0.23467927943709674, "grad_norm": 0.6191113847075221, "learning_rate": 4.251905920519059e-05, "loss": 0.7085, "step": 8038 }, { "epoch": 0.2347084756649441, "grad_norm": 0.5824566264557647, "learning_rate": 4.2517437145174375e-05, "loss": 0.6861, "step": 8039 }, { "epoch": 0.23473767189279146, "grad_norm": 0.5548276276039499, "learning_rate": 4.251581508515815e-05, "loss": 0.6242, "step": 8040 }, { "epoch": 0.23476686812063882, "grad_norm": 0.5585161711182464, "learning_rate": 4.251419302514193e-05, "loss": 0.6716, "step": 8041 }, { "epoch": 0.23479606434848618, "grad_norm": 0.6314679172989557, "learning_rate": 4.251257096512571e-05, "loss": 0.7334, "step": 8042 }, { "epoch": 0.23482526057633354, "grad_norm": 0.6101315047046136, "learning_rate": 4.2510948905109495e-05, "loss": 0.6994, "step": 8043 }, { "epoch": 0.2348544568041809, "grad_norm": 0.5428465532981164, "learning_rate": 4.250932684509327e-05, "loss": 0.6798, "step": 8044 }, { "epoch": 0.23488365303202827, "grad_norm": 0.5711852758534747, "learning_rate": 4.250770478507705e-05, "loss": 0.6609, "step": 8045 }, { "epoch": 0.23491284925987563, "grad_norm": 0.5851867385708164, "learning_rate": 4.250608272506083e-05, "loss": 0.6291, "step": 8046 }, { "epoch": 0.234942045487723, "grad_norm": 0.5625320857911801, "learning_rate": 4.250446066504461e-05, "loss": 0.6817, "step": 8047 }, { "epoch": 0.23497124171557035, "grad_norm": 0.6571306782780947, "learning_rate": 4.250283860502839e-05, "loss": 0.7463, "step": 8048 }, { "epoch": 0.2350004379434177, "grad_norm": 0.6398345674953799, "learning_rate": 4.2501216545012165e-05, "loss": 0.7483, "step": 8049 }, { "epoch": 0.23502963417126507, "grad_norm": 0.6535183325596312, "learning_rate": 4.249959448499595e-05, "loss": 0.7677, "step": 8050 }, { "epoch": 0.23505883039911243, "grad_norm": 0.5551554626859798, "learning_rate": 4.249797242497973e-05, "loss": 0.63, "step": 8051 }, { "epoch": 0.2350880266269598, "grad_norm": 0.589284535828321, "learning_rate": 4.2496350364963504e-05, "loss": 0.7299, "step": 8052 }, { "epoch": 0.23511722285480716, "grad_norm": 0.5827577444980627, "learning_rate": 4.2494728304947285e-05, "loss": 0.7295, "step": 8053 }, { "epoch": 0.23514641908265452, "grad_norm": 0.5340507544918992, "learning_rate": 4.249310624493106e-05, "loss": 0.6278, "step": 8054 }, { "epoch": 0.23517561531050188, "grad_norm": 0.5670987480375524, "learning_rate": 4.249148418491484e-05, "loss": 0.6479, "step": 8055 }, { "epoch": 0.23520481153834924, "grad_norm": 0.630028834818726, "learning_rate": 4.2489862124898624e-05, "loss": 0.765, "step": 8056 }, { "epoch": 0.2352340077661966, "grad_norm": 0.5580679227964754, "learning_rate": 4.24882400648824e-05, "loss": 0.6418, "step": 8057 }, { "epoch": 0.23526320399404396, "grad_norm": 0.5722297372807217, "learning_rate": 4.248661800486618e-05, "loss": 0.68, "step": 8058 }, { "epoch": 0.23529240022189132, "grad_norm": 0.56454827378764, "learning_rate": 4.248499594484996e-05, "loss": 0.6316, "step": 8059 }, { "epoch": 0.23532159644973868, "grad_norm": 0.5442223001365335, "learning_rate": 4.248337388483374e-05, "loss": 0.6143, "step": 8060 }, { "epoch": 0.23535079267758605, "grad_norm": 0.5774059721134457, "learning_rate": 4.2481751824817526e-05, "loss": 0.7199, "step": 8061 }, { "epoch": 0.2353799889054334, "grad_norm": 0.6520916245316111, "learning_rate": 4.24801297648013e-05, "loss": 0.6889, "step": 8062 }, { "epoch": 0.23540918513328077, "grad_norm": 0.5531348776473146, "learning_rate": 4.247850770478508e-05, "loss": 0.6318, "step": 8063 }, { "epoch": 0.23543838136112813, "grad_norm": 0.5659022664740675, "learning_rate": 4.247688564476886e-05, "loss": 0.6435, "step": 8064 }, { "epoch": 0.2354675775889755, "grad_norm": 0.5627022840987195, "learning_rate": 4.247526358475264e-05, "loss": 0.6372, "step": 8065 }, { "epoch": 0.23549677381682288, "grad_norm": 0.5645027815144209, "learning_rate": 4.247364152473642e-05, "loss": 0.6404, "step": 8066 }, { "epoch": 0.23552597004467024, "grad_norm": 0.5821578378133311, "learning_rate": 4.2472019464720196e-05, "loss": 0.7166, "step": 8067 }, { "epoch": 0.2355551662725176, "grad_norm": 0.5299723415235322, "learning_rate": 4.247039740470398e-05, "loss": 0.6024, "step": 8068 }, { "epoch": 0.23558436250036496, "grad_norm": 0.559957246306468, "learning_rate": 4.246877534468775e-05, "loss": 0.6477, "step": 8069 }, { "epoch": 0.23561355872821232, "grad_norm": 0.5361142316667428, "learning_rate": 4.2467153284671535e-05, "loss": 0.6519, "step": 8070 }, { "epoch": 0.23564275495605969, "grad_norm": 0.5370409427927034, "learning_rate": 4.2465531224655316e-05, "loss": 0.6156, "step": 8071 }, { "epoch": 0.23567195118390705, "grad_norm": 0.5741246019920616, "learning_rate": 4.246390916463909e-05, "loss": 0.6559, "step": 8072 }, { "epoch": 0.2357011474117544, "grad_norm": 0.5298473627816566, "learning_rate": 4.246228710462287e-05, "loss": 0.611, "step": 8073 }, { "epoch": 0.23573034363960177, "grad_norm": 0.6150824050126555, "learning_rate": 4.246066504460665e-05, "loss": 0.7595, "step": 8074 }, { "epoch": 0.23575953986744913, "grad_norm": 0.5610815559316844, "learning_rate": 4.245904298459043e-05, "loss": 0.6409, "step": 8075 }, { "epoch": 0.2357887360952965, "grad_norm": 0.5414124527667508, "learning_rate": 4.245742092457421e-05, "loss": 0.6056, "step": 8076 }, { "epoch": 0.23581793232314385, "grad_norm": 0.6381116076728288, "learning_rate": 4.245579886455799e-05, "loss": 0.7122, "step": 8077 }, { "epoch": 0.23584712855099121, "grad_norm": 0.5710354027361425, "learning_rate": 4.245417680454177e-05, "loss": 0.68, "step": 8078 }, { "epoch": 0.23587632477883858, "grad_norm": 0.5348896703693883, "learning_rate": 4.245255474452554e-05, "loss": 0.6442, "step": 8079 }, { "epoch": 0.23590552100668594, "grad_norm": 0.648747945302233, "learning_rate": 4.245093268450933e-05, "loss": 0.7009, "step": 8080 }, { "epoch": 0.2359347172345333, "grad_norm": 0.5796508254326335, "learning_rate": 4.2449310624493114e-05, "loss": 0.7212, "step": 8081 }, { "epoch": 0.23596391346238066, "grad_norm": 0.5975188871933881, "learning_rate": 4.244768856447689e-05, "loss": 0.7043, "step": 8082 }, { "epoch": 0.23599310969022802, "grad_norm": 0.6170628006612663, "learning_rate": 4.244606650446067e-05, "loss": 0.7182, "step": 8083 }, { "epoch": 0.23602230591807538, "grad_norm": 0.5343399162910885, "learning_rate": 4.2444444444444445e-05, "loss": 0.5908, "step": 8084 }, { "epoch": 0.23605150214592274, "grad_norm": 0.6190008926311165, "learning_rate": 4.244282238442823e-05, "loss": 0.7266, "step": 8085 }, { "epoch": 0.2360806983737701, "grad_norm": 0.6063757815327088, "learning_rate": 4.244120032441201e-05, "loss": 0.7029, "step": 8086 }, { "epoch": 0.23610989460161746, "grad_norm": 0.5731259608655181, "learning_rate": 4.2439578264395784e-05, "loss": 0.6977, "step": 8087 }, { "epoch": 0.23613909082946483, "grad_norm": 0.5847583869939925, "learning_rate": 4.2437956204379566e-05, "loss": 0.7532, "step": 8088 }, { "epoch": 0.2361682870573122, "grad_norm": 0.5709733698366358, "learning_rate": 4.243633414436334e-05, "loss": 0.6559, "step": 8089 }, { "epoch": 0.23619748328515955, "grad_norm": 0.7291887512836638, "learning_rate": 4.243471208434712e-05, "loss": 0.789, "step": 8090 }, { "epoch": 0.2362266795130069, "grad_norm": 0.566023375701989, "learning_rate": 4.2433090024330904e-05, "loss": 0.6469, "step": 8091 }, { "epoch": 0.23625587574085427, "grad_norm": 0.5672126987594445, "learning_rate": 4.243146796431468e-05, "loss": 0.6367, "step": 8092 }, { "epoch": 0.23628507196870163, "grad_norm": 0.575253844326253, "learning_rate": 4.242984590429846e-05, "loss": 0.705, "step": 8093 }, { "epoch": 0.236314268196549, "grad_norm": 0.5958360610530714, "learning_rate": 4.2428223844282236e-05, "loss": 0.6791, "step": 8094 }, { "epoch": 0.23634346442439635, "grad_norm": 0.5378348375311837, "learning_rate": 4.242660178426602e-05, "loss": 0.6074, "step": 8095 }, { "epoch": 0.23637266065224374, "grad_norm": 0.5203494729563296, "learning_rate": 4.24249797242498e-05, "loss": 0.5547, "step": 8096 }, { "epoch": 0.2364018568800911, "grad_norm": 0.5606685458812256, "learning_rate": 4.2423357664233574e-05, "loss": 0.6293, "step": 8097 }, { "epoch": 0.23643105310793847, "grad_norm": 0.6301841635066006, "learning_rate": 4.242173560421736e-05, "loss": 0.7964, "step": 8098 }, { "epoch": 0.23646024933578583, "grad_norm": 0.5554182576876328, "learning_rate": 4.242011354420114e-05, "loss": 0.613, "step": 8099 }, { "epoch": 0.2364894455636332, "grad_norm": 0.5703027057197214, "learning_rate": 4.241849148418492e-05, "loss": 0.6769, "step": 8100 }, { "epoch": 0.23651864179148055, "grad_norm": 0.5986434755058562, "learning_rate": 4.24168694241687e-05, "loss": 0.7051, "step": 8101 }, { "epoch": 0.2365478380193279, "grad_norm": 0.5821135689714922, "learning_rate": 4.2415247364152476e-05, "loss": 0.6479, "step": 8102 }, { "epoch": 0.23657703424717527, "grad_norm": 0.5547734968869804, "learning_rate": 4.241362530413626e-05, "loss": 0.5983, "step": 8103 }, { "epoch": 0.23660623047502263, "grad_norm": 0.5619766590031045, "learning_rate": 4.241200324412003e-05, "loss": 0.7025, "step": 8104 }, { "epoch": 0.23663542670287, "grad_norm": 0.5507055847966639, "learning_rate": 4.2410381184103815e-05, "loss": 0.6092, "step": 8105 }, { "epoch": 0.23666462293071736, "grad_norm": 0.5394698400767971, "learning_rate": 4.24087591240876e-05, "loss": 0.6393, "step": 8106 }, { "epoch": 0.23669381915856472, "grad_norm": 0.5648278821769397, "learning_rate": 4.240713706407137e-05, "loss": 0.6087, "step": 8107 }, { "epoch": 0.23672301538641208, "grad_norm": 0.5993568585748329, "learning_rate": 4.2405515004055153e-05, "loss": 0.6792, "step": 8108 }, { "epoch": 0.23675221161425944, "grad_norm": 0.582299130199716, "learning_rate": 4.240389294403893e-05, "loss": 0.7031, "step": 8109 }, { "epoch": 0.2367814078421068, "grad_norm": 0.5277034385001844, "learning_rate": 4.240227088402271e-05, "loss": 0.5832, "step": 8110 }, { "epoch": 0.23681060406995416, "grad_norm": 0.595469059012491, "learning_rate": 4.240064882400649e-05, "loss": 0.6777, "step": 8111 }, { "epoch": 0.23683980029780152, "grad_norm": 0.6341161716790215, "learning_rate": 4.239902676399027e-05, "loss": 0.8135, "step": 8112 }, { "epoch": 0.23686899652564888, "grad_norm": 0.5846964599936024, "learning_rate": 4.239740470397405e-05, "loss": 0.6802, "step": 8113 }, { "epoch": 0.23689819275349625, "grad_norm": 0.550569413084964, "learning_rate": 4.2395782643957824e-05, "loss": 0.6089, "step": 8114 }, { "epoch": 0.2369273889813436, "grad_norm": 0.5681487241317268, "learning_rate": 4.2394160583941605e-05, "loss": 0.6949, "step": 8115 }, { "epoch": 0.23695658520919097, "grad_norm": 0.5328147470368939, "learning_rate": 4.239253852392539e-05, "loss": 0.5996, "step": 8116 }, { "epoch": 0.23698578143703833, "grad_norm": 0.6362919989029104, "learning_rate": 4.239091646390917e-05, "loss": 0.6582, "step": 8117 }, { "epoch": 0.2370149776648857, "grad_norm": 0.5192665278022593, "learning_rate": 4.238929440389295e-05, "loss": 0.6106, "step": 8118 }, { "epoch": 0.23704417389273305, "grad_norm": 0.5819201148143256, "learning_rate": 4.2387672343876726e-05, "loss": 0.6978, "step": 8119 }, { "epoch": 0.2370733701205804, "grad_norm": 0.5099489723159293, "learning_rate": 4.238605028386051e-05, "loss": 0.5675, "step": 8120 }, { "epoch": 0.23710256634842777, "grad_norm": 0.5358155569268905, "learning_rate": 4.238442822384428e-05, "loss": 0.6298, "step": 8121 }, { "epoch": 0.23713176257627513, "grad_norm": 0.4775750773366745, "learning_rate": 4.2382806163828064e-05, "loss": 0.4968, "step": 8122 }, { "epoch": 0.2371609588041225, "grad_norm": 0.6094844764377811, "learning_rate": 4.2381184103811846e-05, "loss": 0.7375, "step": 8123 }, { "epoch": 0.23719015503196986, "grad_norm": 0.6045224420753268, "learning_rate": 4.237956204379562e-05, "loss": 0.7206, "step": 8124 }, { "epoch": 0.23721935125981722, "grad_norm": 0.5325994657081086, "learning_rate": 4.23779399837794e-05, "loss": 0.5951, "step": 8125 }, { "epoch": 0.2372485474876646, "grad_norm": 0.5806340942292946, "learning_rate": 4.2376317923763185e-05, "loss": 0.6868, "step": 8126 }, { "epoch": 0.23727774371551197, "grad_norm": 0.6156525073048122, "learning_rate": 4.237469586374696e-05, "loss": 0.7243, "step": 8127 }, { "epoch": 0.23730693994335933, "grad_norm": 0.557416579216817, "learning_rate": 4.237307380373074e-05, "loss": 0.6339, "step": 8128 }, { "epoch": 0.2373361361712067, "grad_norm": 0.5324484903933943, "learning_rate": 4.2371451743714516e-05, "loss": 0.5862, "step": 8129 }, { "epoch": 0.23736533239905405, "grad_norm": 0.5450815500907642, "learning_rate": 4.23698296836983e-05, "loss": 0.6222, "step": 8130 }, { "epoch": 0.2373945286269014, "grad_norm": 0.5828498282991473, "learning_rate": 4.236820762368208e-05, "loss": 0.6659, "step": 8131 }, { "epoch": 0.23742372485474877, "grad_norm": 0.576113652597581, "learning_rate": 4.2366585563665855e-05, "loss": 0.6942, "step": 8132 }, { "epoch": 0.23745292108259614, "grad_norm": 0.5925848649967412, "learning_rate": 4.2364963503649637e-05, "loss": 0.7293, "step": 8133 }, { "epoch": 0.2374821173104435, "grad_norm": 0.5700616986271694, "learning_rate": 4.236334144363341e-05, "loss": 0.6522, "step": 8134 }, { "epoch": 0.23751131353829086, "grad_norm": 0.5866276521401347, "learning_rate": 4.236171938361719e-05, "loss": 0.6795, "step": 8135 }, { "epoch": 0.23754050976613822, "grad_norm": 0.5343156686355777, "learning_rate": 4.2360097323600975e-05, "loss": 0.5763, "step": 8136 }, { "epoch": 0.23756970599398558, "grad_norm": 0.5327863097423338, "learning_rate": 4.235847526358476e-05, "loss": 0.633, "step": 8137 }, { "epoch": 0.23759890222183294, "grad_norm": 0.6354812747218164, "learning_rate": 4.235685320356854e-05, "loss": 0.7025, "step": 8138 }, { "epoch": 0.2376280984496803, "grad_norm": 0.5708456082883687, "learning_rate": 4.2355231143552314e-05, "loss": 0.5837, "step": 8139 }, { "epoch": 0.23765729467752766, "grad_norm": 0.5533488012576304, "learning_rate": 4.2353609083536095e-05, "loss": 0.6216, "step": 8140 }, { "epoch": 0.23768649090537503, "grad_norm": 0.5501763031814507, "learning_rate": 4.235198702351987e-05, "loss": 0.6591, "step": 8141 }, { "epoch": 0.2377156871332224, "grad_norm": 0.49632167633605356, "learning_rate": 4.235036496350365e-05, "loss": 0.5503, "step": 8142 }, { "epoch": 0.23774488336106975, "grad_norm": 0.6020312180864732, "learning_rate": 4.2348742903487434e-05, "loss": 0.6875, "step": 8143 }, { "epoch": 0.2377740795889171, "grad_norm": 0.5203099956853666, "learning_rate": 4.234712084347121e-05, "loss": 0.5907, "step": 8144 }, { "epoch": 0.23780327581676447, "grad_norm": 0.6051589741851804, "learning_rate": 4.234549878345499e-05, "loss": 0.6762, "step": 8145 }, { "epoch": 0.23783247204461183, "grad_norm": 0.5603876748961605, "learning_rate": 4.234387672343877e-05, "loss": 0.6301, "step": 8146 }, { "epoch": 0.2378616682724592, "grad_norm": 0.5829832130574113, "learning_rate": 4.234225466342255e-05, "loss": 0.71, "step": 8147 }, { "epoch": 0.23789086450030655, "grad_norm": 0.5266173227991887, "learning_rate": 4.234063260340633e-05, "loss": 0.6135, "step": 8148 }, { "epoch": 0.23792006072815391, "grad_norm": 0.542797601811061, "learning_rate": 4.2339010543390104e-05, "loss": 0.6327, "step": 8149 }, { "epoch": 0.23794925695600128, "grad_norm": 0.5209282437278563, "learning_rate": 4.2337388483373886e-05, "loss": 0.5994, "step": 8150 }, { "epoch": 0.23797845318384864, "grad_norm": 0.5267055441516316, "learning_rate": 4.233576642335767e-05, "loss": 0.5752, "step": 8151 }, { "epoch": 0.238007649411696, "grad_norm": 0.566755102386585, "learning_rate": 4.233414436334144e-05, "loss": 0.6718, "step": 8152 }, { "epoch": 0.23803684563954336, "grad_norm": 0.5542995406115075, "learning_rate": 4.2332522303325224e-05, "loss": 0.6659, "step": 8153 }, { "epoch": 0.23806604186739072, "grad_norm": 0.6202925987362634, "learning_rate": 4.2330900243309e-05, "loss": 0.7443, "step": 8154 }, { "epoch": 0.23809523809523808, "grad_norm": 0.5700230465776299, "learning_rate": 4.232927818329279e-05, "loss": 0.6694, "step": 8155 }, { "epoch": 0.23812443432308547, "grad_norm": 0.6094784539695524, "learning_rate": 4.232765612327656e-05, "loss": 0.6834, "step": 8156 }, { "epoch": 0.23815363055093283, "grad_norm": 0.5298140118209701, "learning_rate": 4.2326034063260345e-05, "loss": 0.6131, "step": 8157 }, { "epoch": 0.2381828267787802, "grad_norm": 0.5780584964858112, "learning_rate": 4.2324412003244126e-05, "loss": 0.644, "step": 8158 }, { "epoch": 0.23821202300662755, "grad_norm": 0.5345004458043019, "learning_rate": 4.23227899432279e-05, "loss": 0.6247, "step": 8159 }, { "epoch": 0.23824121923447492, "grad_norm": 0.6115978275754629, "learning_rate": 4.232116788321168e-05, "loss": 0.6792, "step": 8160 }, { "epoch": 0.23827041546232228, "grad_norm": 0.5150911083493921, "learning_rate": 4.231954582319546e-05, "loss": 0.5889, "step": 8161 }, { "epoch": 0.23829961169016964, "grad_norm": 0.5375492526655695, "learning_rate": 4.231792376317924e-05, "loss": 0.5998, "step": 8162 }, { "epoch": 0.238328807918017, "grad_norm": 0.5364428883853414, "learning_rate": 4.231630170316302e-05, "loss": 0.6546, "step": 8163 }, { "epoch": 0.23835800414586436, "grad_norm": 0.6199239245614451, "learning_rate": 4.23146796431468e-05, "loss": 0.6774, "step": 8164 }, { "epoch": 0.23838720037371172, "grad_norm": 0.5206643837455346, "learning_rate": 4.231305758313058e-05, "loss": 0.583, "step": 8165 }, { "epoch": 0.23841639660155908, "grad_norm": 0.5708757062457274, "learning_rate": 4.231143552311435e-05, "loss": 0.6308, "step": 8166 }, { "epoch": 0.23844559282940644, "grad_norm": 0.57251812664078, "learning_rate": 4.2309813463098135e-05, "loss": 0.6695, "step": 8167 }, { "epoch": 0.2384747890572538, "grad_norm": 0.516625114995432, "learning_rate": 4.230819140308192e-05, "loss": 0.5686, "step": 8168 }, { "epoch": 0.23850398528510117, "grad_norm": 0.5764629041156519, "learning_rate": 4.230656934306569e-05, "loss": 0.6154, "step": 8169 }, { "epoch": 0.23853318151294853, "grad_norm": 0.5696323828681005, "learning_rate": 4.2304947283049474e-05, "loss": 0.6996, "step": 8170 }, { "epoch": 0.2385623777407959, "grad_norm": 0.5527950602469703, "learning_rate": 4.2303325223033255e-05, "loss": 0.6279, "step": 8171 }, { "epoch": 0.23859157396864325, "grad_norm": 0.5585479186409198, "learning_rate": 4.230170316301703e-05, "loss": 0.6425, "step": 8172 }, { "epoch": 0.2386207701964906, "grad_norm": 0.5223121004848915, "learning_rate": 4.230008110300081e-05, "loss": 0.5624, "step": 8173 }, { "epoch": 0.23864996642433797, "grad_norm": 0.5599300436615575, "learning_rate": 4.2298459042984594e-05, "loss": 0.6761, "step": 8174 }, { "epoch": 0.23867916265218533, "grad_norm": 0.6048364187246951, "learning_rate": 4.2296836982968376e-05, "loss": 0.714, "step": 8175 }, { "epoch": 0.2387083588800327, "grad_norm": 0.5182656707024371, "learning_rate": 4.229521492295215e-05, "loss": 0.5492, "step": 8176 }, { "epoch": 0.23873755510788006, "grad_norm": 0.5824504144622799, "learning_rate": 4.229359286293593e-05, "loss": 0.7184, "step": 8177 }, { "epoch": 0.23876675133572742, "grad_norm": 0.5481676567240624, "learning_rate": 4.2291970802919714e-05, "loss": 0.6845, "step": 8178 }, { "epoch": 0.23879594756357478, "grad_norm": 0.5935658670972659, "learning_rate": 4.229034874290349e-05, "loss": 0.7248, "step": 8179 }, { "epoch": 0.23882514379142214, "grad_norm": 0.5308439159445186, "learning_rate": 4.228872668288727e-05, "loss": 0.5826, "step": 8180 }, { "epoch": 0.2388543400192695, "grad_norm": 0.5665312510879456, "learning_rate": 4.2287104622871046e-05, "loss": 0.6786, "step": 8181 }, { "epoch": 0.23888353624711686, "grad_norm": 0.562660376066703, "learning_rate": 4.228548256285483e-05, "loss": 0.6661, "step": 8182 }, { "epoch": 0.23891273247496422, "grad_norm": 0.569127139059846, "learning_rate": 4.228386050283861e-05, "loss": 0.6461, "step": 8183 }, { "epoch": 0.23894192870281158, "grad_norm": 0.6028428685233378, "learning_rate": 4.2282238442822384e-05, "loss": 0.7218, "step": 8184 }, { "epoch": 0.23897112493065895, "grad_norm": 0.543524344309152, "learning_rate": 4.2280616382806166e-05, "loss": 0.664, "step": 8185 }, { "epoch": 0.23900032115850633, "grad_norm": 0.6596611509472315, "learning_rate": 4.227899432278994e-05, "loss": 0.7567, "step": 8186 }, { "epoch": 0.2390295173863537, "grad_norm": 0.5498344895282335, "learning_rate": 4.227737226277372e-05, "loss": 0.6754, "step": 8187 }, { "epoch": 0.23905871361420106, "grad_norm": 0.5518538150471259, "learning_rate": 4.2275750202757505e-05, "loss": 0.6484, "step": 8188 }, { "epoch": 0.23908790984204842, "grad_norm": 0.5817387218687198, "learning_rate": 4.227412814274128e-05, "loss": 0.6865, "step": 8189 }, { "epoch": 0.23911710606989578, "grad_norm": 0.5716167395088031, "learning_rate": 4.227250608272506e-05, "loss": 0.6555, "step": 8190 }, { "epoch": 0.23914630229774314, "grad_norm": 0.5394954624705137, "learning_rate": 4.227088402270884e-05, "loss": 0.6189, "step": 8191 }, { "epoch": 0.2391754985255905, "grad_norm": 0.547457475493619, "learning_rate": 4.226926196269262e-05, "loss": 0.6394, "step": 8192 }, { "epoch": 0.23920469475343786, "grad_norm": 0.575054202450986, "learning_rate": 4.226763990267641e-05, "loss": 0.6577, "step": 8193 }, { "epoch": 0.23923389098128522, "grad_norm": 0.5344711172337089, "learning_rate": 4.226601784266018e-05, "loss": 0.6116, "step": 8194 }, { "epoch": 0.23926308720913259, "grad_norm": 0.5734767757099252, "learning_rate": 4.2264395782643963e-05, "loss": 0.6936, "step": 8195 }, { "epoch": 0.23929228343697995, "grad_norm": 0.5388744092672461, "learning_rate": 4.226277372262774e-05, "loss": 0.6185, "step": 8196 }, { "epoch": 0.2393214796648273, "grad_norm": 0.5826165450934667, "learning_rate": 4.226115166261152e-05, "loss": 0.6755, "step": 8197 }, { "epoch": 0.23935067589267467, "grad_norm": 0.6096696529921434, "learning_rate": 4.22595296025953e-05, "loss": 0.7194, "step": 8198 }, { "epoch": 0.23937987212052203, "grad_norm": 0.5501133232532454, "learning_rate": 4.225790754257908e-05, "loss": 0.6321, "step": 8199 }, { "epoch": 0.2394090683483694, "grad_norm": 0.5626307911607195, "learning_rate": 4.225628548256286e-05, "loss": 0.65, "step": 8200 }, { "epoch": 0.23943826457621675, "grad_norm": 0.6136766295878902, "learning_rate": 4.2254663422546634e-05, "loss": 0.7402, "step": 8201 }, { "epoch": 0.23946746080406411, "grad_norm": 0.5523192308316246, "learning_rate": 4.2253041362530415e-05, "loss": 0.624, "step": 8202 }, { "epoch": 0.23949665703191148, "grad_norm": 0.5379897214708573, "learning_rate": 4.22514193025142e-05, "loss": 0.618, "step": 8203 }, { "epoch": 0.23952585325975884, "grad_norm": 0.6065946272340349, "learning_rate": 4.224979724249797e-05, "loss": 0.7319, "step": 8204 }, { "epoch": 0.2395550494876062, "grad_norm": 0.5861368829374876, "learning_rate": 4.2248175182481754e-05, "loss": 0.6291, "step": 8205 }, { "epoch": 0.23958424571545356, "grad_norm": 0.5465728308492992, "learning_rate": 4.224655312246553e-05, "loss": 0.5419, "step": 8206 }, { "epoch": 0.23961344194330092, "grad_norm": 0.5916807986661158, "learning_rate": 4.224493106244931e-05, "loss": 0.6688, "step": 8207 }, { "epoch": 0.23964263817114828, "grad_norm": 0.5526065244635685, "learning_rate": 4.224330900243309e-05, "loss": 0.6564, "step": 8208 }, { "epoch": 0.23967183439899564, "grad_norm": 0.5463117652757132, "learning_rate": 4.224168694241687e-05, "loss": 0.6353, "step": 8209 }, { "epoch": 0.239701030626843, "grad_norm": 0.6184120619424582, "learning_rate": 4.224006488240065e-05, "loss": 0.671, "step": 8210 }, { "epoch": 0.23973022685469036, "grad_norm": 0.6459991556186535, "learning_rate": 4.2238442822384424e-05, "loss": 0.6377, "step": 8211 }, { "epoch": 0.23975942308253773, "grad_norm": 0.5537854156668449, "learning_rate": 4.223682076236821e-05, "loss": 0.6299, "step": 8212 }, { "epoch": 0.2397886193103851, "grad_norm": 0.5407801125212276, "learning_rate": 4.2235198702351995e-05, "loss": 0.6119, "step": 8213 }, { "epoch": 0.23981781553823245, "grad_norm": 0.5795598413920552, "learning_rate": 4.223357664233577e-05, "loss": 0.6914, "step": 8214 }, { "epoch": 0.2398470117660798, "grad_norm": 0.5253457389335674, "learning_rate": 4.223195458231955e-05, "loss": 0.6246, "step": 8215 }, { "epoch": 0.23987620799392717, "grad_norm": 0.5816560035741866, "learning_rate": 4.2230332522303326e-05, "loss": 0.6623, "step": 8216 }, { "epoch": 0.23990540422177456, "grad_norm": 0.5371041321449543, "learning_rate": 4.222871046228711e-05, "loss": 0.6436, "step": 8217 }, { "epoch": 0.23993460044962192, "grad_norm": 0.5459015942692144, "learning_rate": 4.222708840227089e-05, "loss": 0.6216, "step": 8218 }, { "epoch": 0.23996379667746928, "grad_norm": 0.6068925001187269, "learning_rate": 4.2225466342254665e-05, "loss": 0.7057, "step": 8219 }, { "epoch": 0.23999299290531664, "grad_norm": 0.5932694689829543, "learning_rate": 4.2223844282238447e-05, "loss": 0.6716, "step": 8220 }, { "epoch": 0.240022189133164, "grad_norm": 0.5721631951913396, "learning_rate": 4.222222222222222e-05, "loss": 0.6909, "step": 8221 }, { "epoch": 0.24005138536101137, "grad_norm": 0.5219475214543755, "learning_rate": 4.2220600162206e-05, "loss": 0.6136, "step": 8222 }, { "epoch": 0.24008058158885873, "grad_norm": 0.8200939327601378, "learning_rate": 4.2218978102189785e-05, "loss": 0.7869, "step": 8223 }, { "epoch": 0.2401097778167061, "grad_norm": 0.5668498398629459, "learning_rate": 4.221735604217356e-05, "loss": 0.6687, "step": 8224 }, { "epoch": 0.24013897404455345, "grad_norm": 0.6055281868279635, "learning_rate": 4.221573398215734e-05, "loss": 0.7393, "step": 8225 }, { "epoch": 0.2401681702724008, "grad_norm": 0.5540339573496519, "learning_rate": 4.221411192214112e-05, "loss": 0.6479, "step": 8226 }, { "epoch": 0.24019736650024817, "grad_norm": 0.5428672521888447, "learning_rate": 4.22124898621249e-05, "loss": 0.5986, "step": 8227 }, { "epoch": 0.24022656272809553, "grad_norm": 0.5828845923623217, "learning_rate": 4.221086780210868e-05, "loss": 0.6816, "step": 8228 }, { "epoch": 0.2402557589559429, "grad_norm": 0.5706882797932837, "learning_rate": 4.2209245742092455e-05, "loss": 0.7029, "step": 8229 }, { "epoch": 0.24028495518379026, "grad_norm": 0.5408526426638911, "learning_rate": 4.220762368207624e-05, "loss": 0.6305, "step": 8230 }, { "epoch": 0.24031415141163762, "grad_norm": 0.5778133146537177, "learning_rate": 4.220600162206002e-05, "loss": 0.7246, "step": 8231 }, { "epoch": 0.24034334763948498, "grad_norm": 0.5842199148536643, "learning_rate": 4.22043795620438e-05, "loss": 0.6564, "step": 8232 }, { "epoch": 0.24037254386733234, "grad_norm": 0.5131353087325419, "learning_rate": 4.2202757502027576e-05, "loss": 0.5609, "step": 8233 }, { "epoch": 0.2404017400951797, "grad_norm": 0.5072706857049509, "learning_rate": 4.220113544201136e-05, "loss": 0.5389, "step": 8234 }, { "epoch": 0.24043093632302706, "grad_norm": 0.5620841304433448, "learning_rate": 4.219951338199514e-05, "loss": 0.7138, "step": 8235 }, { "epoch": 0.24046013255087442, "grad_norm": 0.7969320878534424, "learning_rate": 4.2197891321978914e-05, "loss": 0.7829, "step": 8236 }, { "epoch": 0.24048932877872178, "grad_norm": 0.545070752395468, "learning_rate": 4.2196269261962696e-05, "loss": 0.6385, "step": 8237 }, { "epoch": 0.24051852500656914, "grad_norm": 0.6265959797750915, "learning_rate": 4.219464720194648e-05, "loss": 0.7422, "step": 8238 }, { "epoch": 0.2405477212344165, "grad_norm": 0.6082185212336984, "learning_rate": 4.219302514193025e-05, "loss": 0.7141, "step": 8239 }, { "epoch": 0.24057691746226387, "grad_norm": 0.5580624548035129, "learning_rate": 4.2191403081914034e-05, "loss": 0.6231, "step": 8240 }, { "epoch": 0.24060611369011123, "grad_norm": 0.5554520399633246, "learning_rate": 4.218978102189781e-05, "loss": 0.6091, "step": 8241 }, { "epoch": 0.2406353099179586, "grad_norm": 0.5887655044308711, "learning_rate": 4.218815896188159e-05, "loss": 0.7299, "step": 8242 }, { "epoch": 0.24066450614580595, "grad_norm": 0.5629293146002102, "learning_rate": 4.218653690186537e-05, "loss": 0.6057, "step": 8243 }, { "epoch": 0.2406937023736533, "grad_norm": 0.5438886416395874, "learning_rate": 4.218491484184915e-05, "loss": 0.6571, "step": 8244 }, { "epoch": 0.24072289860150067, "grad_norm": 0.5584241738940586, "learning_rate": 4.218329278183293e-05, "loss": 0.6403, "step": 8245 }, { "epoch": 0.24075209482934803, "grad_norm": 0.568940127135624, "learning_rate": 4.2181670721816705e-05, "loss": 0.6628, "step": 8246 }, { "epoch": 0.24078129105719542, "grad_norm": 0.5899764547515094, "learning_rate": 4.2180048661800486e-05, "loss": 0.7242, "step": 8247 }, { "epoch": 0.24081048728504278, "grad_norm": 0.6088469522816874, "learning_rate": 4.217842660178427e-05, "loss": 0.6779, "step": 8248 }, { "epoch": 0.24083968351289015, "grad_norm": 0.5719865866146552, "learning_rate": 4.217680454176805e-05, "loss": 0.6634, "step": 8249 }, { "epoch": 0.2408688797407375, "grad_norm": 0.5494830074523334, "learning_rate": 4.217518248175183e-05, "loss": 0.6479, "step": 8250 }, { "epoch": 0.24089807596858487, "grad_norm": 0.5821772846092177, "learning_rate": 4.217356042173561e-05, "loss": 0.7186, "step": 8251 }, { "epoch": 0.24092727219643223, "grad_norm": 0.549992891075517, "learning_rate": 4.217193836171939e-05, "loss": 0.595, "step": 8252 }, { "epoch": 0.2409564684242796, "grad_norm": 0.5441338653465094, "learning_rate": 4.217031630170316e-05, "loss": 0.5912, "step": 8253 }, { "epoch": 0.24098566465212695, "grad_norm": 0.5973896534492656, "learning_rate": 4.2168694241686945e-05, "loss": 0.6521, "step": 8254 }, { "epoch": 0.2410148608799743, "grad_norm": 0.5854767695558342, "learning_rate": 4.216707218167073e-05, "loss": 0.6701, "step": 8255 }, { "epoch": 0.24104405710782167, "grad_norm": 0.5392730434598171, "learning_rate": 4.21654501216545e-05, "loss": 0.6116, "step": 8256 }, { "epoch": 0.24107325333566904, "grad_norm": 0.5594813828191709, "learning_rate": 4.2163828061638284e-05, "loss": 0.6552, "step": 8257 }, { "epoch": 0.2411024495635164, "grad_norm": 0.5923925247187036, "learning_rate": 4.2162206001622065e-05, "loss": 0.7, "step": 8258 }, { "epoch": 0.24113164579136376, "grad_norm": 0.5684776550628964, "learning_rate": 4.216058394160584e-05, "loss": 0.6068, "step": 8259 }, { "epoch": 0.24116084201921112, "grad_norm": 0.5335593188789661, "learning_rate": 4.215896188158962e-05, "loss": 0.6183, "step": 8260 }, { "epoch": 0.24119003824705848, "grad_norm": 0.6084850751469746, "learning_rate": 4.21573398215734e-05, "loss": 0.7598, "step": 8261 }, { "epoch": 0.24121923447490584, "grad_norm": 0.6050765568363382, "learning_rate": 4.215571776155718e-05, "loss": 0.7018, "step": 8262 }, { "epoch": 0.2412484307027532, "grad_norm": 0.5557447241521477, "learning_rate": 4.215409570154096e-05, "loss": 0.6558, "step": 8263 }, { "epoch": 0.24127762693060056, "grad_norm": 0.5823775396118331, "learning_rate": 4.2152473641524736e-05, "loss": 0.6954, "step": 8264 }, { "epoch": 0.24130682315844793, "grad_norm": 0.5799897961697708, "learning_rate": 4.215085158150852e-05, "loss": 0.6758, "step": 8265 }, { "epoch": 0.2413360193862953, "grad_norm": 0.6135474431826569, "learning_rate": 4.214922952149229e-05, "loss": 0.7211, "step": 8266 }, { "epoch": 0.24136521561414265, "grad_norm": 0.5794136725577789, "learning_rate": 4.2147607461476074e-05, "loss": 0.7081, "step": 8267 }, { "epoch": 0.24139441184199, "grad_norm": 0.5603976735990599, "learning_rate": 4.2145985401459856e-05, "loss": 0.6308, "step": 8268 }, { "epoch": 0.24142360806983737, "grad_norm": 0.5566674631338472, "learning_rate": 4.214436334144364e-05, "loss": 0.656, "step": 8269 }, { "epoch": 0.24145280429768473, "grad_norm": 0.6077210281571349, "learning_rate": 4.214274128142742e-05, "loss": 0.6969, "step": 8270 }, { "epoch": 0.2414820005255321, "grad_norm": 0.5328987345665163, "learning_rate": 4.2141119221411194e-05, "loss": 0.6219, "step": 8271 }, { "epoch": 0.24151119675337945, "grad_norm": 0.5690984993256085, "learning_rate": 4.2139497161394976e-05, "loss": 0.6829, "step": 8272 }, { "epoch": 0.24154039298122681, "grad_norm": 0.5299400129286685, "learning_rate": 4.213787510137875e-05, "loss": 0.591, "step": 8273 }, { "epoch": 0.24156958920907418, "grad_norm": 0.5925754616907479, "learning_rate": 4.213625304136253e-05, "loss": 0.698, "step": 8274 }, { "epoch": 0.24159878543692154, "grad_norm": 0.5723888513739719, "learning_rate": 4.2134630981346315e-05, "loss": 0.7168, "step": 8275 }, { "epoch": 0.2416279816647689, "grad_norm": 0.5656698094737188, "learning_rate": 4.213300892133009e-05, "loss": 0.6556, "step": 8276 }, { "epoch": 0.2416571778926163, "grad_norm": 0.5252201165565317, "learning_rate": 4.213138686131387e-05, "loss": 0.6111, "step": 8277 }, { "epoch": 0.24168637412046365, "grad_norm": 0.5781325956387265, "learning_rate": 4.2129764801297646e-05, "loss": 0.7007, "step": 8278 }, { "epoch": 0.241715570348311, "grad_norm": 0.5871745456908914, "learning_rate": 4.212814274128143e-05, "loss": 0.7264, "step": 8279 }, { "epoch": 0.24174476657615837, "grad_norm": 0.6161675533307744, "learning_rate": 4.212652068126521e-05, "loss": 0.726, "step": 8280 }, { "epoch": 0.24177396280400573, "grad_norm": 0.5794802833420273, "learning_rate": 4.2124898621248985e-05, "loss": 0.6554, "step": 8281 }, { "epoch": 0.2418031590318531, "grad_norm": 0.552967765883173, "learning_rate": 4.212327656123277e-05, "loss": 0.6584, "step": 8282 }, { "epoch": 0.24183235525970045, "grad_norm": 0.5482905668693839, "learning_rate": 4.212165450121655e-05, "loss": 0.6401, "step": 8283 }, { "epoch": 0.24186155148754782, "grad_norm": 0.555405089835013, "learning_rate": 4.2120032441200323e-05, "loss": 0.6403, "step": 8284 }, { "epoch": 0.24189074771539518, "grad_norm": 0.5667789389975834, "learning_rate": 4.2118410381184105e-05, "loss": 0.6737, "step": 8285 }, { "epoch": 0.24191994394324254, "grad_norm": 0.5303952159781715, "learning_rate": 4.211678832116788e-05, "loss": 0.6083, "step": 8286 }, { "epoch": 0.2419491401710899, "grad_norm": 0.5553968915444428, "learning_rate": 4.211516626115167e-05, "loss": 0.6332, "step": 8287 }, { "epoch": 0.24197833639893726, "grad_norm": 0.5408449033155353, "learning_rate": 4.2113544201135444e-05, "loss": 0.6532, "step": 8288 }, { "epoch": 0.24200753262678462, "grad_norm": 0.5437107932642568, "learning_rate": 4.2111922141119226e-05, "loss": 0.6268, "step": 8289 }, { "epoch": 0.24203672885463198, "grad_norm": 0.5974391532323992, "learning_rate": 4.211030008110301e-05, "loss": 0.7732, "step": 8290 }, { "epoch": 0.24206592508247934, "grad_norm": 0.5403106404224746, "learning_rate": 4.210867802108678e-05, "loss": 0.6061, "step": 8291 }, { "epoch": 0.2420951213103267, "grad_norm": 0.5929390175857211, "learning_rate": 4.2107055961070564e-05, "loss": 0.642, "step": 8292 }, { "epoch": 0.24212431753817407, "grad_norm": 0.684351184867702, "learning_rate": 4.210543390105434e-05, "loss": 0.7545, "step": 8293 }, { "epoch": 0.24215351376602143, "grad_norm": 0.639224619455822, "learning_rate": 4.210381184103812e-05, "loss": 0.7445, "step": 8294 }, { "epoch": 0.2421827099938688, "grad_norm": 0.5480780535042885, "learning_rate": 4.21021897810219e-05, "loss": 0.5999, "step": 8295 }, { "epoch": 0.24221190622171615, "grad_norm": 0.5181865703793447, "learning_rate": 4.210056772100568e-05, "loss": 0.5687, "step": 8296 }, { "epoch": 0.2422411024495635, "grad_norm": 0.5438662231523967, "learning_rate": 4.209894566098946e-05, "loss": 0.601, "step": 8297 }, { "epoch": 0.24227029867741087, "grad_norm": 0.7457397424282242, "learning_rate": 4.2097323600973234e-05, "loss": 0.713, "step": 8298 }, { "epoch": 0.24229949490525823, "grad_norm": 0.5588123712506382, "learning_rate": 4.2095701540957016e-05, "loss": 0.6643, "step": 8299 }, { "epoch": 0.2423286911331056, "grad_norm": 0.5461024462439471, "learning_rate": 4.20940794809408e-05, "loss": 0.6744, "step": 8300 }, { "epoch": 0.24235788736095296, "grad_norm": 0.5891533573132482, "learning_rate": 4.209245742092457e-05, "loss": 0.7411, "step": 8301 }, { "epoch": 0.24238708358880032, "grad_norm": 0.5662160592848657, "learning_rate": 4.2090835360908355e-05, "loss": 0.7181, "step": 8302 }, { "epoch": 0.24241627981664768, "grad_norm": 0.5457569179782576, "learning_rate": 4.2089213300892136e-05, "loss": 0.6284, "step": 8303 }, { "epoch": 0.24244547604449504, "grad_norm": 0.563933069225182, "learning_rate": 4.208759124087591e-05, "loss": 0.6559, "step": 8304 }, { "epoch": 0.2424746722723424, "grad_norm": 0.5606578929355936, "learning_rate": 4.208596918085969e-05, "loss": 0.7007, "step": 8305 }, { "epoch": 0.24250386850018976, "grad_norm": 0.640177242462625, "learning_rate": 4.2084347120843475e-05, "loss": 0.7487, "step": 8306 }, { "epoch": 0.24253306472803715, "grad_norm": 0.5721292485335703, "learning_rate": 4.2082725060827257e-05, "loss": 0.6763, "step": 8307 }, { "epoch": 0.2425622609558845, "grad_norm": 0.5463221432969476, "learning_rate": 4.208110300081103e-05, "loss": 0.6349, "step": 8308 }, { "epoch": 0.24259145718373187, "grad_norm": 0.5359614668873046, "learning_rate": 4.207948094079481e-05, "loss": 0.6254, "step": 8309 }, { "epoch": 0.24262065341157923, "grad_norm": 0.582387026012886, "learning_rate": 4.2077858880778595e-05, "loss": 0.7042, "step": 8310 }, { "epoch": 0.2426498496394266, "grad_norm": 0.5227480247659647, "learning_rate": 4.207623682076237e-05, "loss": 0.5774, "step": 8311 }, { "epoch": 0.24267904586727396, "grad_norm": 0.6234939497321429, "learning_rate": 4.207461476074615e-05, "loss": 0.7874, "step": 8312 }, { "epoch": 0.24270824209512132, "grad_norm": 0.6029223978500944, "learning_rate": 4.207299270072993e-05, "loss": 0.6182, "step": 8313 }, { "epoch": 0.24273743832296868, "grad_norm": 0.6654188565971202, "learning_rate": 4.207137064071371e-05, "loss": 0.7925, "step": 8314 }, { "epoch": 0.24276663455081604, "grad_norm": 0.5816811528309682, "learning_rate": 4.206974858069749e-05, "loss": 0.6902, "step": 8315 }, { "epoch": 0.2427958307786634, "grad_norm": 0.7880705653078224, "learning_rate": 4.2068126520681265e-05, "loss": 0.6516, "step": 8316 }, { "epoch": 0.24282502700651076, "grad_norm": 0.5446611588231457, "learning_rate": 4.206650446066505e-05, "loss": 0.6433, "step": 8317 }, { "epoch": 0.24285422323435812, "grad_norm": 0.5556575152802355, "learning_rate": 4.206488240064882e-05, "loss": 0.6722, "step": 8318 }, { "epoch": 0.24288341946220549, "grad_norm": 0.5870703524297888, "learning_rate": 4.2063260340632604e-05, "loss": 0.6939, "step": 8319 }, { "epoch": 0.24291261569005285, "grad_norm": 0.5656669493198087, "learning_rate": 4.2061638280616386e-05, "loss": 0.6522, "step": 8320 }, { "epoch": 0.2429418119179002, "grad_norm": 0.5425832190279195, "learning_rate": 4.206001622060016e-05, "loss": 0.625, "step": 8321 }, { "epoch": 0.24297100814574757, "grad_norm": 0.5353242623616171, "learning_rate": 4.205839416058394e-05, "loss": 0.5854, "step": 8322 }, { "epoch": 0.24300020437359493, "grad_norm": 0.5572988317646617, "learning_rate": 4.205677210056772e-05, "loss": 0.635, "step": 8323 }, { "epoch": 0.2430294006014423, "grad_norm": 0.6332691191579066, "learning_rate": 4.20551500405515e-05, "loss": 0.7456, "step": 8324 }, { "epoch": 0.24305859682928965, "grad_norm": 0.5652243758330402, "learning_rate": 4.205352798053529e-05, "loss": 0.6093, "step": 8325 }, { "epoch": 0.243087793057137, "grad_norm": 0.5975457975116883, "learning_rate": 4.205190592051906e-05, "loss": 0.7489, "step": 8326 }, { "epoch": 0.24311698928498437, "grad_norm": 0.5633046317757944, "learning_rate": 4.2050283860502844e-05, "loss": 0.6395, "step": 8327 }, { "epoch": 0.24314618551283174, "grad_norm": 0.5667654692511829, "learning_rate": 4.204866180048662e-05, "loss": 0.697, "step": 8328 }, { "epoch": 0.2431753817406791, "grad_norm": 0.5477502116647045, "learning_rate": 4.20470397404704e-05, "loss": 0.5976, "step": 8329 }, { "epoch": 0.24320457796852646, "grad_norm": 0.5413027932414999, "learning_rate": 4.204541768045418e-05, "loss": 0.6182, "step": 8330 }, { "epoch": 0.24323377419637382, "grad_norm": 0.586879598337633, "learning_rate": 4.204379562043796e-05, "loss": 0.6738, "step": 8331 }, { "epoch": 0.24326297042422118, "grad_norm": 0.5345385388930396, "learning_rate": 4.204217356042174e-05, "loss": 0.5742, "step": 8332 }, { "epoch": 0.24329216665206854, "grad_norm": 0.5638531079573996, "learning_rate": 4.2040551500405515e-05, "loss": 0.6942, "step": 8333 }, { "epoch": 0.2433213628799159, "grad_norm": 0.5790929606499411, "learning_rate": 4.2038929440389296e-05, "loss": 0.6384, "step": 8334 }, { "epoch": 0.24335055910776326, "grad_norm": 0.6095353412989137, "learning_rate": 4.203730738037308e-05, "loss": 0.7308, "step": 8335 }, { "epoch": 0.24337975533561063, "grad_norm": 0.6051546967728881, "learning_rate": 4.203568532035685e-05, "loss": 0.7362, "step": 8336 }, { "epoch": 0.24340895156345801, "grad_norm": 0.5475392662096801, "learning_rate": 4.2034063260340635e-05, "loss": 0.6084, "step": 8337 }, { "epoch": 0.24343814779130538, "grad_norm": 0.5791870931383254, "learning_rate": 4.203244120032441e-05, "loss": 0.6722, "step": 8338 }, { "epoch": 0.24346734401915274, "grad_norm": 0.558262807658405, "learning_rate": 4.203081914030819e-05, "loss": 0.6187, "step": 8339 }, { "epoch": 0.2434965402470001, "grad_norm": 0.5819317400539267, "learning_rate": 4.2029197080291973e-05, "loss": 0.6784, "step": 8340 }, { "epoch": 0.24352573647484746, "grad_norm": 0.6500260273590681, "learning_rate": 4.202757502027575e-05, "loss": 0.7524, "step": 8341 }, { "epoch": 0.24355493270269482, "grad_norm": 0.5609050655871023, "learning_rate": 4.202595296025953e-05, "loss": 0.7052, "step": 8342 }, { "epoch": 0.24358412893054218, "grad_norm": 0.5307488741480919, "learning_rate": 4.2024330900243305e-05, "loss": 0.5861, "step": 8343 }, { "epoch": 0.24361332515838954, "grad_norm": 0.6171872522874768, "learning_rate": 4.2022708840227094e-05, "loss": 0.709, "step": 8344 }, { "epoch": 0.2436425213862369, "grad_norm": 0.5742217941638902, "learning_rate": 4.2021086780210875e-05, "loss": 0.7363, "step": 8345 }, { "epoch": 0.24367171761408427, "grad_norm": 0.5068152905660754, "learning_rate": 4.201946472019465e-05, "loss": 0.5649, "step": 8346 }, { "epoch": 0.24370091384193163, "grad_norm": 0.5571055352357868, "learning_rate": 4.201784266017843e-05, "loss": 0.6201, "step": 8347 }, { "epoch": 0.243730110069779, "grad_norm": 0.6257417327765672, "learning_rate": 4.201622060016221e-05, "loss": 0.788, "step": 8348 }, { "epoch": 0.24375930629762635, "grad_norm": 0.5771834237627914, "learning_rate": 4.201459854014599e-05, "loss": 0.653, "step": 8349 }, { "epoch": 0.2437885025254737, "grad_norm": 0.5295001538435925, "learning_rate": 4.201297648012977e-05, "loss": 0.5964, "step": 8350 }, { "epoch": 0.24381769875332107, "grad_norm": 0.5697556229969511, "learning_rate": 4.2011354420113546e-05, "loss": 0.6746, "step": 8351 }, { "epoch": 0.24384689498116843, "grad_norm": 0.5360434803050648, "learning_rate": 4.200973236009733e-05, "loss": 0.6127, "step": 8352 }, { "epoch": 0.2438760912090158, "grad_norm": 0.5320974060139616, "learning_rate": 4.20081103000811e-05, "loss": 0.6216, "step": 8353 }, { "epoch": 0.24390528743686316, "grad_norm": 0.5402761646365585, "learning_rate": 4.2006488240064884e-05, "loss": 0.5946, "step": 8354 }, { "epoch": 0.24393448366471052, "grad_norm": 0.5619387106621678, "learning_rate": 4.2004866180048666e-05, "loss": 0.6808, "step": 8355 }, { "epoch": 0.24396367989255788, "grad_norm": 0.5559746765319944, "learning_rate": 4.200324412003244e-05, "loss": 0.6392, "step": 8356 }, { "epoch": 0.24399287612040524, "grad_norm": 0.5943258291765475, "learning_rate": 4.200162206001622e-05, "loss": 0.6808, "step": 8357 }, { "epoch": 0.2440220723482526, "grad_norm": 0.5790775492595405, "learning_rate": 4.2e-05, "loss": 0.6828, "step": 8358 }, { "epoch": 0.24405126857609996, "grad_norm": 0.5597985230141732, "learning_rate": 4.199837793998378e-05, "loss": 0.6612, "step": 8359 }, { "epoch": 0.24408046480394732, "grad_norm": 0.5716215803565562, "learning_rate": 4.199675587996756e-05, "loss": 0.6983, "step": 8360 }, { "epoch": 0.24410966103179468, "grad_norm": 0.5391253421178083, "learning_rate": 4.1995133819951336e-05, "loss": 0.63, "step": 8361 }, { "epoch": 0.24413885725964204, "grad_norm": 0.5603330808153252, "learning_rate": 4.199351175993512e-05, "loss": 0.6741, "step": 8362 }, { "epoch": 0.2441680534874894, "grad_norm": 0.5484439630471891, "learning_rate": 4.19918896999189e-05, "loss": 0.5986, "step": 8363 }, { "epoch": 0.24419724971533677, "grad_norm": 0.5379288608890387, "learning_rate": 4.199026763990268e-05, "loss": 0.6005, "step": 8364 }, { "epoch": 0.24422644594318413, "grad_norm": 0.551231610906358, "learning_rate": 4.1988645579886456e-05, "loss": 0.6219, "step": 8365 }, { "epoch": 0.2442556421710315, "grad_norm": 0.5930251823348812, "learning_rate": 4.198702351987024e-05, "loss": 0.6858, "step": 8366 }, { "epoch": 0.24428483839887888, "grad_norm": 0.5552551456223624, "learning_rate": 4.198540145985402e-05, "loss": 0.6541, "step": 8367 }, { "epoch": 0.24431403462672624, "grad_norm": 0.5563442327575544, "learning_rate": 4.1983779399837795e-05, "loss": 0.6391, "step": 8368 }, { "epoch": 0.2443432308545736, "grad_norm": 0.5244942722803001, "learning_rate": 4.198215733982158e-05, "loss": 0.5858, "step": 8369 }, { "epoch": 0.24437242708242096, "grad_norm": 0.5861346659114811, "learning_rate": 4.198053527980536e-05, "loss": 0.6989, "step": 8370 }, { "epoch": 0.24440162331026832, "grad_norm": 0.5256390070804885, "learning_rate": 4.1978913219789133e-05, "loss": 0.6241, "step": 8371 }, { "epoch": 0.24443081953811568, "grad_norm": 0.5577322608644659, "learning_rate": 4.1977291159772915e-05, "loss": 0.6408, "step": 8372 }, { "epoch": 0.24446001576596305, "grad_norm": 0.5974373801270766, "learning_rate": 4.197566909975669e-05, "loss": 0.6541, "step": 8373 }, { "epoch": 0.2444892119938104, "grad_norm": 0.5356157084969956, "learning_rate": 4.197404703974047e-05, "loss": 0.6601, "step": 8374 }, { "epoch": 0.24451840822165777, "grad_norm": 0.5597356104846031, "learning_rate": 4.1972424979724254e-05, "loss": 0.6554, "step": 8375 }, { "epoch": 0.24454760444950513, "grad_norm": 0.5845814156869976, "learning_rate": 4.197080291970803e-05, "loss": 0.7019, "step": 8376 }, { "epoch": 0.2445768006773525, "grad_norm": 0.5968091296656718, "learning_rate": 4.196918085969181e-05, "loss": 0.6529, "step": 8377 }, { "epoch": 0.24460599690519985, "grad_norm": 0.4963092267430502, "learning_rate": 4.1967558799675585e-05, "loss": 0.5081, "step": 8378 }, { "epoch": 0.2446351931330472, "grad_norm": 0.5639829820200525, "learning_rate": 4.196593673965937e-05, "loss": 0.6793, "step": 8379 }, { "epoch": 0.24466438936089457, "grad_norm": 0.5973725406223221, "learning_rate": 4.196431467964315e-05, "loss": 0.7197, "step": 8380 }, { "epoch": 0.24469358558874194, "grad_norm": 0.5918375385617629, "learning_rate": 4.1962692619626924e-05, "loss": 0.6771, "step": 8381 }, { "epoch": 0.2447227818165893, "grad_norm": 0.5646871047064902, "learning_rate": 4.196107055961071e-05, "loss": 0.6676, "step": 8382 }, { "epoch": 0.24475197804443666, "grad_norm": 0.5504019983835511, "learning_rate": 4.195944849959449e-05, "loss": 0.6205, "step": 8383 }, { "epoch": 0.24478117427228402, "grad_norm": 0.6032688474032638, "learning_rate": 4.195782643957827e-05, "loss": 0.7362, "step": 8384 }, { "epoch": 0.24481037050013138, "grad_norm": 0.5518548810259183, "learning_rate": 4.1956204379562044e-05, "loss": 0.6922, "step": 8385 }, { "epoch": 0.24483956672797874, "grad_norm": 0.551795925235179, "learning_rate": 4.1954582319545826e-05, "loss": 0.589, "step": 8386 }, { "epoch": 0.2448687629558261, "grad_norm": 0.5252007605120135, "learning_rate": 4.195296025952961e-05, "loss": 0.5988, "step": 8387 }, { "epoch": 0.24489795918367346, "grad_norm": 0.5692043713384128, "learning_rate": 4.195133819951338e-05, "loss": 0.6907, "step": 8388 }, { "epoch": 0.24492715541152082, "grad_norm": 0.552101027781189, "learning_rate": 4.1949716139497165e-05, "loss": 0.6294, "step": 8389 }, { "epoch": 0.2449563516393682, "grad_norm": 0.5649955202446784, "learning_rate": 4.1948094079480946e-05, "loss": 0.7054, "step": 8390 }, { "epoch": 0.24498554786721555, "grad_norm": 0.5426301877954705, "learning_rate": 4.194647201946472e-05, "loss": 0.5882, "step": 8391 }, { "epoch": 0.2450147440950629, "grad_norm": 0.5337950168833582, "learning_rate": 4.19448499594485e-05, "loss": 0.6406, "step": 8392 }, { "epoch": 0.24504394032291027, "grad_norm": 0.5689098385602296, "learning_rate": 4.194322789943228e-05, "loss": 0.6838, "step": 8393 }, { "epoch": 0.24507313655075763, "grad_norm": 0.5414389534800883, "learning_rate": 4.194160583941606e-05, "loss": 0.6183, "step": 8394 }, { "epoch": 0.245102332778605, "grad_norm": 0.5182792767905251, "learning_rate": 4.193998377939984e-05, "loss": 0.6122, "step": 8395 }, { "epoch": 0.24513152900645235, "grad_norm": 0.596037957648343, "learning_rate": 4.1938361719383617e-05, "loss": 0.6631, "step": 8396 }, { "epoch": 0.24516072523429974, "grad_norm": 0.556767894946321, "learning_rate": 4.19367396593674e-05, "loss": 0.6635, "step": 8397 }, { "epoch": 0.2451899214621471, "grad_norm": 0.522438583488797, "learning_rate": 4.193511759935117e-05, "loss": 0.5656, "step": 8398 }, { "epoch": 0.24521911768999446, "grad_norm": 0.5380809157958617, "learning_rate": 4.1933495539334955e-05, "loss": 0.5868, "step": 8399 }, { "epoch": 0.24524831391784183, "grad_norm": 0.5162193974805317, "learning_rate": 4.193187347931874e-05, "loss": 0.5894, "step": 8400 }, { "epoch": 0.2452775101456892, "grad_norm": 0.5949544206455419, "learning_rate": 4.193025141930252e-05, "loss": 0.6889, "step": 8401 }, { "epoch": 0.24530670637353655, "grad_norm": 0.5350606776910033, "learning_rate": 4.19286293592863e-05, "loss": 0.6121, "step": 8402 }, { "epoch": 0.2453359026013839, "grad_norm": 0.5789146327347642, "learning_rate": 4.1927007299270075e-05, "loss": 0.6854, "step": 8403 }, { "epoch": 0.24536509882923127, "grad_norm": 0.5852007449630809, "learning_rate": 4.192538523925386e-05, "loss": 0.6523, "step": 8404 }, { "epoch": 0.24539429505707863, "grad_norm": 0.5281081720790735, "learning_rate": 4.192376317923763e-05, "loss": 0.5712, "step": 8405 }, { "epoch": 0.245423491284926, "grad_norm": 0.5438937118719909, "learning_rate": 4.1922141119221414e-05, "loss": 0.6122, "step": 8406 }, { "epoch": 0.24545268751277335, "grad_norm": 0.5593054896873894, "learning_rate": 4.1920519059205196e-05, "loss": 0.7073, "step": 8407 }, { "epoch": 0.24548188374062072, "grad_norm": 0.5704469239345594, "learning_rate": 4.191889699918897e-05, "loss": 0.6633, "step": 8408 }, { "epoch": 0.24551107996846808, "grad_norm": 0.5142015515834155, "learning_rate": 4.191727493917275e-05, "loss": 0.5743, "step": 8409 }, { "epoch": 0.24554027619631544, "grad_norm": 0.5415519785062315, "learning_rate": 4.191565287915653e-05, "loss": 0.6007, "step": 8410 }, { "epoch": 0.2455694724241628, "grad_norm": 0.5751968027197654, "learning_rate": 4.191403081914031e-05, "loss": 0.6779, "step": 8411 }, { "epoch": 0.24559866865201016, "grad_norm": 0.5418022455381325, "learning_rate": 4.191240875912409e-05, "loss": 0.6314, "step": 8412 }, { "epoch": 0.24562786487985752, "grad_norm": 0.5363033374209126, "learning_rate": 4.1910786699107866e-05, "loss": 0.5887, "step": 8413 }, { "epoch": 0.24565706110770488, "grad_norm": 0.5712588805129238, "learning_rate": 4.190916463909165e-05, "loss": 0.6394, "step": 8414 }, { "epoch": 0.24568625733555224, "grad_norm": 0.503636098091959, "learning_rate": 4.190754257907543e-05, "loss": 0.5551, "step": 8415 }, { "epoch": 0.2457154535633996, "grad_norm": 0.5905858874913055, "learning_rate": 4.1905920519059204e-05, "loss": 0.6613, "step": 8416 }, { "epoch": 0.24574464979124697, "grad_norm": 0.6171695054849855, "learning_rate": 4.1904298459042986e-05, "loss": 0.8063, "step": 8417 }, { "epoch": 0.24577384601909433, "grad_norm": 0.5283095948399431, "learning_rate": 4.190267639902676e-05, "loss": 0.6234, "step": 8418 }, { "epoch": 0.2458030422469417, "grad_norm": 0.6009962303106245, "learning_rate": 4.190105433901055e-05, "loss": 0.7162, "step": 8419 }, { "epoch": 0.24583223847478905, "grad_norm": 0.5507644624088388, "learning_rate": 4.1899432278994325e-05, "loss": 0.6312, "step": 8420 }, { "epoch": 0.2458614347026364, "grad_norm": 0.5754747952047139, "learning_rate": 4.1897810218978106e-05, "loss": 0.7052, "step": 8421 }, { "epoch": 0.24589063093048377, "grad_norm": 0.5354487973160882, "learning_rate": 4.189618815896189e-05, "loss": 0.6771, "step": 8422 }, { "epoch": 0.24591982715833113, "grad_norm": 0.5532791583583421, "learning_rate": 4.189456609894566e-05, "loss": 0.6621, "step": 8423 }, { "epoch": 0.2459490233861785, "grad_norm": 0.6216753293155602, "learning_rate": 4.1892944038929445e-05, "loss": 0.6539, "step": 8424 }, { "epoch": 0.24597821961402586, "grad_norm": 0.5785182591702618, "learning_rate": 4.189132197891322e-05, "loss": 0.6893, "step": 8425 }, { "epoch": 0.24600741584187322, "grad_norm": 0.5846432134700895, "learning_rate": 4.1889699918897e-05, "loss": 0.6207, "step": 8426 }, { "epoch": 0.24603661206972058, "grad_norm": 0.576664829225931, "learning_rate": 4.1888077858880783e-05, "loss": 0.6981, "step": 8427 }, { "epoch": 0.24606580829756797, "grad_norm": 0.6010426120990153, "learning_rate": 4.188645579886456e-05, "loss": 0.6976, "step": 8428 }, { "epoch": 0.24609500452541533, "grad_norm": 0.5754046305126882, "learning_rate": 4.188483373884834e-05, "loss": 0.6948, "step": 8429 }, { "epoch": 0.2461242007532627, "grad_norm": 0.6434074017221277, "learning_rate": 4.1883211678832115e-05, "loss": 0.7689, "step": 8430 }, { "epoch": 0.24615339698111005, "grad_norm": 0.572919148878428, "learning_rate": 4.18815896188159e-05, "loss": 0.6904, "step": 8431 }, { "epoch": 0.2461825932089574, "grad_norm": 0.610896044805407, "learning_rate": 4.187996755879968e-05, "loss": 0.6092, "step": 8432 }, { "epoch": 0.24621178943680477, "grad_norm": 0.5509126123453844, "learning_rate": 4.1878345498783454e-05, "loss": 0.6159, "step": 8433 }, { "epoch": 0.24624098566465213, "grad_norm": 0.5533702749381613, "learning_rate": 4.1876723438767235e-05, "loss": 0.6771, "step": 8434 }, { "epoch": 0.2462701818924995, "grad_norm": 0.6039530956879356, "learning_rate": 4.187510137875102e-05, "loss": 0.6554, "step": 8435 }, { "epoch": 0.24629937812034686, "grad_norm": 0.5792363131672527, "learning_rate": 4.187347931873479e-05, "loss": 0.6915, "step": 8436 }, { "epoch": 0.24632857434819422, "grad_norm": 0.5659413768574998, "learning_rate": 4.1871857258718574e-05, "loss": 0.6638, "step": 8437 }, { "epoch": 0.24635777057604158, "grad_norm": 0.5828700510183633, "learning_rate": 4.1870235198702356e-05, "loss": 0.6901, "step": 8438 }, { "epoch": 0.24638696680388894, "grad_norm": 0.6277615317059793, "learning_rate": 4.186861313868614e-05, "loss": 0.7643, "step": 8439 }, { "epoch": 0.2464161630317363, "grad_norm": 0.5587188182218494, "learning_rate": 4.186699107866991e-05, "loss": 0.7008, "step": 8440 }, { "epoch": 0.24644535925958366, "grad_norm": 0.5538413563683668, "learning_rate": 4.1865369018653694e-05, "loss": 0.6413, "step": 8441 }, { "epoch": 0.24647455548743102, "grad_norm": 0.5360675197601051, "learning_rate": 4.1863746958637476e-05, "loss": 0.5625, "step": 8442 }, { "epoch": 0.24650375171527839, "grad_norm": 0.5294851083088401, "learning_rate": 4.186212489862125e-05, "loss": 0.5836, "step": 8443 }, { "epoch": 0.24653294794312575, "grad_norm": 0.5773144352899081, "learning_rate": 4.186050283860503e-05, "loss": 0.6778, "step": 8444 }, { "epoch": 0.2465621441709731, "grad_norm": 0.606224339216802, "learning_rate": 4.185888077858881e-05, "loss": 0.6474, "step": 8445 }, { "epoch": 0.24659134039882047, "grad_norm": 0.56537235275165, "learning_rate": 4.185725871857259e-05, "loss": 0.6584, "step": 8446 }, { "epoch": 0.24662053662666783, "grad_norm": 0.5582507909429159, "learning_rate": 4.185563665855637e-05, "loss": 0.6391, "step": 8447 }, { "epoch": 0.2466497328545152, "grad_norm": 0.5886003386766201, "learning_rate": 4.1854014598540146e-05, "loss": 0.6756, "step": 8448 }, { "epoch": 0.24667892908236255, "grad_norm": 0.5729939489212256, "learning_rate": 4.185239253852393e-05, "loss": 0.6513, "step": 8449 }, { "epoch": 0.2467081253102099, "grad_norm": 0.5275588616849587, "learning_rate": 4.18507704785077e-05, "loss": 0.6144, "step": 8450 }, { "epoch": 0.24673732153805727, "grad_norm": 0.5561809365413377, "learning_rate": 4.1849148418491485e-05, "loss": 0.6227, "step": 8451 }, { "epoch": 0.24676651776590464, "grad_norm": 0.5103916898706098, "learning_rate": 4.1847526358475266e-05, "loss": 0.5466, "step": 8452 }, { "epoch": 0.246795713993752, "grad_norm": 0.5526811070142991, "learning_rate": 4.184590429845904e-05, "loss": 0.6477, "step": 8453 }, { "epoch": 0.24682491022159936, "grad_norm": 0.5771643554159784, "learning_rate": 4.184428223844282e-05, "loss": 0.6598, "step": 8454 }, { "epoch": 0.24685410644944672, "grad_norm": 0.535783780970052, "learning_rate": 4.18426601784266e-05, "loss": 0.6005, "step": 8455 }, { "epoch": 0.24688330267729408, "grad_norm": 0.5372186728231088, "learning_rate": 4.184103811841038e-05, "loss": 0.6526, "step": 8456 }, { "epoch": 0.24691249890514144, "grad_norm": 0.5010500029384152, "learning_rate": 4.183941605839417e-05, "loss": 0.5492, "step": 8457 }, { "epoch": 0.24694169513298883, "grad_norm": 0.572159474420049, "learning_rate": 4.1837793998377943e-05, "loss": 0.6983, "step": 8458 }, { "epoch": 0.2469708913608362, "grad_norm": 0.5035693343789095, "learning_rate": 4.1836171938361725e-05, "loss": 0.5591, "step": 8459 }, { "epoch": 0.24700008758868355, "grad_norm": 0.5507945001956195, "learning_rate": 4.18345498783455e-05, "loss": 0.6311, "step": 8460 }, { "epoch": 0.24702928381653091, "grad_norm": 0.6126992775268377, "learning_rate": 4.183292781832928e-05, "loss": 0.6857, "step": 8461 }, { "epoch": 0.24705848004437828, "grad_norm": 0.5762920800873935, "learning_rate": 4.1831305758313064e-05, "loss": 0.6408, "step": 8462 }, { "epoch": 0.24708767627222564, "grad_norm": 0.552365097926107, "learning_rate": 4.182968369829684e-05, "loss": 0.5777, "step": 8463 }, { "epoch": 0.247116872500073, "grad_norm": 0.6234860575916211, "learning_rate": 4.182806163828062e-05, "loss": 0.6823, "step": 8464 }, { "epoch": 0.24714606872792036, "grad_norm": 0.5844811558842032, "learning_rate": 4.1826439578264395e-05, "loss": 0.6596, "step": 8465 }, { "epoch": 0.24717526495576772, "grad_norm": 0.5656652413106265, "learning_rate": 4.182481751824818e-05, "loss": 0.6688, "step": 8466 }, { "epoch": 0.24720446118361508, "grad_norm": 0.589271122580949, "learning_rate": 4.182319545823196e-05, "loss": 0.7073, "step": 8467 }, { "epoch": 0.24723365741146244, "grad_norm": 0.5197894800406403, "learning_rate": 4.1821573398215734e-05, "loss": 0.6343, "step": 8468 }, { "epoch": 0.2472628536393098, "grad_norm": 0.5690196160521697, "learning_rate": 4.1819951338199516e-05, "loss": 0.6737, "step": 8469 }, { "epoch": 0.24729204986715717, "grad_norm": 0.6211669957938013, "learning_rate": 4.181832927818329e-05, "loss": 0.7463, "step": 8470 }, { "epoch": 0.24732124609500453, "grad_norm": 0.5990993309476295, "learning_rate": 4.181670721816707e-05, "loss": 0.6651, "step": 8471 }, { "epoch": 0.2473504423228519, "grad_norm": 0.5578929526732824, "learning_rate": 4.1815085158150854e-05, "loss": 0.6951, "step": 8472 }, { "epoch": 0.24737963855069925, "grad_norm": 0.7650083505210316, "learning_rate": 4.181346309813463e-05, "loss": 0.7141, "step": 8473 }, { "epoch": 0.2474088347785466, "grad_norm": 0.5287570816223472, "learning_rate": 4.181184103811841e-05, "loss": 0.5667, "step": 8474 }, { "epoch": 0.24743803100639397, "grad_norm": 0.6087917347881265, "learning_rate": 4.1810218978102186e-05, "loss": 0.6505, "step": 8475 }, { "epoch": 0.24746722723424133, "grad_norm": 0.5485884462155153, "learning_rate": 4.1808596918085975e-05, "loss": 0.7252, "step": 8476 }, { "epoch": 0.2474964234620887, "grad_norm": 0.5605833622839232, "learning_rate": 4.180697485806975e-05, "loss": 0.6356, "step": 8477 }, { "epoch": 0.24752561968993606, "grad_norm": 0.539672507660272, "learning_rate": 4.180535279805353e-05, "loss": 0.6196, "step": 8478 }, { "epoch": 0.24755481591778342, "grad_norm": 0.5869923022500746, "learning_rate": 4.180373073803731e-05, "loss": 0.6875, "step": 8479 }, { "epoch": 0.24758401214563078, "grad_norm": 0.5953808071072514, "learning_rate": 4.180210867802109e-05, "loss": 0.6642, "step": 8480 }, { "epoch": 0.24761320837347814, "grad_norm": 0.5765288536296278, "learning_rate": 4.180048661800487e-05, "loss": 0.6897, "step": 8481 }, { "epoch": 0.2476424046013255, "grad_norm": 0.5708764730436948, "learning_rate": 4.179886455798865e-05, "loss": 0.6354, "step": 8482 }, { "epoch": 0.24767160082917286, "grad_norm": 0.5767016754169155, "learning_rate": 4.1797242497972427e-05, "loss": 0.6815, "step": 8483 }, { "epoch": 0.24770079705702022, "grad_norm": 0.5412501827981073, "learning_rate": 4.179562043795621e-05, "loss": 0.5948, "step": 8484 }, { "epoch": 0.24772999328486758, "grad_norm": 0.5403298654708365, "learning_rate": 4.179399837793998e-05, "loss": 0.6335, "step": 8485 }, { "epoch": 0.24775918951271494, "grad_norm": 0.6239244117426836, "learning_rate": 4.1792376317923765e-05, "loss": 0.7764, "step": 8486 }, { "epoch": 0.2477883857405623, "grad_norm": 0.503323577287802, "learning_rate": 4.179075425790755e-05, "loss": 0.5636, "step": 8487 }, { "epoch": 0.2478175819684097, "grad_norm": 0.6091034787404275, "learning_rate": 4.178913219789132e-05, "loss": 0.6684, "step": 8488 }, { "epoch": 0.24784677819625706, "grad_norm": 0.5210088645477118, "learning_rate": 4.1787510137875104e-05, "loss": 0.5731, "step": 8489 }, { "epoch": 0.24787597442410442, "grad_norm": 0.5553660560589404, "learning_rate": 4.178588807785888e-05, "loss": 0.6515, "step": 8490 }, { "epoch": 0.24790517065195178, "grad_norm": 0.5583689448466378, "learning_rate": 4.178426601784266e-05, "loss": 0.6262, "step": 8491 }, { "epoch": 0.24793436687979914, "grad_norm": 0.554401058157053, "learning_rate": 4.178264395782644e-05, "loss": 0.6069, "step": 8492 }, { "epoch": 0.2479635631076465, "grad_norm": 0.57896020815854, "learning_rate": 4.178102189781022e-05, "loss": 0.6678, "step": 8493 }, { "epoch": 0.24799275933549386, "grad_norm": 0.6012341960170247, "learning_rate": 4.1779399837794e-05, "loss": 0.7357, "step": 8494 }, { "epoch": 0.24802195556334122, "grad_norm": 0.5520480914652266, "learning_rate": 4.177777777777778e-05, "loss": 0.5641, "step": 8495 }, { "epoch": 0.24805115179118858, "grad_norm": 0.5739219319374841, "learning_rate": 4.177615571776156e-05, "loss": 0.6454, "step": 8496 }, { "epoch": 0.24808034801903595, "grad_norm": 0.5782338215462148, "learning_rate": 4.177453365774534e-05, "loss": 0.6797, "step": 8497 }, { "epoch": 0.2481095442468833, "grad_norm": 0.5405151898131763, "learning_rate": 4.177291159772912e-05, "loss": 0.6362, "step": 8498 }, { "epoch": 0.24813874047473067, "grad_norm": 0.6070193430214607, "learning_rate": 4.17712895377129e-05, "loss": 0.7156, "step": 8499 }, { "epoch": 0.24816793670257803, "grad_norm": 0.6023671485644826, "learning_rate": 4.1769667477696676e-05, "loss": 0.6834, "step": 8500 }, { "epoch": 0.2481971329304254, "grad_norm": 0.5674411209552472, "learning_rate": 4.176804541768046e-05, "loss": 0.6612, "step": 8501 }, { "epoch": 0.24822632915827275, "grad_norm": 0.562445983083586, "learning_rate": 4.176642335766424e-05, "loss": 0.6422, "step": 8502 }, { "epoch": 0.2482555253861201, "grad_norm": 0.5819173367434964, "learning_rate": 4.1764801297648014e-05, "loss": 0.7464, "step": 8503 }, { "epoch": 0.24828472161396747, "grad_norm": 0.531034834987471, "learning_rate": 4.1763179237631796e-05, "loss": 0.5868, "step": 8504 }, { "epoch": 0.24831391784181484, "grad_norm": 0.5381811370963312, "learning_rate": 4.176155717761557e-05, "loss": 0.5972, "step": 8505 }, { "epoch": 0.2483431140696622, "grad_norm": 0.5597379020063085, "learning_rate": 4.175993511759935e-05, "loss": 0.6776, "step": 8506 }, { "epoch": 0.24837231029750956, "grad_norm": 0.543556003776051, "learning_rate": 4.1758313057583135e-05, "loss": 0.6113, "step": 8507 }, { "epoch": 0.24840150652535692, "grad_norm": 0.5385709457743204, "learning_rate": 4.175669099756691e-05, "loss": 0.6725, "step": 8508 }, { "epoch": 0.24843070275320428, "grad_norm": 0.5333760371827498, "learning_rate": 4.175506893755069e-05, "loss": 0.5682, "step": 8509 }, { "epoch": 0.24845989898105164, "grad_norm": 0.5775739662851607, "learning_rate": 4.1753446877534466e-05, "loss": 0.6577, "step": 8510 }, { "epoch": 0.248489095208899, "grad_norm": 0.549338769483146, "learning_rate": 4.175182481751825e-05, "loss": 0.6634, "step": 8511 }, { "epoch": 0.24851829143674636, "grad_norm": 0.5784604400099237, "learning_rate": 4.175020275750203e-05, "loss": 0.6934, "step": 8512 }, { "epoch": 0.24854748766459372, "grad_norm": 0.5600026890718015, "learning_rate": 4.1748580697485805e-05, "loss": 0.6366, "step": 8513 }, { "epoch": 0.24857668389244109, "grad_norm": 0.5677872094431063, "learning_rate": 4.1746958637469593e-05, "loss": 0.6449, "step": 8514 }, { "epoch": 0.24860588012028845, "grad_norm": 0.5686527986708255, "learning_rate": 4.174533657745337e-05, "loss": 0.6564, "step": 8515 }, { "epoch": 0.2486350763481358, "grad_norm": 0.5843604469803182, "learning_rate": 4.174371451743715e-05, "loss": 0.6857, "step": 8516 }, { "epoch": 0.24866427257598317, "grad_norm": 0.5087029856308979, "learning_rate": 4.1742092457420925e-05, "loss": 0.5798, "step": 8517 }, { "epoch": 0.24869346880383056, "grad_norm": 0.5809105777342746, "learning_rate": 4.174047039740471e-05, "loss": 0.655, "step": 8518 }, { "epoch": 0.24872266503167792, "grad_norm": 0.5675614138738335, "learning_rate": 4.173884833738849e-05, "loss": 0.6056, "step": 8519 }, { "epoch": 0.24875186125952528, "grad_norm": 0.5661915985359198, "learning_rate": 4.1737226277372264e-05, "loss": 0.6329, "step": 8520 }, { "epoch": 0.24878105748737264, "grad_norm": 0.5478170267257504, "learning_rate": 4.1735604217356045e-05, "loss": 0.6196, "step": 8521 }, { "epoch": 0.24881025371522, "grad_norm": 0.55808804847132, "learning_rate": 4.173398215733982e-05, "loss": 0.63, "step": 8522 }, { "epoch": 0.24883944994306736, "grad_norm": 0.5243693749221572, "learning_rate": 4.17323600973236e-05, "loss": 0.6241, "step": 8523 }, { "epoch": 0.24886864617091473, "grad_norm": 0.5436020931121089, "learning_rate": 4.1730738037307384e-05, "loss": 0.6293, "step": 8524 }, { "epoch": 0.2488978423987621, "grad_norm": 0.507774051659651, "learning_rate": 4.172911597729116e-05, "loss": 0.5576, "step": 8525 }, { "epoch": 0.24892703862660945, "grad_norm": 0.6080233090275421, "learning_rate": 4.172749391727494e-05, "loss": 0.6714, "step": 8526 }, { "epoch": 0.2489562348544568, "grad_norm": 0.5787368350021628, "learning_rate": 4.172587185725872e-05, "loss": 0.6996, "step": 8527 }, { "epoch": 0.24898543108230417, "grad_norm": 0.6574272035595712, "learning_rate": 4.17242497972425e-05, "loss": 0.7178, "step": 8528 }, { "epoch": 0.24901462731015153, "grad_norm": 0.7292731973737777, "learning_rate": 4.172262773722628e-05, "loss": 0.7166, "step": 8529 }, { "epoch": 0.2490438235379989, "grad_norm": 0.5316719422210063, "learning_rate": 4.1721005677210054e-05, "loss": 0.5899, "step": 8530 }, { "epoch": 0.24907301976584625, "grad_norm": 0.6083722111251378, "learning_rate": 4.1719383617193836e-05, "loss": 0.7397, "step": 8531 }, { "epoch": 0.24910221599369362, "grad_norm": 0.5510277140370453, "learning_rate": 4.171776155717762e-05, "loss": 0.5943, "step": 8532 }, { "epoch": 0.24913141222154098, "grad_norm": 0.5233568296818221, "learning_rate": 4.17161394971614e-05, "loss": 0.5945, "step": 8533 }, { "epoch": 0.24916060844938834, "grad_norm": 0.5636053085282344, "learning_rate": 4.171451743714518e-05, "loss": 0.666, "step": 8534 }, { "epoch": 0.2491898046772357, "grad_norm": 0.5595442018250076, "learning_rate": 4.1712895377128956e-05, "loss": 0.6969, "step": 8535 }, { "epoch": 0.24921900090508306, "grad_norm": 0.5735723472637402, "learning_rate": 4.171127331711274e-05, "loss": 0.6647, "step": 8536 }, { "epoch": 0.24924819713293042, "grad_norm": 0.5727380663230549, "learning_rate": 4.170965125709651e-05, "loss": 0.6275, "step": 8537 }, { "epoch": 0.24927739336077778, "grad_norm": 0.560873536600244, "learning_rate": 4.1708029197080295e-05, "loss": 0.648, "step": 8538 }, { "epoch": 0.24930658958862514, "grad_norm": 0.5780223362278466, "learning_rate": 4.1706407137064076e-05, "loss": 0.6999, "step": 8539 }, { "epoch": 0.2493357858164725, "grad_norm": 0.5537207787816897, "learning_rate": 4.170478507704785e-05, "loss": 0.653, "step": 8540 }, { "epoch": 0.24936498204431987, "grad_norm": 0.5322305057486011, "learning_rate": 4.170316301703163e-05, "loss": 0.6199, "step": 8541 }, { "epoch": 0.24939417827216723, "grad_norm": 0.5134002980569221, "learning_rate": 4.170154095701541e-05, "loss": 0.5416, "step": 8542 }, { "epoch": 0.2494233745000146, "grad_norm": 0.5690096327682257, "learning_rate": 4.169991889699919e-05, "loss": 0.6371, "step": 8543 }, { "epoch": 0.24945257072786195, "grad_norm": 0.5683508854623519, "learning_rate": 4.169829683698297e-05, "loss": 0.6828, "step": 8544 }, { "epoch": 0.2494817669557093, "grad_norm": 0.7291781980572593, "learning_rate": 4.169667477696675e-05, "loss": 0.717, "step": 8545 }, { "epoch": 0.24951096318355667, "grad_norm": 0.5330774597548452, "learning_rate": 4.169505271695053e-05, "loss": 0.604, "step": 8546 }, { "epoch": 0.24954015941140403, "grad_norm": 0.5382027827170492, "learning_rate": 4.169343065693431e-05, "loss": 0.6511, "step": 8547 }, { "epoch": 0.24956935563925142, "grad_norm": 0.5442931791919107, "learning_rate": 4.1691808596918085e-05, "loss": 0.625, "step": 8548 }, { "epoch": 0.24959855186709878, "grad_norm": 0.5893812846103498, "learning_rate": 4.169018653690187e-05, "loss": 0.6857, "step": 8549 }, { "epoch": 0.24962774809494614, "grad_norm": 0.5354705714361566, "learning_rate": 4.168856447688564e-05, "loss": 0.6046, "step": 8550 }, { "epoch": 0.2496569443227935, "grad_norm": 0.5760648846123918, "learning_rate": 4.168694241686943e-05, "loss": 0.7232, "step": 8551 }, { "epoch": 0.24968614055064087, "grad_norm": 0.595646122532794, "learning_rate": 4.1685320356853206e-05, "loss": 0.6988, "step": 8552 }, { "epoch": 0.24971533677848823, "grad_norm": 0.5264102002416797, "learning_rate": 4.168369829683699e-05, "loss": 0.5895, "step": 8553 }, { "epoch": 0.2497445330063356, "grad_norm": 0.5466712873187636, "learning_rate": 4.168207623682077e-05, "loss": 0.642, "step": 8554 }, { "epoch": 0.24977372923418295, "grad_norm": 0.6233381564026129, "learning_rate": 4.1680454176804544e-05, "loss": 0.6991, "step": 8555 }, { "epoch": 0.2498029254620303, "grad_norm": 0.5596344801862569, "learning_rate": 4.1678832116788326e-05, "loss": 0.6646, "step": 8556 }, { "epoch": 0.24983212168987767, "grad_norm": 0.6103771195400313, "learning_rate": 4.16772100567721e-05, "loss": 0.7434, "step": 8557 }, { "epoch": 0.24986131791772503, "grad_norm": 0.5900038477558145, "learning_rate": 4.167558799675588e-05, "loss": 0.7505, "step": 8558 }, { "epoch": 0.2498905141455724, "grad_norm": 0.5642857559399816, "learning_rate": 4.1673965936739664e-05, "loss": 0.6558, "step": 8559 }, { "epoch": 0.24991971037341976, "grad_norm": 0.5584791244747649, "learning_rate": 4.167234387672344e-05, "loss": 0.6721, "step": 8560 }, { "epoch": 0.24994890660126712, "grad_norm": 0.6743787896727622, "learning_rate": 4.167072181670722e-05, "loss": 0.7141, "step": 8561 }, { "epoch": 0.24997810282911448, "grad_norm": 0.5493911743336634, "learning_rate": 4.1669099756690996e-05, "loss": 0.711, "step": 8562 }, { "epoch": 0.25000729905696184, "grad_norm": 0.5583736289259922, "learning_rate": 4.166747769667478e-05, "loss": 0.6826, "step": 8563 }, { "epoch": 0.2500364952848092, "grad_norm": 0.605730776889681, "learning_rate": 4.166585563665856e-05, "loss": 0.8043, "step": 8564 }, { "epoch": 0.25006569151265656, "grad_norm": 0.5545607161053193, "learning_rate": 4.1664233576642335e-05, "loss": 0.6526, "step": 8565 }, { "epoch": 0.2500948877405039, "grad_norm": 0.5831365132500205, "learning_rate": 4.1662611516626116e-05, "loss": 0.6485, "step": 8566 }, { "epoch": 0.2501240839683513, "grad_norm": 0.5449265718683699, "learning_rate": 4.166098945660989e-05, "loss": 0.634, "step": 8567 }, { "epoch": 0.25015328019619865, "grad_norm": 0.6329646864155765, "learning_rate": 4.165936739659367e-05, "loss": 0.6339, "step": 8568 }, { "epoch": 0.250182476424046, "grad_norm": 0.5846359494963165, "learning_rate": 4.1657745336577455e-05, "loss": 0.6822, "step": 8569 }, { "epoch": 0.25021167265189337, "grad_norm": 0.52764826404969, "learning_rate": 4.1656123276561237e-05, "loss": 0.622, "step": 8570 }, { "epoch": 0.25024086887974073, "grad_norm": 0.5359194955611081, "learning_rate": 4.165450121654502e-05, "loss": 0.64, "step": 8571 }, { "epoch": 0.2502700651075881, "grad_norm": 0.5364301270207215, "learning_rate": 4.165287915652879e-05, "loss": 0.5926, "step": 8572 }, { "epoch": 0.25029926133543545, "grad_norm": 0.5442791950861757, "learning_rate": 4.1651257096512575e-05, "loss": 0.6233, "step": 8573 }, { "epoch": 0.2503284575632828, "grad_norm": 0.5113898457408519, "learning_rate": 4.164963503649636e-05, "loss": 0.6074, "step": 8574 }, { "epoch": 0.2503576537911302, "grad_norm": 0.5299707217436005, "learning_rate": 4.164801297648013e-05, "loss": 0.5942, "step": 8575 }, { "epoch": 0.25038685001897754, "grad_norm": 0.5414271077144558, "learning_rate": 4.1646390916463914e-05, "loss": 0.5565, "step": 8576 }, { "epoch": 0.2504160462468249, "grad_norm": 0.5838957176866777, "learning_rate": 4.164476885644769e-05, "loss": 0.7265, "step": 8577 }, { "epoch": 0.25044524247467226, "grad_norm": 0.6550867914528872, "learning_rate": 4.164314679643147e-05, "loss": 0.647, "step": 8578 }, { "epoch": 0.2504744387025196, "grad_norm": 0.5220669452034463, "learning_rate": 4.164152473641525e-05, "loss": 0.5556, "step": 8579 }, { "epoch": 0.250503634930367, "grad_norm": 0.5457526654433763, "learning_rate": 4.163990267639903e-05, "loss": 0.5479, "step": 8580 }, { "epoch": 0.25053283115821434, "grad_norm": 0.5781195752668598, "learning_rate": 4.163828061638281e-05, "loss": 0.6635, "step": 8581 }, { "epoch": 0.2505620273860617, "grad_norm": 0.5136094756064998, "learning_rate": 4.1636658556366584e-05, "loss": 0.5884, "step": 8582 }, { "epoch": 0.25059122361390906, "grad_norm": 0.5688653781901187, "learning_rate": 4.1635036496350366e-05, "loss": 0.6607, "step": 8583 }, { "epoch": 0.2506204198417564, "grad_norm": 0.5947795443223577, "learning_rate": 4.163341443633415e-05, "loss": 0.6926, "step": 8584 }, { "epoch": 0.2506496160696038, "grad_norm": 0.5542068222077471, "learning_rate": 4.163179237631792e-05, "loss": 0.6186, "step": 8585 }, { "epoch": 0.25067881229745115, "grad_norm": 0.5838532902604364, "learning_rate": 4.1630170316301704e-05, "loss": 0.6789, "step": 8586 }, { "epoch": 0.2507080085252985, "grad_norm": 0.5363990514115214, "learning_rate": 4.162854825628548e-05, "loss": 0.6666, "step": 8587 }, { "epoch": 0.25073720475314587, "grad_norm": 0.5444394419700375, "learning_rate": 4.162692619626926e-05, "loss": 0.6525, "step": 8588 }, { "epoch": 0.25076640098099323, "grad_norm": 0.5791170667935172, "learning_rate": 4.162530413625305e-05, "loss": 0.668, "step": 8589 }, { "epoch": 0.2507955972088406, "grad_norm": 0.5408322842278257, "learning_rate": 4.1623682076236824e-05, "loss": 0.6168, "step": 8590 }, { "epoch": 0.25082479343668795, "grad_norm": 0.9218563146594394, "learning_rate": 4.1622060016220606e-05, "loss": 0.7797, "step": 8591 }, { "epoch": 0.2508539896645353, "grad_norm": 0.5799377848639806, "learning_rate": 4.162043795620438e-05, "loss": 0.6506, "step": 8592 }, { "epoch": 0.25088318589238273, "grad_norm": 0.6018977856756771, "learning_rate": 4.161881589618816e-05, "loss": 0.7024, "step": 8593 }, { "epoch": 0.2509123821202301, "grad_norm": 0.5471331869367585, "learning_rate": 4.1617193836171945e-05, "loss": 0.6619, "step": 8594 }, { "epoch": 0.25094157834807745, "grad_norm": 0.5160038114603047, "learning_rate": 4.161557177615572e-05, "loss": 0.5912, "step": 8595 }, { "epoch": 0.2509707745759248, "grad_norm": 0.5927653054069643, "learning_rate": 4.16139497161395e-05, "loss": 0.6604, "step": 8596 }, { "epoch": 0.2509999708037722, "grad_norm": 0.5756705864347603, "learning_rate": 4.1612327656123276e-05, "loss": 0.6673, "step": 8597 }, { "epoch": 0.25102916703161954, "grad_norm": 0.6208642561700939, "learning_rate": 4.161070559610706e-05, "loss": 0.6146, "step": 8598 }, { "epoch": 0.2510583632594669, "grad_norm": 0.5849694062729386, "learning_rate": 4.160908353609084e-05, "loss": 0.6946, "step": 8599 }, { "epoch": 0.25108755948731426, "grad_norm": 0.5447192810974515, "learning_rate": 4.1607461476074615e-05, "loss": 0.5971, "step": 8600 }, { "epoch": 0.2511167557151616, "grad_norm": 0.6185350641969047, "learning_rate": 4.16058394160584e-05, "loss": 0.6574, "step": 8601 }, { "epoch": 0.251145951943009, "grad_norm": 0.5644276281575751, "learning_rate": 4.160421735604217e-05, "loss": 0.6136, "step": 8602 }, { "epoch": 0.25117514817085634, "grad_norm": 0.6016094413406861, "learning_rate": 4.1602595296025953e-05, "loss": 0.7201, "step": 8603 }, { "epoch": 0.2512043443987037, "grad_norm": 0.5579761873152566, "learning_rate": 4.1600973236009735e-05, "loss": 0.6708, "step": 8604 }, { "epoch": 0.25123354062655107, "grad_norm": 0.5686097411284978, "learning_rate": 4.159935117599351e-05, "loss": 0.6483, "step": 8605 }, { "epoch": 0.2512627368543984, "grad_norm": 0.5902844911411874, "learning_rate": 4.159772911597729e-05, "loss": 0.6721, "step": 8606 }, { "epoch": 0.2512919330822458, "grad_norm": 0.6095897395132378, "learning_rate": 4.159610705596107e-05, "loss": 0.7433, "step": 8607 }, { "epoch": 0.25132112931009315, "grad_norm": 0.534774834577057, "learning_rate": 4.1594484995944855e-05, "loss": 0.5917, "step": 8608 }, { "epoch": 0.2513503255379405, "grad_norm": 0.5453898367516627, "learning_rate": 4.159286293592863e-05, "loss": 0.658, "step": 8609 }, { "epoch": 0.25137952176578787, "grad_norm": 0.6246335490629177, "learning_rate": 4.159124087591241e-05, "loss": 0.7394, "step": 8610 }, { "epoch": 0.25140871799363523, "grad_norm": 0.5455368679711818, "learning_rate": 4.1589618815896194e-05, "loss": 0.5912, "step": 8611 }, { "epoch": 0.2514379142214826, "grad_norm": 0.5446069602075166, "learning_rate": 4.158799675587997e-05, "loss": 0.6373, "step": 8612 }, { "epoch": 0.25146711044932996, "grad_norm": 0.6074368564228821, "learning_rate": 4.158637469586375e-05, "loss": 0.7436, "step": 8613 }, { "epoch": 0.2514963066771773, "grad_norm": 0.5541397006693447, "learning_rate": 4.158475263584753e-05, "loss": 0.6081, "step": 8614 }, { "epoch": 0.2515255029050247, "grad_norm": 0.564083664662814, "learning_rate": 4.158313057583131e-05, "loss": 0.6769, "step": 8615 }, { "epoch": 0.25155469913287204, "grad_norm": 0.5323591202914807, "learning_rate": 4.158150851581509e-05, "loss": 0.6515, "step": 8616 }, { "epoch": 0.2515838953607194, "grad_norm": 0.5420452753279849, "learning_rate": 4.1579886455798864e-05, "loss": 0.6601, "step": 8617 }, { "epoch": 0.25161309158856676, "grad_norm": 0.5701062573208082, "learning_rate": 4.1578264395782646e-05, "loss": 0.6824, "step": 8618 }, { "epoch": 0.2516422878164141, "grad_norm": 0.524979666349372, "learning_rate": 4.157664233576643e-05, "loss": 0.6077, "step": 8619 }, { "epoch": 0.2516714840442615, "grad_norm": 0.5189043649574451, "learning_rate": 4.15750202757502e-05, "loss": 0.5919, "step": 8620 }, { "epoch": 0.25170068027210885, "grad_norm": 0.533528445060743, "learning_rate": 4.1573398215733984e-05, "loss": 0.6372, "step": 8621 }, { "epoch": 0.2517298764999562, "grad_norm": 0.5761183338859506, "learning_rate": 4.157177615571776e-05, "loss": 0.6428, "step": 8622 }, { "epoch": 0.25175907272780357, "grad_norm": 0.5844378480747312, "learning_rate": 4.157015409570154e-05, "loss": 0.7077, "step": 8623 }, { "epoch": 0.25178826895565093, "grad_norm": 0.5687928911599187, "learning_rate": 4.156853203568532e-05, "loss": 0.681, "step": 8624 }, { "epoch": 0.2518174651834983, "grad_norm": 0.5479265118152371, "learning_rate": 4.15669099756691e-05, "loss": 0.6672, "step": 8625 }, { "epoch": 0.25184666141134565, "grad_norm": 0.557061822718053, "learning_rate": 4.156528791565288e-05, "loss": 0.645, "step": 8626 }, { "epoch": 0.251875857639193, "grad_norm": 0.6400813577850128, "learning_rate": 4.156366585563666e-05, "loss": 0.7464, "step": 8627 }, { "epoch": 0.2519050538670404, "grad_norm": 0.5315640692246547, "learning_rate": 4.156204379562044e-05, "loss": 0.6143, "step": 8628 }, { "epoch": 0.25193425009488774, "grad_norm": 0.623084897207859, "learning_rate": 4.156042173560422e-05, "loss": 0.7684, "step": 8629 }, { "epoch": 0.2519634463227351, "grad_norm": 0.5224263553348484, "learning_rate": 4.1558799675588e-05, "loss": 0.6021, "step": 8630 }, { "epoch": 0.25199264255058246, "grad_norm": 0.5687696697967808, "learning_rate": 4.155717761557178e-05, "loss": 0.6309, "step": 8631 }, { "epoch": 0.2520218387784298, "grad_norm": 0.5463064936223482, "learning_rate": 4.155555555555556e-05, "loss": 0.6317, "step": 8632 }, { "epoch": 0.2520510350062772, "grad_norm": 0.5736570558031694, "learning_rate": 4.155393349553934e-05, "loss": 0.7699, "step": 8633 }, { "epoch": 0.25208023123412454, "grad_norm": 0.5542221234424549, "learning_rate": 4.155231143552312e-05, "loss": 0.6508, "step": 8634 }, { "epoch": 0.2521094274619719, "grad_norm": 0.5392389756870452, "learning_rate": 4.1550689375506895e-05, "loss": 0.6217, "step": 8635 }, { "epoch": 0.25213862368981926, "grad_norm": 0.5513011907534012, "learning_rate": 4.154906731549068e-05, "loss": 0.6269, "step": 8636 }, { "epoch": 0.2521678199176666, "grad_norm": 0.5615403711489101, "learning_rate": 4.154744525547445e-05, "loss": 0.6305, "step": 8637 }, { "epoch": 0.252197016145514, "grad_norm": 0.5287889811060316, "learning_rate": 4.1545823195458234e-05, "loss": 0.5937, "step": 8638 }, { "epoch": 0.25222621237336135, "grad_norm": 0.5704420811791903, "learning_rate": 4.1544201135442016e-05, "loss": 0.7525, "step": 8639 }, { "epoch": 0.2522554086012087, "grad_norm": 0.537338555873568, "learning_rate": 4.154257907542579e-05, "loss": 0.6238, "step": 8640 }, { "epoch": 0.25228460482905607, "grad_norm": 0.5403461821274391, "learning_rate": 4.154095701540957e-05, "loss": 0.5876, "step": 8641 }, { "epoch": 0.25231380105690343, "grad_norm": 0.5566382951883938, "learning_rate": 4.153933495539335e-05, "loss": 0.6409, "step": 8642 }, { "epoch": 0.2523429972847508, "grad_norm": 0.6381243311721696, "learning_rate": 4.153771289537713e-05, "loss": 0.7006, "step": 8643 }, { "epoch": 0.25237219351259815, "grad_norm": 0.5667804131349012, "learning_rate": 4.153609083536091e-05, "loss": 0.6502, "step": 8644 }, { "epoch": 0.2524013897404455, "grad_norm": 0.5826200795634184, "learning_rate": 4.1534468775344686e-05, "loss": 0.7486, "step": 8645 }, { "epoch": 0.2524305859682929, "grad_norm": 0.562547194766231, "learning_rate": 4.1532846715328474e-05, "loss": 0.6471, "step": 8646 }, { "epoch": 0.25245978219614024, "grad_norm": 0.5731682556953126, "learning_rate": 4.153122465531225e-05, "loss": 0.6349, "step": 8647 }, { "epoch": 0.2524889784239876, "grad_norm": 0.6116716242171397, "learning_rate": 4.152960259529603e-05, "loss": 0.7703, "step": 8648 }, { "epoch": 0.25251817465183496, "grad_norm": 0.5899973514627949, "learning_rate": 4.1527980535279806e-05, "loss": 0.6571, "step": 8649 }, { "epoch": 0.2525473708796823, "grad_norm": 0.6108106855770197, "learning_rate": 4.152635847526359e-05, "loss": 0.7593, "step": 8650 }, { "epoch": 0.2525765671075297, "grad_norm": 0.5967596758901225, "learning_rate": 4.152473641524737e-05, "loss": 0.7039, "step": 8651 }, { "epoch": 0.25260576333537704, "grad_norm": 0.6036637489558913, "learning_rate": 4.1523114355231145e-05, "loss": 0.7385, "step": 8652 }, { "epoch": 0.2526349595632244, "grad_norm": 0.5850891747748126, "learning_rate": 4.1521492295214926e-05, "loss": 0.6666, "step": 8653 }, { "epoch": 0.2526641557910718, "grad_norm": 0.5528657440901653, "learning_rate": 4.15198702351987e-05, "loss": 0.6179, "step": 8654 }, { "epoch": 0.2526933520189192, "grad_norm": 0.5457987821889155, "learning_rate": 4.151824817518248e-05, "loss": 0.6244, "step": 8655 }, { "epoch": 0.25272254824676654, "grad_norm": 0.5278401897660996, "learning_rate": 4.1516626115166265e-05, "loss": 0.612, "step": 8656 }, { "epoch": 0.2527517444746139, "grad_norm": 0.571363678222108, "learning_rate": 4.151500405515004e-05, "loss": 0.6652, "step": 8657 }, { "epoch": 0.25278094070246127, "grad_norm": 0.5368954870502545, "learning_rate": 4.151338199513382e-05, "loss": 0.6317, "step": 8658 }, { "epoch": 0.2528101369303086, "grad_norm": 0.5846517688483209, "learning_rate": 4.15117599351176e-05, "loss": 0.6927, "step": 8659 }, { "epoch": 0.252839333158156, "grad_norm": 0.598699813277221, "learning_rate": 4.151013787510138e-05, "loss": 0.6677, "step": 8660 }, { "epoch": 0.25286852938600335, "grad_norm": 0.6021641436007671, "learning_rate": 4.150851581508516e-05, "loss": 0.6699, "step": 8661 }, { "epoch": 0.2528977256138507, "grad_norm": 0.6307655530487886, "learning_rate": 4.1506893755068935e-05, "loss": 0.7333, "step": 8662 }, { "epoch": 0.25292692184169807, "grad_norm": 0.6125159927143857, "learning_rate": 4.150527169505272e-05, "loss": 0.7663, "step": 8663 }, { "epoch": 0.25295611806954543, "grad_norm": 0.5481904981554923, "learning_rate": 4.15036496350365e-05, "loss": 0.6689, "step": 8664 }, { "epoch": 0.2529853142973928, "grad_norm": 0.5716180679499626, "learning_rate": 4.150202757502028e-05, "loss": 0.6585, "step": 8665 }, { "epoch": 0.25301451052524015, "grad_norm": 0.6554563500749275, "learning_rate": 4.150040551500406e-05, "loss": 0.66, "step": 8666 }, { "epoch": 0.2530437067530875, "grad_norm": 0.5507908210617172, "learning_rate": 4.149878345498784e-05, "loss": 0.6753, "step": 8667 }, { "epoch": 0.2530729029809349, "grad_norm": 0.5849925490141095, "learning_rate": 4.149716139497162e-05, "loss": 0.7615, "step": 8668 }, { "epoch": 0.25310209920878224, "grad_norm": 0.5637342107471879, "learning_rate": 4.1495539334955394e-05, "loss": 0.6659, "step": 8669 }, { "epoch": 0.2531312954366296, "grad_norm": 0.5646865531726587, "learning_rate": 4.1493917274939176e-05, "loss": 0.7111, "step": 8670 }, { "epoch": 0.25316049166447696, "grad_norm": 0.5556242244519568, "learning_rate": 4.149229521492296e-05, "loss": 0.6888, "step": 8671 }, { "epoch": 0.2531896878923243, "grad_norm": 0.5661131117587775, "learning_rate": 4.149067315490673e-05, "loss": 0.7311, "step": 8672 }, { "epoch": 0.2532188841201717, "grad_norm": 0.523641780437275, "learning_rate": 4.1489051094890514e-05, "loss": 0.5831, "step": 8673 }, { "epoch": 0.25324808034801904, "grad_norm": 0.5849011212574025, "learning_rate": 4.148742903487429e-05, "loss": 0.7281, "step": 8674 }, { "epoch": 0.2532772765758664, "grad_norm": 0.574095320641787, "learning_rate": 4.148580697485807e-05, "loss": 0.6556, "step": 8675 }, { "epoch": 0.25330647280371377, "grad_norm": 0.5303135452159365, "learning_rate": 4.148418491484185e-05, "loss": 0.6224, "step": 8676 }, { "epoch": 0.25333566903156113, "grad_norm": 0.5843028074264888, "learning_rate": 4.148256285482563e-05, "loss": 0.7445, "step": 8677 }, { "epoch": 0.2533648652594085, "grad_norm": 0.5471819422176157, "learning_rate": 4.148094079480941e-05, "loss": 0.6425, "step": 8678 }, { "epoch": 0.25339406148725585, "grad_norm": 0.5521741858468316, "learning_rate": 4.147931873479319e-05, "loss": 0.6345, "step": 8679 }, { "epoch": 0.2534232577151032, "grad_norm": 0.6143812193552575, "learning_rate": 4.1477696674776966e-05, "loss": 0.7356, "step": 8680 }, { "epoch": 0.2534524539429506, "grad_norm": 0.5200369913339787, "learning_rate": 4.147607461476075e-05, "loss": 0.5948, "step": 8681 }, { "epoch": 0.25348165017079793, "grad_norm": 0.7285731636998505, "learning_rate": 4.147445255474452e-05, "loss": 0.6768, "step": 8682 }, { "epoch": 0.2535108463986453, "grad_norm": 0.5404714717562583, "learning_rate": 4.1472830494728305e-05, "loss": 0.6367, "step": 8683 }, { "epoch": 0.25354004262649266, "grad_norm": 0.5765211295004511, "learning_rate": 4.1471208434712086e-05, "loss": 0.6212, "step": 8684 }, { "epoch": 0.25356923885434, "grad_norm": 0.5897595026598188, "learning_rate": 4.146958637469587e-05, "loss": 0.6948, "step": 8685 }, { "epoch": 0.2535984350821874, "grad_norm": 0.5605321058632315, "learning_rate": 4.146796431467965e-05, "loss": 0.6427, "step": 8686 }, { "epoch": 0.25362763131003474, "grad_norm": 0.6251392468430768, "learning_rate": 4.1466342254663425e-05, "loss": 0.6998, "step": 8687 }, { "epoch": 0.2536568275378821, "grad_norm": 0.5458917823013496, "learning_rate": 4.146472019464721e-05, "loss": 0.6308, "step": 8688 }, { "epoch": 0.25368602376572946, "grad_norm": 0.5915754517762959, "learning_rate": 4.146309813463098e-05, "loss": 0.6266, "step": 8689 }, { "epoch": 0.2537152199935768, "grad_norm": 0.5743511750557376, "learning_rate": 4.1461476074614763e-05, "loss": 0.6602, "step": 8690 }, { "epoch": 0.2537444162214242, "grad_norm": 0.5632905934923206, "learning_rate": 4.1459854014598545e-05, "loss": 0.6675, "step": 8691 }, { "epoch": 0.25377361244927155, "grad_norm": 0.5617134950483819, "learning_rate": 4.145823195458232e-05, "loss": 0.7066, "step": 8692 }, { "epoch": 0.2538028086771189, "grad_norm": 0.5712201844096992, "learning_rate": 4.14566098945661e-05, "loss": 0.6809, "step": 8693 }, { "epoch": 0.25383200490496627, "grad_norm": 0.57121367704945, "learning_rate": 4.145498783454988e-05, "loss": 0.675, "step": 8694 }, { "epoch": 0.25386120113281363, "grad_norm": 0.583804608311243, "learning_rate": 4.145336577453366e-05, "loss": 0.7047, "step": 8695 }, { "epoch": 0.253890397360661, "grad_norm": 0.568362121189396, "learning_rate": 4.145174371451744e-05, "loss": 0.6573, "step": 8696 }, { "epoch": 0.25391959358850835, "grad_norm": 0.5414465333844325, "learning_rate": 4.1450121654501215e-05, "loss": 0.6102, "step": 8697 }, { "epoch": 0.2539487898163557, "grad_norm": 0.5708461032548292, "learning_rate": 4.1448499594485e-05, "loss": 0.6466, "step": 8698 }, { "epoch": 0.2539779860442031, "grad_norm": 0.6127381022395918, "learning_rate": 4.144687753446877e-05, "loss": 0.6907, "step": 8699 }, { "epoch": 0.25400718227205044, "grad_norm": 0.5879311331170501, "learning_rate": 4.1445255474452554e-05, "loss": 0.7458, "step": 8700 }, { "epoch": 0.2540363784998978, "grad_norm": 0.5613962485539542, "learning_rate": 4.1443633414436336e-05, "loss": 0.6237, "step": 8701 }, { "epoch": 0.25406557472774516, "grad_norm": 0.5482737973839652, "learning_rate": 4.144201135442012e-05, "loss": 0.5856, "step": 8702 }, { "epoch": 0.2540947709555925, "grad_norm": 0.583760804183353, "learning_rate": 4.14403892944039e-05, "loss": 0.727, "step": 8703 }, { "epoch": 0.2541239671834399, "grad_norm": 0.5184538490206343, "learning_rate": 4.1438767234387674e-05, "loss": 0.5805, "step": 8704 }, { "epoch": 0.25415316341128724, "grad_norm": 0.5675000206354601, "learning_rate": 4.1437145174371456e-05, "loss": 0.6774, "step": 8705 }, { "epoch": 0.2541823596391346, "grad_norm": 0.6066191440917557, "learning_rate": 4.143552311435524e-05, "loss": 0.7934, "step": 8706 }, { "epoch": 0.25421155586698196, "grad_norm": 0.5182112930057123, "learning_rate": 4.143390105433901e-05, "loss": 0.5715, "step": 8707 }, { "epoch": 0.2542407520948293, "grad_norm": 0.5388515007350956, "learning_rate": 4.1432278994322794e-05, "loss": 0.5847, "step": 8708 }, { "epoch": 0.2542699483226767, "grad_norm": 0.5306461369920038, "learning_rate": 4.143065693430657e-05, "loss": 0.6116, "step": 8709 }, { "epoch": 0.25429914455052405, "grad_norm": 0.5477754796405135, "learning_rate": 4.142903487429035e-05, "loss": 0.6568, "step": 8710 }, { "epoch": 0.2543283407783714, "grad_norm": 0.554631264623833, "learning_rate": 4.142741281427413e-05, "loss": 0.6324, "step": 8711 }, { "epoch": 0.25435753700621877, "grad_norm": 0.5665914415069698, "learning_rate": 4.142579075425791e-05, "loss": 0.7192, "step": 8712 }, { "epoch": 0.25438673323406613, "grad_norm": 0.6510317110501698, "learning_rate": 4.142416869424169e-05, "loss": 0.784, "step": 8713 }, { "epoch": 0.25441592946191355, "grad_norm": 0.5029763583526259, "learning_rate": 4.1422546634225465e-05, "loss": 0.5887, "step": 8714 }, { "epoch": 0.2544451256897609, "grad_norm": 0.5439019078801482, "learning_rate": 4.1420924574209246e-05, "loss": 0.6596, "step": 8715 }, { "epoch": 0.25447432191760827, "grad_norm": 0.5268949537434308, "learning_rate": 4.141930251419303e-05, "loss": 0.6121, "step": 8716 }, { "epoch": 0.25450351814545563, "grad_norm": 0.5923835297394802, "learning_rate": 4.14176804541768e-05, "loss": 0.7164, "step": 8717 }, { "epoch": 0.254532714373303, "grad_norm": 0.5626417123093346, "learning_rate": 4.1416058394160585e-05, "loss": 0.6867, "step": 8718 }, { "epoch": 0.25456191060115035, "grad_norm": 0.5873471140003583, "learning_rate": 4.141443633414436e-05, "loss": 0.699, "step": 8719 }, { "epoch": 0.2545911068289977, "grad_norm": 0.5782022403903628, "learning_rate": 4.141281427412814e-05, "loss": 0.6836, "step": 8720 }, { "epoch": 0.2546203030568451, "grad_norm": 0.5659396292958652, "learning_rate": 4.1411192214111923e-05, "loss": 0.6289, "step": 8721 }, { "epoch": 0.25464949928469244, "grad_norm": 0.5859492952380672, "learning_rate": 4.1409570154095705e-05, "loss": 0.741, "step": 8722 }, { "epoch": 0.2546786955125398, "grad_norm": 0.5551696705780936, "learning_rate": 4.140794809407949e-05, "loss": 0.6153, "step": 8723 }, { "epoch": 0.25470789174038716, "grad_norm": 0.5950562882247852, "learning_rate": 4.140632603406326e-05, "loss": 0.6782, "step": 8724 }, { "epoch": 0.2547370879682345, "grad_norm": 0.550701434732987, "learning_rate": 4.1404703974047044e-05, "loss": 0.6541, "step": 8725 }, { "epoch": 0.2547662841960819, "grad_norm": 0.5984276452105105, "learning_rate": 4.1403081914030826e-05, "loss": 0.6007, "step": 8726 }, { "epoch": 0.25479548042392924, "grad_norm": 0.6061825744332161, "learning_rate": 4.14014598540146e-05, "loss": 0.6758, "step": 8727 }, { "epoch": 0.2548246766517766, "grad_norm": 0.5694148371460604, "learning_rate": 4.139983779399838e-05, "loss": 0.6724, "step": 8728 }, { "epoch": 0.25485387287962397, "grad_norm": 0.6114204237270321, "learning_rate": 4.139821573398216e-05, "loss": 0.708, "step": 8729 }, { "epoch": 0.2548830691074713, "grad_norm": 0.4927652135413761, "learning_rate": 4.139659367396594e-05, "loss": 0.514, "step": 8730 }, { "epoch": 0.2549122653353187, "grad_norm": 0.5919129239132846, "learning_rate": 4.139497161394972e-05, "loss": 0.6906, "step": 8731 }, { "epoch": 0.25494146156316605, "grad_norm": 0.5361037953971867, "learning_rate": 4.1393349553933496e-05, "loss": 0.5987, "step": 8732 }, { "epoch": 0.2549706577910134, "grad_norm": 0.5691437161730941, "learning_rate": 4.139172749391728e-05, "loss": 0.6531, "step": 8733 }, { "epoch": 0.25499985401886077, "grad_norm": 0.5597544297421279, "learning_rate": 4.139010543390105e-05, "loss": 0.6599, "step": 8734 }, { "epoch": 0.25502905024670813, "grad_norm": 0.5429455605727693, "learning_rate": 4.1388483373884834e-05, "loss": 0.6095, "step": 8735 }, { "epoch": 0.2550582464745555, "grad_norm": 0.5161579250763457, "learning_rate": 4.1386861313868616e-05, "loss": 0.5715, "step": 8736 }, { "epoch": 0.25508744270240286, "grad_norm": 0.5782147415606048, "learning_rate": 4.138523925385239e-05, "loss": 0.6492, "step": 8737 }, { "epoch": 0.2551166389302502, "grad_norm": 0.5528060460953669, "learning_rate": 4.138361719383617e-05, "loss": 0.6706, "step": 8738 }, { "epoch": 0.2551458351580976, "grad_norm": 0.6177638540438726, "learning_rate": 4.138199513381995e-05, "loss": 0.7594, "step": 8739 }, { "epoch": 0.25517503138594494, "grad_norm": 0.5666269875942613, "learning_rate": 4.1380373073803736e-05, "loss": 0.6816, "step": 8740 }, { "epoch": 0.2552042276137923, "grad_norm": 0.5355246845958626, "learning_rate": 4.137875101378751e-05, "loss": 0.646, "step": 8741 }, { "epoch": 0.25523342384163966, "grad_norm": 0.5297814276196965, "learning_rate": 4.137712895377129e-05, "loss": 0.5887, "step": 8742 }, { "epoch": 0.255262620069487, "grad_norm": 0.6467258698519188, "learning_rate": 4.1375506893755075e-05, "loss": 0.7549, "step": 8743 }, { "epoch": 0.2552918162973344, "grad_norm": 0.5832848301740385, "learning_rate": 4.137388483373885e-05, "loss": 0.6969, "step": 8744 }, { "epoch": 0.25532101252518175, "grad_norm": 0.572251635614778, "learning_rate": 4.137226277372263e-05, "loss": 0.6819, "step": 8745 }, { "epoch": 0.2553502087530291, "grad_norm": 0.5641798949458074, "learning_rate": 4.137064071370641e-05, "loss": 0.6611, "step": 8746 }, { "epoch": 0.25537940498087647, "grad_norm": 0.5623676746701215, "learning_rate": 4.136901865369019e-05, "loss": 0.6654, "step": 8747 }, { "epoch": 0.25540860120872383, "grad_norm": 0.5394378429780391, "learning_rate": 4.136739659367397e-05, "loss": 0.6189, "step": 8748 }, { "epoch": 0.2554377974365712, "grad_norm": 0.5722765285765222, "learning_rate": 4.1365774533657745e-05, "loss": 0.6432, "step": 8749 }, { "epoch": 0.25546699366441855, "grad_norm": 0.5451996830928619, "learning_rate": 4.136415247364153e-05, "loss": 0.6953, "step": 8750 }, { "epoch": 0.2554961898922659, "grad_norm": 0.4964281962268971, "learning_rate": 4.136253041362531e-05, "loss": 0.5406, "step": 8751 }, { "epoch": 0.2555253861201133, "grad_norm": 0.5586216472035858, "learning_rate": 4.1360908353609084e-05, "loss": 0.68, "step": 8752 }, { "epoch": 0.25555458234796063, "grad_norm": 0.6353740595597905, "learning_rate": 4.1359286293592865e-05, "loss": 0.681, "step": 8753 }, { "epoch": 0.255583778575808, "grad_norm": 0.6164879048120685, "learning_rate": 4.135766423357664e-05, "loss": 0.6917, "step": 8754 }, { "epoch": 0.25561297480365536, "grad_norm": 0.5911821744212179, "learning_rate": 4.135604217356042e-05, "loss": 0.7498, "step": 8755 }, { "epoch": 0.2556421710315027, "grad_norm": 0.5644707359392424, "learning_rate": 4.1354420113544204e-05, "loss": 0.6416, "step": 8756 }, { "epoch": 0.2556713672593501, "grad_norm": 0.5563774839651328, "learning_rate": 4.135279805352798e-05, "loss": 0.6509, "step": 8757 }, { "epoch": 0.25570056348719744, "grad_norm": 0.49106851475458185, "learning_rate": 4.135117599351176e-05, "loss": 0.5454, "step": 8758 }, { "epoch": 0.2557297597150448, "grad_norm": 0.5745014881397238, "learning_rate": 4.134955393349554e-05, "loss": 0.6499, "step": 8759 }, { "epoch": 0.25575895594289216, "grad_norm": 0.5342644827287072, "learning_rate": 4.1347931873479324e-05, "loss": 0.5549, "step": 8760 }, { "epoch": 0.2557881521707395, "grad_norm": 0.6280779898002269, "learning_rate": 4.13463098134631e-05, "loss": 0.7114, "step": 8761 }, { "epoch": 0.2558173483985869, "grad_norm": 0.542882066068355, "learning_rate": 4.134468775344688e-05, "loss": 0.576, "step": 8762 }, { "epoch": 0.25584654462643425, "grad_norm": 0.5882320602180284, "learning_rate": 4.134306569343066e-05, "loss": 0.7049, "step": 8763 }, { "epoch": 0.2558757408542816, "grad_norm": 0.5664082860821275, "learning_rate": 4.134144363341444e-05, "loss": 0.6809, "step": 8764 }, { "epoch": 0.25590493708212897, "grad_norm": 0.5777239074289426, "learning_rate": 4.133982157339822e-05, "loss": 0.6348, "step": 8765 }, { "epoch": 0.25593413330997633, "grad_norm": 0.5495770098832281, "learning_rate": 4.1338199513381994e-05, "loss": 0.6137, "step": 8766 }, { "epoch": 0.2559633295378237, "grad_norm": 0.5478743453832265, "learning_rate": 4.1336577453365776e-05, "loss": 0.6374, "step": 8767 }, { "epoch": 0.25599252576567105, "grad_norm": 0.5611095016103882, "learning_rate": 4.133495539334956e-05, "loss": 0.6896, "step": 8768 }, { "epoch": 0.2560217219935184, "grad_norm": 0.5895477982883879, "learning_rate": 4.133333333333333e-05, "loss": 0.6161, "step": 8769 }, { "epoch": 0.2560509182213658, "grad_norm": 0.6158696107037287, "learning_rate": 4.1331711273317115e-05, "loss": 0.7133, "step": 8770 }, { "epoch": 0.25608011444921314, "grad_norm": 0.6063230857818583, "learning_rate": 4.1330089213300896e-05, "loss": 0.7239, "step": 8771 }, { "epoch": 0.2561093106770605, "grad_norm": 0.618741038233779, "learning_rate": 4.132846715328467e-05, "loss": 0.7573, "step": 8772 }, { "epoch": 0.25613850690490786, "grad_norm": 0.6046853407481628, "learning_rate": 4.132684509326845e-05, "loss": 0.767, "step": 8773 }, { "epoch": 0.2561677031327553, "grad_norm": 0.5491348615553048, "learning_rate": 4.132522303325223e-05, "loss": 0.6602, "step": 8774 }, { "epoch": 0.25619689936060264, "grad_norm": 0.6194910499883952, "learning_rate": 4.132360097323601e-05, "loss": 0.7157, "step": 8775 }, { "epoch": 0.25622609558845, "grad_norm": 0.5754275644621482, "learning_rate": 4.132197891321979e-05, "loss": 0.7209, "step": 8776 }, { "epoch": 0.25625529181629736, "grad_norm": 0.5123900520385342, "learning_rate": 4.132035685320357e-05, "loss": 0.5686, "step": 8777 }, { "epoch": 0.2562844880441447, "grad_norm": 0.5854843601831083, "learning_rate": 4.1318734793187355e-05, "loss": 0.7278, "step": 8778 }, { "epoch": 0.2563136842719921, "grad_norm": 0.5398290487204603, "learning_rate": 4.131711273317113e-05, "loss": 0.6347, "step": 8779 }, { "epoch": 0.25634288049983944, "grad_norm": 0.521783952928493, "learning_rate": 4.131549067315491e-05, "loss": 0.5732, "step": 8780 }, { "epoch": 0.2563720767276868, "grad_norm": 0.5391983286422696, "learning_rate": 4.131386861313869e-05, "loss": 0.5566, "step": 8781 }, { "epoch": 0.25640127295553417, "grad_norm": 0.539517571209498, "learning_rate": 4.131224655312247e-05, "loss": 0.5718, "step": 8782 }, { "epoch": 0.2564304691833815, "grad_norm": 0.5602176418752991, "learning_rate": 4.131062449310625e-05, "loss": 0.6129, "step": 8783 }, { "epoch": 0.2564596654112289, "grad_norm": 0.5419604347008957, "learning_rate": 4.1309002433090025e-05, "loss": 0.6383, "step": 8784 }, { "epoch": 0.25648886163907625, "grad_norm": 0.5139590466448561, "learning_rate": 4.130738037307381e-05, "loss": 0.5569, "step": 8785 }, { "epoch": 0.2565180578669236, "grad_norm": 0.5354979606773248, "learning_rate": 4.130575831305758e-05, "loss": 0.6068, "step": 8786 }, { "epoch": 0.25654725409477097, "grad_norm": 0.5283752553263695, "learning_rate": 4.1304136253041364e-05, "loss": 0.5827, "step": 8787 }, { "epoch": 0.25657645032261833, "grad_norm": 0.527228124757295, "learning_rate": 4.1302514193025146e-05, "loss": 0.6157, "step": 8788 }, { "epoch": 0.2566056465504657, "grad_norm": 0.5545962838509563, "learning_rate": 4.130089213300892e-05, "loss": 0.6532, "step": 8789 }, { "epoch": 0.25663484277831305, "grad_norm": 0.5489415622971575, "learning_rate": 4.12992700729927e-05, "loss": 0.6029, "step": 8790 }, { "epoch": 0.2566640390061604, "grad_norm": 0.5650276659691631, "learning_rate": 4.1297648012976484e-05, "loss": 0.6387, "step": 8791 }, { "epoch": 0.2566932352340078, "grad_norm": 0.5575791672836435, "learning_rate": 4.129602595296026e-05, "loss": 0.6568, "step": 8792 }, { "epoch": 0.25672243146185514, "grad_norm": 0.5621979332288235, "learning_rate": 4.129440389294404e-05, "loss": 0.6763, "step": 8793 }, { "epoch": 0.2567516276897025, "grad_norm": 0.5048824448987368, "learning_rate": 4.1292781832927816e-05, "loss": 0.5797, "step": 8794 }, { "epoch": 0.25678082391754986, "grad_norm": 0.5465975248123659, "learning_rate": 4.12911597729116e-05, "loss": 0.6195, "step": 8795 }, { "epoch": 0.2568100201453972, "grad_norm": 0.6254066218066964, "learning_rate": 4.128953771289538e-05, "loss": 0.6274, "step": 8796 }, { "epoch": 0.2568392163732446, "grad_norm": 0.5357744967292518, "learning_rate": 4.128791565287916e-05, "loss": 0.5823, "step": 8797 }, { "epoch": 0.25686841260109194, "grad_norm": 0.5589147519724204, "learning_rate": 4.128629359286294e-05, "loss": 0.6365, "step": 8798 }, { "epoch": 0.2568976088289393, "grad_norm": 0.5231934497041004, "learning_rate": 4.128467153284672e-05, "loss": 0.5887, "step": 8799 }, { "epoch": 0.25692680505678667, "grad_norm": 0.5321078076500374, "learning_rate": 4.12830494728305e-05, "loss": 0.6187, "step": 8800 }, { "epoch": 0.25695600128463403, "grad_norm": 0.580808205328052, "learning_rate": 4.1281427412814275e-05, "loss": 0.67, "step": 8801 }, { "epoch": 0.2569851975124814, "grad_norm": 0.5807131080382147, "learning_rate": 4.1279805352798056e-05, "loss": 0.6568, "step": 8802 }, { "epoch": 0.25701439374032875, "grad_norm": 0.6307821820845285, "learning_rate": 4.127818329278184e-05, "loss": 0.7066, "step": 8803 }, { "epoch": 0.2570435899681761, "grad_norm": 0.5397068812810797, "learning_rate": 4.127656123276561e-05, "loss": 0.6509, "step": 8804 }, { "epoch": 0.2570727861960235, "grad_norm": 0.5151885491843978, "learning_rate": 4.1274939172749395e-05, "loss": 0.5869, "step": 8805 }, { "epoch": 0.25710198242387083, "grad_norm": 0.5548512142686527, "learning_rate": 4.127331711273317e-05, "loss": 0.6301, "step": 8806 }, { "epoch": 0.2571311786517182, "grad_norm": 0.5923736427014019, "learning_rate": 4.127169505271695e-05, "loss": 0.6744, "step": 8807 }, { "epoch": 0.25716037487956556, "grad_norm": 0.6623937342355419, "learning_rate": 4.1270072992700734e-05, "loss": 0.7755, "step": 8808 }, { "epoch": 0.2571895711074129, "grad_norm": 0.5762290989468851, "learning_rate": 4.126845093268451e-05, "loss": 0.637, "step": 8809 }, { "epoch": 0.2572187673352603, "grad_norm": 1.0344340863514703, "learning_rate": 4.126682887266829e-05, "loss": 0.7649, "step": 8810 }, { "epoch": 0.25724796356310764, "grad_norm": 0.5068829104425093, "learning_rate": 4.1265206812652065e-05, "loss": 0.5618, "step": 8811 }, { "epoch": 0.257277159790955, "grad_norm": 0.593623666336797, "learning_rate": 4.126358475263585e-05, "loss": 0.7093, "step": 8812 }, { "epoch": 0.25730635601880236, "grad_norm": 0.5747881662962901, "learning_rate": 4.126196269261963e-05, "loss": 0.7031, "step": 8813 }, { "epoch": 0.2573355522466497, "grad_norm": 0.6654209668467138, "learning_rate": 4.1260340632603404e-05, "loss": 0.8316, "step": 8814 }, { "epoch": 0.2573647484744971, "grad_norm": 0.5321891928235629, "learning_rate": 4.1258718572587186e-05, "loss": 0.5886, "step": 8815 }, { "epoch": 0.25739394470234445, "grad_norm": 0.5585438764944306, "learning_rate": 4.125709651257097e-05, "loss": 0.6842, "step": 8816 }, { "epoch": 0.2574231409301918, "grad_norm": 0.5254837758986579, "learning_rate": 4.125547445255475e-05, "loss": 0.5703, "step": 8817 }, { "epoch": 0.25745233715803917, "grad_norm": 0.565450977644324, "learning_rate": 4.125385239253853e-05, "loss": 0.6675, "step": 8818 }, { "epoch": 0.25748153338588653, "grad_norm": 0.5948800236289162, "learning_rate": 4.1252230332522306e-05, "loss": 0.7603, "step": 8819 }, { "epoch": 0.2575107296137339, "grad_norm": 0.5558801798800302, "learning_rate": 4.125060827250609e-05, "loss": 0.6767, "step": 8820 }, { "epoch": 0.25753992584158125, "grad_norm": 0.5651443818536896, "learning_rate": 4.124898621248986e-05, "loss": 0.6455, "step": 8821 }, { "epoch": 0.2575691220694286, "grad_norm": 0.5175016844204634, "learning_rate": 4.1247364152473644e-05, "loss": 0.583, "step": 8822 }, { "epoch": 0.257598318297276, "grad_norm": 0.6045338849468024, "learning_rate": 4.1245742092457426e-05, "loss": 0.6812, "step": 8823 }, { "epoch": 0.25762751452512334, "grad_norm": 0.6042680222780342, "learning_rate": 4.12441200324412e-05, "loss": 0.7383, "step": 8824 }, { "epoch": 0.2576567107529707, "grad_norm": 0.5855939610421623, "learning_rate": 4.124249797242498e-05, "loss": 0.7515, "step": 8825 }, { "epoch": 0.25768590698081806, "grad_norm": 0.5993514652841896, "learning_rate": 4.124087591240876e-05, "loss": 0.7416, "step": 8826 }, { "epoch": 0.2577151032086654, "grad_norm": 0.5515206730423519, "learning_rate": 4.123925385239254e-05, "loss": 0.6673, "step": 8827 }, { "epoch": 0.2577442994365128, "grad_norm": 0.6241280383038303, "learning_rate": 4.123763179237632e-05, "loss": 0.6284, "step": 8828 }, { "epoch": 0.25777349566436014, "grad_norm": 0.5560677956398836, "learning_rate": 4.1236009732360096e-05, "loss": 0.585, "step": 8829 }, { "epoch": 0.2578026918922075, "grad_norm": 0.6012705662825046, "learning_rate": 4.123438767234388e-05, "loss": 0.6921, "step": 8830 }, { "epoch": 0.25783188812005486, "grad_norm": 0.5677105019963135, "learning_rate": 4.123276561232765e-05, "loss": 0.6202, "step": 8831 }, { "epoch": 0.2578610843479022, "grad_norm": 0.5902695274029366, "learning_rate": 4.1231143552311435e-05, "loss": 0.6411, "step": 8832 }, { "epoch": 0.2578902805757496, "grad_norm": 0.6406625600382132, "learning_rate": 4.1229521492295217e-05, "loss": 0.725, "step": 8833 }, { "epoch": 0.25791947680359695, "grad_norm": 0.537597640747408, "learning_rate": 4.122789943227899e-05, "loss": 0.6151, "step": 8834 }, { "epoch": 0.25794867303144436, "grad_norm": 0.497963063423067, "learning_rate": 4.122627737226278e-05, "loss": 0.5254, "step": 8835 }, { "epoch": 0.2579778692592917, "grad_norm": 0.5650850403605553, "learning_rate": 4.1224655312246555e-05, "loss": 0.6486, "step": 8836 }, { "epoch": 0.2580070654871391, "grad_norm": 0.5529058114631069, "learning_rate": 4.122303325223034e-05, "loss": 0.6426, "step": 8837 }, { "epoch": 0.25803626171498645, "grad_norm": 0.5985908269702023, "learning_rate": 4.122141119221412e-05, "loss": 0.7118, "step": 8838 }, { "epoch": 0.2580654579428338, "grad_norm": 0.5397990883509567, "learning_rate": 4.1219789132197894e-05, "loss": 0.6086, "step": 8839 }, { "epoch": 0.25809465417068117, "grad_norm": 0.5795016845251783, "learning_rate": 4.1218167072181675e-05, "loss": 0.7714, "step": 8840 }, { "epoch": 0.25812385039852853, "grad_norm": 0.5770828871960874, "learning_rate": 4.121654501216545e-05, "loss": 0.6658, "step": 8841 }, { "epoch": 0.2581530466263759, "grad_norm": 0.5011236116277816, "learning_rate": 4.121492295214923e-05, "loss": 0.5389, "step": 8842 }, { "epoch": 0.25818224285422325, "grad_norm": 0.5443760189514979, "learning_rate": 4.1213300892133014e-05, "loss": 0.6131, "step": 8843 }, { "epoch": 0.2582114390820706, "grad_norm": 0.5167054303955726, "learning_rate": 4.121167883211679e-05, "loss": 0.5537, "step": 8844 }, { "epoch": 0.258240635309918, "grad_norm": 0.5534700736869262, "learning_rate": 4.121005677210057e-05, "loss": 0.6162, "step": 8845 }, { "epoch": 0.25826983153776534, "grad_norm": 0.588034878667114, "learning_rate": 4.1208434712084346e-05, "loss": 0.7361, "step": 8846 }, { "epoch": 0.2582990277656127, "grad_norm": 0.5688134671791627, "learning_rate": 4.120681265206813e-05, "loss": 0.6733, "step": 8847 }, { "epoch": 0.25832822399346006, "grad_norm": 0.5305956356507395, "learning_rate": 4.120519059205191e-05, "loss": 0.6217, "step": 8848 }, { "epoch": 0.2583574202213074, "grad_norm": 0.5519025412482033, "learning_rate": 4.1203568532035684e-05, "loss": 0.6872, "step": 8849 }, { "epoch": 0.2583866164491548, "grad_norm": 0.5493732036380753, "learning_rate": 4.1201946472019466e-05, "loss": 0.6422, "step": 8850 }, { "epoch": 0.25841581267700214, "grad_norm": 0.5669220770265391, "learning_rate": 4.120032441200324e-05, "loss": 0.6736, "step": 8851 }, { "epoch": 0.2584450089048495, "grad_norm": 0.5162272700853836, "learning_rate": 4.119870235198702e-05, "loss": 0.5733, "step": 8852 }, { "epoch": 0.25847420513269687, "grad_norm": 0.5549983182816898, "learning_rate": 4.1197080291970804e-05, "loss": 0.5742, "step": 8853 }, { "epoch": 0.2585034013605442, "grad_norm": 0.5561882557766101, "learning_rate": 4.1195458231954586e-05, "loss": 0.6751, "step": 8854 }, { "epoch": 0.2585325975883916, "grad_norm": 0.5568318760725298, "learning_rate": 4.119383617193837e-05, "loss": 0.6408, "step": 8855 }, { "epoch": 0.25856179381623895, "grad_norm": 0.5616113816106273, "learning_rate": 4.119221411192214e-05, "loss": 0.7059, "step": 8856 }, { "epoch": 0.2585909900440863, "grad_norm": 0.5286987433910916, "learning_rate": 4.1190592051905925e-05, "loss": 0.5776, "step": 8857 }, { "epoch": 0.25862018627193367, "grad_norm": 0.5292018763493868, "learning_rate": 4.1188969991889706e-05, "loss": 0.5784, "step": 8858 }, { "epoch": 0.25864938249978103, "grad_norm": 0.5581380592776337, "learning_rate": 4.118734793187348e-05, "loss": 0.6913, "step": 8859 }, { "epoch": 0.2586785787276284, "grad_norm": 0.5347075304638387, "learning_rate": 4.118572587185726e-05, "loss": 0.6591, "step": 8860 }, { "epoch": 0.25870777495547576, "grad_norm": 0.5560040542682606, "learning_rate": 4.118410381184104e-05, "loss": 0.6467, "step": 8861 }, { "epoch": 0.2587369711833231, "grad_norm": 0.5604042536213996, "learning_rate": 4.118248175182482e-05, "loss": 0.7001, "step": 8862 }, { "epoch": 0.2587661674111705, "grad_norm": 0.6175734611739252, "learning_rate": 4.11808596918086e-05, "loss": 0.6903, "step": 8863 }, { "epoch": 0.25879536363901784, "grad_norm": 0.5309428043946053, "learning_rate": 4.117923763179238e-05, "loss": 0.6149, "step": 8864 }, { "epoch": 0.2588245598668652, "grad_norm": 0.5445799987595231, "learning_rate": 4.117761557177616e-05, "loss": 0.6735, "step": 8865 }, { "epoch": 0.25885375609471256, "grad_norm": 0.5287190055490868, "learning_rate": 4.1175993511759933e-05, "loss": 0.6241, "step": 8866 }, { "epoch": 0.2588829523225599, "grad_norm": 0.5723686643576034, "learning_rate": 4.1174371451743715e-05, "loss": 0.6819, "step": 8867 }, { "epoch": 0.2589121485504073, "grad_norm": 0.6162280898574964, "learning_rate": 4.11727493917275e-05, "loss": 0.621, "step": 8868 }, { "epoch": 0.25894134477825465, "grad_norm": 0.5610361125760025, "learning_rate": 4.117112733171127e-05, "loss": 0.5756, "step": 8869 }, { "epoch": 0.258970541006102, "grad_norm": 0.5886544443180121, "learning_rate": 4.1169505271695054e-05, "loss": 0.6697, "step": 8870 }, { "epoch": 0.25899973723394937, "grad_norm": 0.5128299997546338, "learning_rate": 4.116788321167883e-05, "loss": 0.5483, "step": 8871 }, { "epoch": 0.25902893346179673, "grad_norm": 0.5300805893090913, "learning_rate": 4.116626115166262e-05, "loss": 0.6376, "step": 8872 }, { "epoch": 0.2590581296896441, "grad_norm": 0.531527132384336, "learning_rate": 4.116463909164639e-05, "loss": 0.6528, "step": 8873 }, { "epoch": 0.25908732591749145, "grad_norm": 0.594596239956109, "learning_rate": 4.1163017031630174e-05, "loss": 0.7299, "step": 8874 }, { "epoch": 0.2591165221453388, "grad_norm": 0.5515486353000173, "learning_rate": 4.1161394971613956e-05, "loss": 0.6737, "step": 8875 }, { "epoch": 0.2591457183731862, "grad_norm": 0.5855318704353815, "learning_rate": 4.115977291159773e-05, "loss": 0.7383, "step": 8876 }, { "epoch": 0.25917491460103353, "grad_norm": 0.5624962027221047, "learning_rate": 4.115815085158151e-05, "loss": 0.7075, "step": 8877 }, { "epoch": 0.2592041108288809, "grad_norm": 0.4946918721866984, "learning_rate": 4.1156528791565294e-05, "loss": 0.514, "step": 8878 }, { "epoch": 0.25923330705672826, "grad_norm": 0.5412949861308743, "learning_rate": 4.115490673154907e-05, "loss": 0.6216, "step": 8879 }, { "epoch": 0.2592625032845756, "grad_norm": 0.590399244579216, "learning_rate": 4.115328467153285e-05, "loss": 0.6957, "step": 8880 }, { "epoch": 0.259291699512423, "grad_norm": 0.5438992214564963, "learning_rate": 4.1151662611516626e-05, "loss": 0.6104, "step": 8881 }, { "epoch": 0.25932089574027034, "grad_norm": 0.5378626304823666, "learning_rate": 4.115004055150041e-05, "loss": 0.6448, "step": 8882 }, { "epoch": 0.2593500919681177, "grad_norm": 0.5597809073812245, "learning_rate": 4.114841849148419e-05, "loss": 0.6761, "step": 8883 }, { "epoch": 0.25937928819596506, "grad_norm": 0.4804507079400482, "learning_rate": 4.1146796431467964e-05, "loss": 0.5365, "step": 8884 }, { "epoch": 0.2594084844238124, "grad_norm": 0.5243642372827533, "learning_rate": 4.1145174371451746e-05, "loss": 0.6005, "step": 8885 }, { "epoch": 0.2594376806516598, "grad_norm": 0.5492404914474713, "learning_rate": 4.114355231143552e-05, "loss": 0.6811, "step": 8886 }, { "epoch": 0.25946687687950715, "grad_norm": 0.5651063342164306, "learning_rate": 4.11419302514193e-05, "loss": 0.7059, "step": 8887 }, { "epoch": 0.2594960731073545, "grad_norm": 0.5349909704014176, "learning_rate": 4.1140308191403085e-05, "loss": 0.5907, "step": 8888 }, { "epoch": 0.25952526933520187, "grad_norm": 0.5488860922579425, "learning_rate": 4.113868613138686e-05, "loss": 0.6432, "step": 8889 }, { "epoch": 0.25955446556304923, "grad_norm": 0.5746639667534869, "learning_rate": 4.113706407137064e-05, "loss": 0.6427, "step": 8890 }, { "epoch": 0.2595836617908966, "grad_norm": 0.5426201407792479, "learning_rate": 4.113544201135442e-05, "loss": 0.6598, "step": 8891 }, { "epoch": 0.25961285801874395, "grad_norm": 0.5494772134222501, "learning_rate": 4.1133819951338205e-05, "loss": 0.6354, "step": 8892 }, { "epoch": 0.2596420542465913, "grad_norm": 0.6098481284659205, "learning_rate": 4.113219789132198e-05, "loss": 0.64, "step": 8893 }, { "epoch": 0.2596712504744387, "grad_norm": 0.5218180089794835, "learning_rate": 4.113057583130576e-05, "loss": 0.5713, "step": 8894 }, { "epoch": 0.2597004467022861, "grad_norm": 0.5708841640576787, "learning_rate": 4.1128953771289544e-05, "loss": 0.7145, "step": 8895 }, { "epoch": 0.25972964293013345, "grad_norm": 0.511248507408507, "learning_rate": 4.112733171127332e-05, "loss": 0.5692, "step": 8896 }, { "epoch": 0.2597588391579808, "grad_norm": 0.5434695727707305, "learning_rate": 4.11257096512571e-05, "loss": 0.6427, "step": 8897 }, { "epoch": 0.2597880353858282, "grad_norm": 0.5905351966189137, "learning_rate": 4.1124087591240875e-05, "loss": 0.6824, "step": 8898 }, { "epoch": 0.25981723161367554, "grad_norm": 0.5619274128984625, "learning_rate": 4.112246553122466e-05, "loss": 0.6501, "step": 8899 }, { "epoch": 0.2598464278415229, "grad_norm": 0.5858978857308282, "learning_rate": 4.112084347120844e-05, "loss": 0.6862, "step": 8900 }, { "epoch": 0.25987562406937026, "grad_norm": 0.488934067150581, "learning_rate": 4.1119221411192214e-05, "loss": 0.5317, "step": 8901 }, { "epoch": 0.2599048202972176, "grad_norm": 0.5602008175302322, "learning_rate": 4.1117599351175996e-05, "loss": 0.689, "step": 8902 }, { "epoch": 0.259934016525065, "grad_norm": 0.54344716344191, "learning_rate": 4.111597729115978e-05, "loss": 0.6774, "step": 8903 }, { "epoch": 0.25996321275291234, "grad_norm": 0.49595698813920314, "learning_rate": 4.111435523114355e-05, "loss": 0.523, "step": 8904 }, { "epoch": 0.2599924089807597, "grad_norm": 0.5064345194625384, "learning_rate": 4.1112733171127334e-05, "loss": 0.5442, "step": 8905 }, { "epoch": 0.26002160520860707, "grad_norm": 0.5983390132772827, "learning_rate": 4.111111111111111e-05, "loss": 0.6835, "step": 8906 }, { "epoch": 0.2600508014364544, "grad_norm": 0.5111438618419855, "learning_rate": 4.110948905109489e-05, "loss": 0.5092, "step": 8907 }, { "epoch": 0.2600799976643018, "grad_norm": 0.5303205404426041, "learning_rate": 4.110786699107867e-05, "loss": 0.5977, "step": 8908 }, { "epoch": 0.26010919389214915, "grad_norm": 0.5254313932171728, "learning_rate": 4.110624493106245e-05, "loss": 0.6037, "step": 8909 }, { "epoch": 0.2601383901199965, "grad_norm": 0.5571517734990684, "learning_rate": 4.1104622871046236e-05, "loss": 0.6566, "step": 8910 }, { "epoch": 0.26016758634784387, "grad_norm": 0.5877995210622295, "learning_rate": 4.110300081103001e-05, "loss": 0.6861, "step": 8911 }, { "epoch": 0.26019678257569123, "grad_norm": 0.6164147123446435, "learning_rate": 4.110137875101379e-05, "loss": 0.7596, "step": 8912 }, { "epoch": 0.2602259788035386, "grad_norm": 0.5594768182774145, "learning_rate": 4.109975669099757e-05, "loss": 0.7131, "step": 8913 }, { "epoch": 0.26025517503138595, "grad_norm": 0.49797569260999214, "learning_rate": 4.109813463098135e-05, "loss": 0.5512, "step": 8914 }, { "epoch": 0.2602843712592333, "grad_norm": 0.5914786790946857, "learning_rate": 4.109651257096513e-05, "loss": 0.7053, "step": 8915 }, { "epoch": 0.2603135674870807, "grad_norm": 0.5738331350921185, "learning_rate": 4.1094890510948906e-05, "loss": 0.6427, "step": 8916 }, { "epoch": 0.26034276371492804, "grad_norm": 0.5959379393790323, "learning_rate": 4.109326845093269e-05, "loss": 0.6693, "step": 8917 }, { "epoch": 0.2603719599427754, "grad_norm": 0.5379405124569532, "learning_rate": 4.109164639091646e-05, "loss": 0.6311, "step": 8918 }, { "epoch": 0.26040115617062276, "grad_norm": 0.5982766106895203, "learning_rate": 4.1090024330900245e-05, "loss": 0.7539, "step": 8919 }, { "epoch": 0.2604303523984701, "grad_norm": 0.5753518335037457, "learning_rate": 4.1088402270884027e-05, "loss": 0.712, "step": 8920 }, { "epoch": 0.2604595486263175, "grad_norm": 0.5171858048289235, "learning_rate": 4.10867802108678e-05, "loss": 0.5456, "step": 8921 }, { "epoch": 0.26048874485416484, "grad_norm": 0.5304880869041619, "learning_rate": 4.108515815085158e-05, "loss": 0.6051, "step": 8922 }, { "epoch": 0.2605179410820122, "grad_norm": 0.5276456015087398, "learning_rate": 4.1083536090835365e-05, "loss": 0.5952, "step": 8923 }, { "epoch": 0.26054713730985957, "grad_norm": 0.6135696517586322, "learning_rate": 4.108191403081914e-05, "loss": 0.7287, "step": 8924 }, { "epoch": 0.26057633353770693, "grad_norm": 0.6201209471733916, "learning_rate": 4.108029197080292e-05, "loss": 0.7223, "step": 8925 }, { "epoch": 0.2606055297655543, "grad_norm": 0.5406316059247652, "learning_rate": 4.10786699107867e-05, "loss": 0.5923, "step": 8926 }, { "epoch": 0.26063472599340165, "grad_norm": 0.7088710229921077, "learning_rate": 4.107704785077048e-05, "loss": 0.6999, "step": 8927 }, { "epoch": 0.260663922221249, "grad_norm": 0.5505604982687168, "learning_rate": 4.107542579075426e-05, "loss": 0.6547, "step": 8928 }, { "epoch": 0.2606931184490964, "grad_norm": 0.5098766743866086, "learning_rate": 4.107380373073804e-05, "loss": 0.5619, "step": 8929 }, { "epoch": 0.26072231467694373, "grad_norm": 0.5791928261488772, "learning_rate": 4.1072181670721824e-05, "loss": 0.6887, "step": 8930 }, { "epoch": 0.2607515109047911, "grad_norm": 0.5216447961343357, "learning_rate": 4.10705596107056e-05, "loss": 0.5685, "step": 8931 }, { "epoch": 0.26078070713263846, "grad_norm": 0.5409633613453649, "learning_rate": 4.106893755068938e-05, "loss": 0.6101, "step": 8932 }, { "epoch": 0.2608099033604858, "grad_norm": 0.5384507995690179, "learning_rate": 4.1067315490673156e-05, "loss": 0.5757, "step": 8933 }, { "epoch": 0.2608390995883332, "grad_norm": 0.5386748198007831, "learning_rate": 4.106569343065694e-05, "loss": 0.6374, "step": 8934 }, { "epoch": 0.26086829581618054, "grad_norm": 0.5948584364979959, "learning_rate": 4.106407137064072e-05, "loss": 0.7205, "step": 8935 }, { "epoch": 0.2608974920440279, "grad_norm": 0.555002131007709, "learning_rate": 4.1062449310624494e-05, "loss": 0.6718, "step": 8936 }, { "epoch": 0.26092668827187526, "grad_norm": 0.5826055257730323, "learning_rate": 4.1060827250608276e-05, "loss": 0.7109, "step": 8937 }, { "epoch": 0.2609558844997226, "grad_norm": 0.5962681437242442, "learning_rate": 4.105920519059205e-05, "loss": 0.7338, "step": 8938 }, { "epoch": 0.26098508072757, "grad_norm": 0.562075682692579, "learning_rate": 4.105758313057583e-05, "loss": 0.7441, "step": 8939 }, { "epoch": 0.26101427695541735, "grad_norm": 0.5336376025795967, "learning_rate": 4.1055961070559614e-05, "loss": 0.5885, "step": 8940 }, { "epoch": 0.2610434731832647, "grad_norm": 0.5492277898697462, "learning_rate": 4.105433901054339e-05, "loss": 0.6119, "step": 8941 }, { "epoch": 0.26107266941111207, "grad_norm": 0.5471180638773049, "learning_rate": 4.105271695052717e-05, "loss": 0.6233, "step": 8942 }, { "epoch": 0.26110186563895943, "grad_norm": 0.5205097947237307, "learning_rate": 4.1051094890510946e-05, "loss": 0.6657, "step": 8943 }, { "epoch": 0.2611310618668068, "grad_norm": 0.5946279641580529, "learning_rate": 4.104947283049473e-05, "loss": 0.6986, "step": 8944 }, { "epoch": 0.26116025809465415, "grad_norm": 0.5745548656663761, "learning_rate": 4.104785077047851e-05, "loss": 0.6918, "step": 8945 }, { "epoch": 0.2611894543225015, "grad_norm": 0.5595824585569203, "learning_rate": 4.1046228710462285e-05, "loss": 0.619, "step": 8946 }, { "epoch": 0.2612186505503489, "grad_norm": 0.5039132205820834, "learning_rate": 4.1044606650446066e-05, "loss": 0.5814, "step": 8947 }, { "epoch": 0.26124784677819624, "grad_norm": 0.5782978334283293, "learning_rate": 4.104298459042985e-05, "loss": 0.661, "step": 8948 }, { "epoch": 0.2612770430060436, "grad_norm": 0.5476575224981781, "learning_rate": 4.104136253041363e-05, "loss": 0.6672, "step": 8949 }, { "epoch": 0.26130623923389096, "grad_norm": 0.6056079889759299, "learning_rate": 4.103974047039741e-05, "loss": 0.6389, "step": 8950 }, { "epoch": 0.2613354354617383, "grad_norm": 0.5412398264182554, "learning_rate": 4.103811841038119e-05, "loss": 0.6038, "step": 8951 }, { "epoch": 0.2613646316895857, "grad_norm": 0.572242886006531, "learning_rate": 4.103649635036497e-05, "loss": 0.6509, "step": 8952 }, { "epoch": 0.26139382791743304, "grad_norm": 0.5433632824650817, "learning_rate": 4.1034874290348743e-05, "loss": 0.6389, "step": 8953 }, { "epoch": 0.2614230241452804, "grad_norm": 0.5171016024011161, "learning_rate": 4.1033252230332525e-05, "loss": 0.6061, "step": 8954 }, { "epoch": 0.2614522203731278, "grad_norm": 0.5396966074672666, "learning_rate": 4.103163017031631e-05, "loss": 0.5525, "step": 8955 }, { "epoch": 0.2614814166009752, "grad_norm": 0.5322480300769388, "learning_rate": 4.103000811030008e-05, "loss": 0.5969, "step": 8956 }, { "epoch": 0.26151061282882254, "grad_norm": 0.6078781084157406, "learning_rate": 4.1028386050283864e-05, "loss": 0.7286, "step": 8957 }, { "epoch": 0.2615398090566699, "grad_norm": 0.5600211129388379, "learning_rate": 4.102676399026764e-05, "loss": 0.6508, "step": 8958 }, { "epoch": 0.26156900528451726, "grad_norm": 0.5338862467565401, "learning_rate": 4.102514193025142e-05, "loss": 0.6216, "step": 8959 }, { "epoch": 0.2615982015123646, "grad_norm": 0.5709751847925533, "learning_rate": 4.10235198702352e-05, "loss": 0.7048, "step": 8960 }, { "epoch": 0.261627397740212, "grad_norm": 0.558438295427015, "learning_rate": 4.102189781021898e-05, "loss": 0.7011, "step": 8961 }, { "epoch": 0.26165659396805935, "grad_norm": 0.5252831451628437, "learning_rate": 4.102027575020276e-05, "loss": 0.6042, "step": 8962 }, { "epoch": 0.2616857901959067, "grad_norm": 0.5819454928127096, "learning_rate": 4.1018653690186534e-05, "loss": 0.6853, "step": 8963 }, { "epoch": 0.26171498642375407, "grad_norm": 0.5540173008408937, "learning_rate": 4.1017031630170316e-05, "loss": 0.656, "step": 8964 }, { "epoch": 0.26174418265160143, "grad_norm": 0.5172184482889137, "learning_rate": 4.10154095701541e-05, "loss": 0.5779, "step": 8965 }, { "epoch": 0.2617733788794488, "grad_norm": 0.5537780754431483, "learning_rate": 4.101378751013787e-05, "loss": 0.6124, "step": 8966 }, { "epoch": 0.26180257510729615, "grad_norm": 0.5457908416093095, "learning_rate": 4.101216545012166e-05, "loss": 0.6297, "step": 8967 }, { "epoch": 0.2618317713351435, "grad_norm": 0.5332142481273743, "learning_rate": 4.1010543390105436e-05, "loss": 0.5919, "step": 8968 }, { "epoch": 0.2618609675629909, "grad_norm": 0.5384225473195569, "learning_rate": 4.100892133008922e-05, "loss": 0.5832, "step": 8969 }, { "epoch": 0.26189016379083824, "grad_norm": 0.5724086608341279, "learning_rate": 4.1007299270073e-05, "loss": 0.6966, "step": 8970 }, { "epoch": 0.2619193600186856, "grad_norm": 0.5723381112278954, "learning_rate": 4.1005677210056774e-05, "loss": 0.6407, "step": 8971 }, { "epoch": 0.26194855624653296, "grad_norm": 0.5770968366066961, "learning_rate": 4.1004055150040556e-05, "loss": 0.6227, "step": 8972 }, { "epoch": 0.2619777524743803, "grad_norm": 0.5775803277694828, "learning_rate": 4.100243309002433e-05, "loss": 0.757, "step": 8973 }, { "epoch": 0.2620069487022277, "grad_norm": 0.5423584907626319, "learning_rate": 4.100081103000811e-05, "loss": 0.6196, "step": 8974 }, { "epoch": 0.26203614493007504, "grad_norm": 0.554532170720585, "learning_rate": 4.0999188969991895e-05, "loss": 0.6743, "step": 8975 }, { "epoch": 0.2620653411579224, "grad_norm": 0.6160846297727307, "learning_rate": 4.099756690997567e-05, "loss": 0.7063, "step": 8976 }, { "epoch": 0.26209453738576977, "grad_norm": 0.5799416879866364, "learning_rate": 4.099594484995945e-05, "loss": 0.6685, "step": 8977 }, { "epoch": 0.2621237336136171, "grad_norm": 0.5707668744851612, "learning_rate": 4.0994322789943226e-05, "loss": 0.5722, "step": 8978 }, { "epoch": 0.2621529298414645, "grad_norm": 0.6000395463668424, "learning_rate": 4.099270072992701e-05, "loss": 0.6941, "step": 8979 }, { "epoch": 0.26218212606931185, "grad_norm": 0.5689518998754304, "learning_rate": 4.099107866991079e-05, "loss": 0.6413, "step": 8980 }, { "epoch": 0.2622113222971592, "grad_norm": 0.533561012904682, "learning_rate": 4.0989456609894565e-05, "loss": 0.608, "step": 8981 }, { "epoch": 0.26224051852500657, "grad_norm": 0.533536655691861, "learning_rate": 4.098783454987835e-05, "loss": 0.5779, "step": 8982 }, { "epoch": 0.26226971475285393, "grad_norm": 0.5174957106402475, "learning_rate": 4.098621248986212e-05, "loss": 0.5891, "step": 8983 }, { "epoch": 0.2622989109807013, "grad_norm": 0.5756534919797482, "learning_rate": 4.0984590429845903e-05, "loss": 0.6584, "step": 8984 }, { "epoch": 0.26232810720854866, "grad_norm": 0.5062802354097524, "learning_rate": 4.0982968369829685e-05, "loss": 0.5901, "step": 8985 }, { "epoch": 0.262357303436396, "grad_norm": 0.5697749320533224, "learning_rate": 4.098134630981347e-05, "loss": 0.7311, "step": 8986 }, { "epoch": 0.2623864996642434, "grad_norm": 0.5203008485951344, "learning_rate": 4.097972424979725e-05, "loss": 0.6205, "step": 8987 }, { "epoch": 0.26241569589209074, "grad_norm": 0.5778344134465756, "learning_rate": 4.0978102189781024e-05, "loss": 0.6653, "step": 8988 }, { "epoch": 0.2624448921199381, "grad_norm": 0.5705090498390465, "learning_rate": 4.0976480129764806e-05, "loss": 0.5481, "step": 8989 }, { "epoch": 0.26247408834778546, "grad_norm": 0.49689836016852396, "learning_rate": 4.097485806974859e-05, "loss": 0.5288, "step": 8990 }, { "epoch": 0.2625032845756328, "grad_norm": 0.5610104299029215, "learning_rate": 4.097323600973236e-05, "loss": 0.6639, "step": 8991 }, { "epoch": 0.2625324808034802, "grad_norm": 0.5156411098584358, "learning_rate": 4.0971613949716144e-05, "loss": 0.6018, "step": 8992 }, { "epoch": 0.26256167703132754, "grad_norm": 0.57004697948614, "learning_rate": 4.096999188969992e-05, "loss": 0.7203, "step": 8993 }, { "epoch": 0.2625908732591749, "grad_norm": 0.5631477237415095, "learning_rate": 4.09683698296837e-05, "loss": 0.6521, "step": 8994 }, { "epoch": 0.26262006948702227, "grad_norm": 0.5400988849492803, "learning_rate": 4.096674776966748e-05, "loss": 0.6549, "step": 8995 }, { "epoch": 0.26264926571486963, "grad_norm": 0.5662802912020932, "learning_rate": 4.096512570965126e-05, "loss": 0.6901, "step": 8996 }, { "epoch": 0.262678461942717, "grad_norm": 0.5637424366622948, "learning_rate": 4.096350364963504e-05, "loss": 0.668, "step": 8997 }, { "epoch": 0.26270765817056435, "grad_norm": 0.5508939124621776, "learning_rate": 4.0961881589618814e-05, "loss": 0.632, "step": 8998 }, { "epoch": 0.2627368543984117, "grad_norm": 0.5776891340798626, "learning_rate": 4.0960259529602596e-05, "loss": 0.7178, "step": 8999 }, { "epoch": 0.2627660506262591, "grad_norm": 0.5420206382196331, "learning_rate": 4.095863746958638e-05, "loss": 0.6104, "step": 9000 }, { "epoch": 0.26279524685410643, "grad_norm": 0.5829105199484698, "learning_rate": 4.095701540957015e-05, "loss": 0.684, "step": 9001 }, { "epoch": 0.2628244430819538, "grad_norm": 0.5165078167693965, "learning_rate": 4.0955393349553935e-05, "loss": 0.5475, "step": 9002 }, { "epoch": 0.26285363930980116, "grad_norm": 0.5569764434135357, "learning_rate": 4.095377128953771e-05, "loss": 0.6732, "step": 9003 }, { "epoch": 0.2628828355376485, "grad_norm": 0.5421776165261354, "learning_rate": 4.09521492295215e-05, "loss": 0.6242, "step": 9004 }, { "epoch": 0.2629120317654959, "grad_norm": 0.48473679374643086, "learning_rate": 4.095052716950527e-05, "loss": 0.5502, "step": 9005 }, { "epoch": 0.26294122799334324, "grad_norm": 0.5380097538268065, "learning_rate": 4.0948905109489055e-05, "loss": 0.5993, "step": 9006 }, { "epoch": 0.2629704242211906, "grad_norm": 0.5330082803453643, "learning_rate": 4.0947283049472837e-05, "loss": 0.6036, "step": 9007 }, { "epoch": 0.26299962044903796, "grad_norm": 0.5443714772180361, "learning_rate": 4.094566098945661e-05, "loss": 0.5571, "step": 9008 }, { "epoch": 0.2630288166768853, "grad_norm": 0.5702546251688991, "learning_rate": 4.094403892944039e-05, "loss": 0.6691, "step": 9009 }, { "epoch": 0.2630580129047327, "grad_norm": 0.5143720136443702, "learning_rate": 4.094241686942417e-05, "loss": 0.5538, "step": 9010 }, { "epoch": 0.26308720913258005, "grad_norm": 0.5838253966220626, "learning_rate": 4.094079480940795e-05, "loss": 0.6257, "step": 9011 }, { "epoch": 0.2631164053604274, "grad_norm": 0.5463453896390258, "learning_rate": 4.093917274939173e-05, "loss": 0.6531, "step": 9012 }, { "epoch": 0.26314560158827477, "grad_norm": 0.49353956765365525, "learning_rate": 4.093755068937551e-05, "loss": 0.5385, "step": 9013 }, { "epoch": 0.26317479781612213, "grad_norm": 0.5289324571247183, "learning_rate": 4.093592862935929e-05, "loss": 0.553, "step": 9014 }, { "epoch": 0.2632039940439695, "grad_norm": 0.5385102422135474, "learning_rate": 4.093430656934307e-05, "loss": 0.579, "step": 9015 }, { "epoch": 0.2632331902718169, "grad_norm": 0.5630827640774314, "learning_rate": 4.0932684509326845e-05, "loss": 0.6649, "step": 9016 }, { "epoch": 0.26326238649966427, "grad_norm": 0.5910053386751587, "learning_rate": 4.093106244931063e-05, "loss": 0.7306, "step": 9017 }, { "epoch": 0.26329158272751163, "grad_norm": 0.5724924476405129, "learning_rate": 4.09294403892944e-05, "loss": 0.6713, "step": 9018 }, { "epoch": 0.263320778955359, "grad_norm": 0.5979001490021805, "learning_rate": 4.0927818329278184e-05, "loss": 0.7449, "step": 9019 }, { "epoch": 0.26334997518320635, "grad_norm": 0.5969347326223208, "learning_rate": 4.0926196269261966e-05, "loss": 0.7557, "step": 9020 }, { "epoch": 0.2633791714110537, "grad_norm": 0.5616015500356525, "learning_rate": 4.092457420924574e-05, "loss": 0.6671, "step": 9021 }, { "epoch": 0.2634083676389011, "grad_norm": 0.6232163043259746, "learning_rate": 4.092295214922952e-05, "loss": 0.6918, "step": 9022 }, { "epoch": 0.26343756386674844, "grad_norm": 0.583719288473198, "learning_rate": 4.0921330089213304e-05, "loss": 0.6928, "step": 9023 }, { "epoch": 0.2634667600945958, "grad_norm": 0.534170101031562, "learning_rate": 4.0919708029197086e-05, "loss": 0.5688, "step": 9024 }, { "epoch": 0.26349595632244316, "grad_norm": 0.5509244458617488, "learning_rate": 4.091808596918086e-05, "loss": 0.6355, "step": 9025 }, { "epoch": 0.2635251525502905, "grad_norm": 0.5453486598697473, "learning_rate": 4.091646390916464e-05, "loss": 0.6342, "step": 9026 }, { "epoch": 0.2635543487781379, "grad_norm": 0.5857603282166817, "learning_rate": 4.0914841849148424e-05, "loss": 0.7192, "step": 9027 }, { "epoch": 0.26358354500598524, "grad_norm": 0.5548144851572034, "learning_rate": 4.09132197891322e-05, "loss": 0.5824, "step": 9028 }, { "epoch": 0.2636127412338326, "grad_norm": 0.5091513876934622, "learning_rate": 4.091159772911598e-05, "loss": 0.5551, "step": 9029 }, { "epoch": 0.26364193746167996, "grad_norm": 0.5417846441750089, "learning_rate": 4.0909975669099756e-05, "loss": 0.5899, "step": 9030 }, { "epoch": 0.2636711336895273, "grad_norm": 0.6093783226557706, "learning_rate": 4.090835360908354e-05, "loss": 0.7724, "step": 9031 }, { "epoch": 0.2637003299173747, "grad_norm": 0.515504494867416, "learning_rate": 4.090673154906732e-05, "loss": 0.551, "step": 9032 }, { "epoch": 0.26372952614522205, "grad_norm": 0.5386210830566632, "learning_rate": 4.0905109489051095e-05, "loss": 0.6532, "step": 9033 }, { "epoch": 0.2637587223730694, "grad_norm": 0.5608945558144436, "learning_rate": 4.0903487429034876e-05, "loss": 0.6579, "step": 9034 }, { "epoch": 0.26378791860091677, "grad_norm": 0.5537130602804472, "learning_rate": 4.090186536901866e-05, "loss": 0.6029, "step": 9035 }, { "epoch": 0.26381711482876413, "grad_norm": 0.5380085662212245, "learning_rate": 4.090024330900243e-05, "loss": 0.5793, "step": 9036 }, { "epoch": 0.2638463110566115, "grad_norm": 0.5718343844725702, "learning_rate": 4.0898621248986215e-05, "loss": 0.6438, "step": 9037 }, { "epoch": 0.26387550728445885, "grad_norm": 0.5626532093737604, "learning_rate": 4.089699918896999e-05, "loss": 0.6292, "step": 9038 }, { "epoch": 0.2639047035123062, "grad_norm": 0.580589691762909, "learning_rate": 4.089537712895377e-05, "loss": 0.6713, "step": 9039 }, { "epoch": 0.2639338997401536, "grad_norm": 0.5572892087312893, "learning_rate": 4.0893755068937553e-05, "loss": 0.6582, "step": 9040 }, { "epoch": 0.26396309596800094, "grad_norm": 0.5401617860276922, "learning_rate": 4.089213300892133e-05, "loss": 0.6018, "step": 9041 }, { "epoch": 0.2639922921958483, "grad_norm": 0.528359831468166, "learning_rate": 4.089051094890512e-05, "loss": 0.5844, "step": 9042 }, { "epoch": 0.26402148842369566, "grad_norm": 0.6243026149471189, "learning_rate": 4.088888888888889e-05, "loss": 0.6786, "step": 9043 }, { "epoch": 0.264050684651543, "grad_norm": 0.5022987358674978, "learning_rate": 4.0887266828872674e-05, "loss": 0.5638, "step": 9044 }, { "epoch": 0.2640798808793904, "grad_norm": 0.512644823106271, "learning_rate": 4.088564476885645e-05, "loss": 0.5679, "step": 9045 }, { "epoch": 0.26410907710723774, "grad_norm": 0.5485879645311765, "learning_rate": 4.088402270884023e-05, "loss": 0.6082, "step": 9046 }, { "epoch": 0.2641382733350851, "grad_norm": 0.5548086759066785, "learning_rate": 4.088240064882401e-05, "loss": 0.6001, "step": 9047 }, { "epoch": 0.26416746956293247, "grad_norm": 0.5733430987790891, "learning_rate": 4.088077858880779e-05, "loss": 0.6269, "step": 9048 }, { "epoch": 0.2641966657907798, "grad_norm": 0.5550831059720214, "learning_rate": 4.087915652879157e-05, "loss": 0.6029, "step": 9049 }, { "epoch": 0.2642258620186272, "grad_norm": 0.6360846277989672, "learning_rate": 4.0877534468775344e-05, "loss": 0.6581, "step": 9050 }, { "epoch": 0.26425505824647455, "grad_norm": 0.6606394003774989, "learning_rate": 4.0875912408759126e-05, "loss": 0.6749, "step": 9051 }, { "epoch": 0.2642842544743219, "grad_norm": 0.6147606517725845, "learning_rate": 4.087429034874291e-05, "loss": 0.7319, "step": 9052 }, { "epoch": 0.2643134507021693, "grad_norm": 0.5584099558087082, "learning_rate": 4.087266828872668e-05, "loss": 0.6783, "step": 9053 }, { "epoch": 0.26434264693001663, "grad_norm": 0.580236543907631, "learning_rate": 4.0871046228710464e-05, "loss": 0.6879, "step": 9054 }, { "epoch": 0.264371843157864, "grad_norm": 0.5173291708010408, "learning_rate": 4.086942416869424e-05, "loss": 0.5723, "step": 9055 }, { "epoch": 0.26440103938571136, "grad_norm": 0.567000188077564, "learning_rate": 4.086780210867802e-05, "loss": 0.659, "step": 9056 }, { "epoch": 0.2644302356135587, "grad_norm": 0.5374724636790155, "learning_rate": 4.08661800486618e-05, "loss": 0.6265, "step": 9057 }, { "epoch": 0.2644594318414061, "grad_norm": 0.4940966829064472, "learning_rate": 4.086455798864558e-05, "loss": 0.5564, "step": 9058 }, { "epoch": 0.26448862806925344, "grad_norm": 0.5074831480035116, "learning_rate": 4.086293592862936e-05, "loss": 0.5785, "step": 9059 }, { "epoch": 0.2645178242971008, "grad_norm": 0.5835300122165512, "learning_rate": 4.086131386861314e-05, "loss": 0.6872, "step": 9060 }, { "epoch": 0.26454702052494816, "grad_norm": 0.5477281927249669, "learning_rate": 4.085969180859692e-05, "loss": 0.6747, "step": 9061 }, { "epoch": 0.2645762167527955, "grad_norm": 0.5656285304512212, "learning_rate": 4.0858069748580705e-05, "loss": 0.7005, "step": 9062 }, { "epoch": 0.2646054129806429, "grad_norm": 0.5612119198311011, "learning_rate": 4.085644768856448e-05, "loss": 0.6462, "step": 9063 }, { "epoch": 0.26463460920849025, "grad_norm": 0.5931115622084288, "learning_rate": 4.085482562854826e-05, "loss": 0.6977, "step": 9064 }, { "epoch": 0.2646638054363376, "grad_norm": 0.7468861732724431, "learning_rate": 4.0853203568532036e-05, "loss": 0.7161, "step": 9065 }, { "epoch": 0.26469300166418497, "grad_norm": 0.5423685451234366, "learning_rate": 4.085158150851582e-05, "loss": 0.6591, "step": 9066 }, { "epoch": 0.26472219789203233, "grad_norm": 0.5624794361392156, "learning_rate": 4.08499594484996e-05, "loss": 0.6633, "step": 9067 }, { "epoch": 0.2647513941198797, "grad_norm": 0.571907662969384, "learning_rate": 4.0848337388483375e-05, "loss": 0.731, "step": 9068 }, { "epoch": 0.26478059034772705, "grad_norm": 0.5571058341117658, "learning_rate": 4.084671532846716e-05, "loss": 0.6361, "step": 9069 }, { "epoch": 0.2648097865755744, "grad_norm": 0.5728813509293408, "learning_rate": 4.084509326845093e-05, "loss": 0.6165, "step": 9070 }, { "epoch": 0.2648389828034218, "grad_norm": 0.5525199929398213, "learning_rate": 4.0843471208434714e-05, "loss": 0.6451, "step": 9071 }, { "epoch": 0.26486817903126914, "grad_norm": 0.5527454707186424, "learning_rate": 4.0841849148418495e-05, "loss": 0.6566, "step": 9072 }, { "epoch": 0.2648973752591165, "grad_norm": 0.5299337123043383, "learning_rate": 4.084022708840227e-05, "loss": 0.5818, "step": 9073 }, { "epoch": 0.26492657148696386, "grad_norm": 0.6217807230367223, "learning_rate": 4.083860502838605e-05, "loss": 0.6193, "step": 9074 }, { "epoch": 0.2649557677148112, "grad_norm": 0.6387321403032731, "learning_rate": 4.083698296836983e-05, "loss": 0.7191, "step": 9075 }, { "epoch": 0.26498496394265864, "grad_norm": 0.5591564539002541, "learning_rate": 4.083536090835361e-05, "loss": 0.6664, "step": 9076 }, { "epoch": 0.265014160170506, "grad_norm": 0.5635800118160654, "learning_rate": 4.083373884833739e-05, "loss": 0.7085, "step": 9077 }, { "epoch": 0.26504335639835336, "grad_norm": 0.5729498698727159, "learning_rate": 4.0832116788321166e-05, "loss": 0.6917, "step": 9078 }, { "epoch": 0.2650725526262007, "grad_norm": 0.6132169629970702, "learning_rate": 4.083049472830495e-05, "loss": 0.792, "step": 9079 }, { "epoch": 0.2651017488540481, "grad_norm": 0.5417192815211301, "learning_rate": 4.082887266828873e-05, "loss": 0.6543, "step": 9080 }, { "epoch": 0.26513094508189544, "grad_norm": 0.5464776203365092, "learning_rate": 4.082725060827251e-05, "loss": 0.6427, "step": 9081 }, { "epoch": 0.2651601413097428, "grad_norm": 0.5737668312319878, "learning_rate": 4.082562854825629e-05, "loss": 0.6706, "step": 9082 }, { "epoch": 0.26518933753759016, "grad_norm": 0.5227760460473219, "learning_rate": 4.082400648824007e-05, "loss": 0.6034, "step": 9083 }, { "epoch": 0.2652185337654375, "grad_norm": 0.5352835027265096, "learning_rate": 4.082238442822385e-05, "loss": 0.5933, "step": 9084 }, { "epoch": 0.2652477299932849, "grad_norm": 0.5200927402933048, "learning_rate": 4.0820762368207624e-05, "loss": 0.5713, "step": 9085 }, { "epoch": 0.26527692622113225, "grad_norm": 0.5274769654006439, "learning_rate": 4.0819140308191406e-05, "loss": 0.5753, "step": 9086 }, { "epoch": 0.2653061224489796, "grad_norm": 0.5159764693013953, "learning_rate": 4.081751824817519e-05, "loss": 0.5891, "step": 9087 }, { "epoch": 0.26533531867682697, "grad_norm": 0.5632184024958583, "learning_rate": 4.081589618815896e-05, "loss": 0.6646, "step": 9088 }, { "epoch": 0.26536451490467433, "grad_norm": 0.5447083380844849, "learning_rate": 4.0814274128142745e-05, "loss": 0.6197, "step": 9089 }, { "epoch": 0.2653937111325217, "grad_norm": 0.5695600917120298, "learning_rate": 4.081265206812652e-05, "loss": 0.653, "step": 9090 }, { "epoch": 0.26542290736036905, "grad_norm": 0.5767535519138843, "learning_rate": 4.08110300081103e-05, "loss": 0.7018, "step": 9091 }, { "epoch": 0.2654521035882164, "grad_norm": 0.5728910712427099, "learning_rate": 4.080940794809408e-05, "loss": 0.6203, "step": 9092 }, { "epoch": 0.2654812998160638, "grad_norm": 0.5995052258218483, "learning_rate": 4.080778588807786e-05, "loss": 0.6856, "step": 9093 }, { "epoch": 0.26551049604391114, "grad_norm": 0.5546084621691068, "learning_rate": 4.080616382806164e-05, "loss": 0.6966, "step": 9094 }, { "epoch": 0.2655396922717585, "grad_norm": 0.5712063577205259, "learning_rate": 4.0804541768045415e-05, "loss": 0.6029, "step": 9095 }, { "epoch": 0.26556888849960586, "grad_norm": 0.5670364750324061, "learning_rate": 4.0802919708029197e-05, "loss": 0.7132, "step": 9096 }, { "epoch": 0.2655980847274532, "grad_norm": 0.5114630199230209, "learning_rate": 4.080129764801298e-05, "loss": 0.6039, "step": 9097 }, { "epoch": 0.2656272809553006, "grad_norm": 0.5971171515596848, "learning_rate": 4.079967558799675e-05, "loss": 0.7826, "step": 9098 }, { "epoch": 0.26565647718314794, "grad_norm": 0.5976239934579217, "learning_rate": 4.079805352798054e-05, "loss": 0.7157, "step": 9099 }, { "epoch": 0.2656856734109953, "grad_norm": 0.5343366591842408, "learning_rate": 4.079643146796432e-05, "loss": 0.6098, "step": 9100 }, { "epoch": 0.26571486963884267, "grad_norm": 0.5940806485657674, "learning_rate": 4.07948094079481e-05, "loss": 0.6733, "step": 9101 }, { "epoch": 0.26574406586669, "grad_norm": 0.570840789974244, "learning_rate": 4.079318734793188e-05, "loss": 0.5802, "step": 9102 }, { "epoch": 0.2657732620945374, "grad_norm": 0.5185916929008278, "learning_rate": 4.0791565287915655e-05, "loss": 0.5982, "step": 9103 }, { "epoch": 0.26580245832238475, "grad_norm": 0.5668904300534795, "learning_rate": 4.078994322789944e-05, "loss": 0.6426, "step": 9104 }, { "epoch": 0.2658316545502321, "grad_norm": 0.5952697359719663, "learning_rate": 4.078832116788321e-05, "loss": 0.751, "step": 9105 }, { "epoch": 0.26586085077807947, "grad_norm": 0.5393154157384749, "learning_rate": 4.0786699107866994e-05, "loss": 0.6101, "step": 9106 }, { "epoch": 0.26589004700592683, "grad_norm": 0.5615781761709272, "learning_rate": 4.0785077047850776e-05, "loss": 0.6336, "step": 9107 }, { "epoch": 0.2659192432337742, "grad_norm": 0.6038495111121537, "learning_rate": 4.078345498783455e-05, "loss": 0.6708, "step": 9108 }, { "epoch": 0.26594843946162156, "grad_norm": 0.553449161839745, "learning_rate": 4.078183292781833e-05, "loss": 0.5974, "step": 9109 }, { "epoch": 0.2659776356894689, "grad_norm": 0.5732242704485574, "learning_rate": 4.078021086780211e-05, "loss": 0.6899, "step": 9110 }, { "epoch": 0.2660068319173163, "grad_norm": 0.5232278019992591, "learning_rate": 4.077858880778589e-05, "loss": 0.559, "step": 9111 }, { "epoch": 0.26603602814516364, "grad_norm": 0.5477423423778536, "learning_rate": 4.077696674776967e-05, "loss": 0.6487, "step": 9112 }, { "epoch": 0.266065224373011, "grad_norm": 0.5198694243889272, "learning_rate": 4.0775344687753446e-05, "loss": 0.6165, "step": 9113 }, { "epoch": 0.26609442060085836, "grad_norm": 0.5391647331912179, "learning_rate": 4.077372262773723e-05, "loss": 0.6766, "step": 9114 }, { "epoch": 0.2661236168287057, "grad_norm": 0.6206026757610292, "learning_rate": 4.0772100567721e-05, "loss": 0.6891, "step": 9115 }, { "epoch": 0.2661528130565531, "grad_norm": 0.5436584783597528, "learning_rate": 4.0770478507704784e-05, "loss": 0.6253, "step": 9116 }, { "epoch": 0.26618200928440044, "grad_norm": 0.5785855552742892, "learning_rate": 4.0768856447688566e-05, "loss": 0.6139, "step": 9117 }, { "epoch": 0.2662112055122478, "grad_norm": 0.5905321428763415, "learning_rate": 4.076723438767235e-05, "loss": 0.7081, "step": 9118 }, { "epoch": 0.26624040174009517, "grad_norm": 0.6366520115342462, "learning_rate": 4.076561232765613e-05, "loss": 0.6358, "step": 9119 }, { "epoch": 0.26626959796794253, "grad_norm": 0.5741172558043731, "learning_rate": 4.0763990267639905e-05, "loss": 0.6392, "step": 9120 }, { "epoch": 0.2662987941957899, "grad_norm": 0.5702430283494812, "learning_rate": 4.0762368207623686e-05, "loss": 0.6889, "step": 9121 }, { "epoch": 0.26632799042363725, "grad_norm": 0.5444579317056708, "learning_rate": 4.076074614760747e-05, "loss": 0.6396, "step": 9122 }, { "epoch": 0.2663571866514846, "grad_norm": 0.5417304338563198, "learning_rate": 4.075912408759124e-05, "loss": 0.6522, "step": 9123 }, { "epoch": 0.266386382879332, "grad_norm": 0.5830484513279365, "learning_rate": 4.0757502027575025e-05, "loss": 0.6262, "step": 9124 }, { "epoch": 0.26641557910717933, "grad_norm": 0.5833889428126681, "learning_rate": 4.07558799675588e-05, "loss": 0.7397, "step": 9125 }, { "epoch": 0.2664447753350267, "grad_norm": 0.5851250899144335, "learning_rate": 4.075425790754258e-05, "loss": 0.6313, "step": 9126 }, { "epoch": 0.26647397156287406, "grad_norm": 0.5575307193564036, "learning_rate": 4.0752635847526363e-05, "loss": 0.6033, "step": 9127 }, { "epoch": 0.2665031677907214, "grad_norm": 0.5505668638769575, "learning_rate": 4.075101378751014e-05, "loss": 0.5938, "step": 9128 }, { "epoch": 0.2665323640185688, "grad_norm": 0.5221467686280925, "learning_rate": 4.074939172749392e-05, "loss": 0.5922, "step": 9129 }, { "epoch": 0.26656156024641614, "grad_norm": 0.6160452793974002, "learning_rate": 4.0747769667477695e-05, "loss": 0.6502, "step": 9130 }, { "epoch": 0.2665907564742635, "grad_norm": 0.5316695155673233, "learning_rate": 4.074614760746148e-05, "loss": 0.6229, "step": 9131 }, { "epoch": 0.26661995270211086, "grad_norm": 0.5522211030880556, "learning_rate": 4.074452554744526e-05, "loss": 0.6824, "step": 9132 }, { "epoch": 0.2666491489299582, "grad_norm": 0.6050832086607264, "learning_rate": 4.0742903487429034e-05, "loss": 0.6352, "step": 9133 }, { "epoch": 0.2666783451578056, "grad_norm": 0.5318279251441993, "learning_rate": 4.0741281427412815e-05, "loss": 0.6292, "step": 9134 }, { "epoch": 0.26670754138565295, "grad_norm": 0.5422306974583283, "learning_rate": 4.073965936739659e-05, "loss": 0.6394, "step": 9135 }, { "epoch": 0.26673673761350036, "grad_norm": 0.5644467155922691, "learning_rate": 4.073803730738037e-05, "loss": 0.6345, "step": 9136 }, { "epoch": 0.2667659338413477, "grad_norm": 0.566856454994166, "learning_rate": 4.0736415247364154e-05, "loss": 0.6613, "step": 9137 }, { "epoch": 0.2667951300691951, "grad_norm": 0.5142928321343749, "learning_rate": 4.0734793187347936e-05, "loss": 0.608, "step": 9138 }, { "epoch": 0.26682432629704245, "grad_norm": 0.563587537218091, "learning_rate": 4.073317112733172e-05, "loss": 0.6861, "step": 9139 }, { "epoch": 0.2668535225248898, "grad_norm": 0.5831047208809362, "learning_rate": 4.073154906731549e-05, "loss": 0.6324, "step": 9140 }, { "epoch": 0.26688271875273717, "grad_norm": 0.5785337727950407, "learning_rate": 4.0729927007299274e-05, "loss": 0.647, "step": 9141 }, { "epoch": 0.26691191498058453, "grad_norm": 0.547043439699129, "learning_rate": 4.072830494728305e-05, "loss": 0.6604, "step": 9142 }, { "epoch": 0.2669411112084319, "grad_norm": 0.567225365113394, "learning_rate": 4.072668288726683e-05, "loss": 0.647, "step": 9143 }, { "epoch": 0.26697030743627925, "grad_norm": 0.5739831022965086, "learning_rate": 4.072506082725061e-05, "loss": 0.6752, "step": 9144 }, { "epoch": 0.2669995036641266, "grad_norm": 0.55342639438965, "learning_rate": 4.072343876723439e-05, "loss": 0.6567, "step": 9145 }, { "epoch": 0.267028699891974, "grad_norm": 0.5548849897686546, "learning_rate": 4.072181670721817e-05, "loss": 0.6538, "step": 9146 }, { "epoch": 0.26705789611982134, "grad_norm": 0.5138861349409662, "learning_rate": 4.072019464720195e-05, "loss": 0.6106, "step": 9147 }, { "epoch": 0.2670870923476687, "grad_norm": 0.5518444625748631, "learning_rate": 4.0718572587185726e-05, "loss": 0.6491, "step": 9148 }, { "epoch": 0.26711628857551606, "grad_norm": 0.5133635907717851, "learning_rate": 4.071695052716951e-05, "loss": 0.5805, "step": 9149 }, { "epoch": 0.2671454848033634, "grad_norm": 0.5463425928161343, "learning_rate": 4.071532846715328e-05, "loss": 0.6594, "step": 9150 }, { "epoch": 0.2671746810312108, "grad_norm": 0.7689497738909321, "learning_rate": 4.0713706407137065e-05, "loss": 0.6444, "step": 9151 }, { "epoch": 0.26720387725905814, "grad_norm": 0.5952287823453624, "learning_rate": 4.0712084347120846e-05, "loss": 0.73, "step": 9152 }, { "epoch": 0.2672330734869055, "grad_norm": 0.5835261576413265, "learning_rate": 4.071046228710462e-05, "loss": 0.6852, "step": 9153 }, { "epoch": 0.26726226971475286, "grad_norm": 0.5535880830114289, "learning_rate": 4.07088402270884e-05, "loss": 0.6616, "step": 9154 }, { "epoch": 0.2672914659426002, "grad_norm": 0.5682305923773876, "learning_rate": 4.0707218167072185e-05, "loss": 0.7394, "step": 9155 }, { "epoch": 0.2673206621704476, "grad_norm": 0.582608377407994, "learning_rate": 4.070559610705597e-05, "loss": 0.7357, "step": 9156 }, { "epoch": 0.26734985839829495, "grad_norm": 0.5222835350182297, "learning_rate": 4.070397404703974e-05, "loss": 0.5578, "step": 9157 }, { "epoch": 0.2673790546261423, "grad_norm": 0.5489978413994806, "learning_rate": 4.0702351987023524e-05, "loss": 0.6466, "step": 9158 }, { "epoch": 0.26740825085398967, "grad_norm": 0.5949151476927392, "learning_rate": 4.0700729927007305e-05, "loss": 0.7024, "step": 9159 }, { "epoch": 0.26743744708183703, "grad_norm": 0.5807909849644399, "learning_rate": 4.069910786699108e-05, "loss": 0.7055, "step": 9160 }, { "epoch": 0.2674666433096844, "grad_norm": 0.5060938622506332, "learning_rate": 4.069748580697486e-05, "loss": 0.5777, "step": 9161 }, { "epoch": 0.26749583953753175, "grad_norm": 0.5767745133281349, "learning_rate": 4.069586374695864e-05, "loss": 0.6041, "step": 9162 }, { "epoch": 0.2675250357653791, "grad_norm": 0.5792762824871991, "learning_rate": 4.069424168694242e-05, "loss": 0.6618, "step": 9163 }, { "epoch": 0.2675542319932265, "grad_norm": 0.5321014721355071, "learning_rate": 4.06926196269262e-05, "loss": 0.622, "step": 9164 }, { "epoch": 0.26758342822107384, "grad_norm": 0.5790106712295316, "learning_rate": 4.0690997566909976e-05, "loss": 0.6836, "step": 9165 }, { "epoch": 0.2676126244489212, "grad_norm": 0.5899694334512677, "learning_rate": 4.068937550689376e-05, "loss": 0.6914, "step": 9166 }, { "epoch": 0.26764182067676856, "grad_norm": 0.5498871024207469, "learning_rate": 4.068775344687753e-05, "loss": 0.6607, "step": 9167 }, { "epoch": 0.2676710169046159, "grad_norm": 0.6002819726236149, "learning_rate": 4.0686131386861314e-05, "loss": 0.7075, "step": 9168 }, { "epoch": 0.2677002131324633, "grad_norm": 0.4962586297376378, "learning_rate": 4.0684509326845096e-05, "loss": 0.5518, "step": 9169 }, { "epoch": 0.26772940936031064, "grad_norm": 0.5691777614776248, "learning_rate": 4.068288726682887e-05, "loss": 0.6229, "step": 9170 }, { "epoch": 0.267758605588158, "grad_norm": 0.5735105855668533, "learning_rate": 4.068126520681265e-05, "loss": 0.6897, "step": 9171 }, { "epoch": 0.26778780181600537, "grad_norm": 0.5727685599007785, "learning_rate": 4.0679643146796434e-05, "loss": 0.6188, "step": 9172 }, { "epoch": 0.2678169980438527, "grad_norm": 0.5719296750211942, "learning_rate": 4.067802108678021e-05, "loss": 0.6724, "step": 9173 }, { "epoch": 0.2678461942717001, "grad_norm": 0.5706465981110379, "learning_rate": 4.0676399026764e-05, "loss": 0.684, "step": 9174 }, { "epoch": 0.26787539049954745, "grad_norm": 0.569657770256423, "learning_rate": 4.067477696674777e-05, "loss": 0.7074, "step": 9175 }, { "epoch": 0.2679045867273948, "grad_norm": 0.5867351394507395, "learning_rate": 4.0673154906731555e-05, "loss": 0.6457, "step": 9176 }, { "epoch": 0.2679337829552422, "grad_norm": 0.5630632797155088, "learning_rate": 4.067153284671533e-05, "loss": 0.6916, "step": 9177 }, { "epoch": 0.26796297918308953, "grad_norm": 0.5167786747220624, "learning_rate": 4.066991078669911e-05, "loss": 0.5493, "step": 9178 }, { "epoch": 0.2679921754109369, "grad_norm": 0.5925994451386354, "learning_rate": 4.066828872668289e-05, "loss": 0.7611, "step": 9179 }, { "epoch": 0.26802137163878426, "grad_norm": 0.5461343245167769, "learning_rate": 4.066666666666667e-05, "loss": 0.5523, "step": 9180 }, { "epoch": 0.2680505678666316, "grad_norm": 0.5280647657464241, "learning_rate": 4.066504460665045e-05, "loss": 0.6121, "step": 9181 }, { "epoch": 0.268079764094479, "grad_norm": 0.5494314412484185, "learning_rate": 4.0663422546634225e-05, "loss": 0.6227, "step": 9182 }, { "epoch": 0.26810896032232634, "grad_norm": 0.5709123172841342, "learning_rate": 4.0661800486618007e-05, "loss": 0.6914, "step": 9183 }, { "epoch": 0.2681381565501737, "grad_norm": 0.5290211612298243, "learning_rate": 4.066017842660179e-05, "loss": 0.5731, "step": 9184 }, { "epoch": 0.26816735277802106, "grad_norm": 0.5627713817356419, "learning_rate": 4.065855636658556e-05, "loss": 0.628, "step": 9185 }, { "epoch": 0.2681965490058684, "grad_norm": 0.5213540866180649, "learning_rate": 4.0656934306569345e-05, "loss": 0.5734, "step": 9186 }, { "epoch": 0.2682257452337158, "grad_norm": 0.5141394521714779, "learning_rate": 4.065531224655312e-05, "loss": 0.5537, "step": 9187 }, { "epoch": 0.26825494146156315, "grad_norm": 0.6065381328127639, "learning_rate": 4.06536901865369e-05, "loss": 0.7041, "step": 9188 }, { "epoch": 0.2682841376894105, "grad_norm": 0.5576252261369562, "learning_rate": 4.0652068126520684e-05, "loss": 0.6134, "step": 9189 }, { "epoch": 0.26831333391725787, "grad_norm": 0.6674943770603592, "learning_rate": 4.065044606650446e-05, "loss": 0.7664, "step": 9190 }, { "epoch": 0.26834253014510523, "grad_norm": 0.5989142772120889, "learning_rate": 4.064882400648824e-05, "loss": 0.7085, "step": 9191 }, { "epoch": 0.2683717263729526, "grad_norm": 0.5386596659428982, "learning_rate": 4.064720194647202e-05, "loss": 0.6275, "step": 9192 }, { "epoch": 0.26840092260079995, "grad_norm": 0.5500147933673232, "learning_rate": 4.0645579886455804e-05, "loss": 0.6318, "step": 9193 }, { "epoch": 0.2684301188286473, "grad_norm": 0.5188793790014332, "learning_rate": 4.0643957826439586e-05, "loss": 0.6303, "step": 9194 }, { "epoch": 0.2684593150564947, "grad_norm": 0.5644230046277879, "learning_rate": 4.064233576642336e-05, "loss": 0.6237, "step": 9195 }, { "epoch": 0.2684885112843421, "grad_norm": 0.5745362346157321, "learning_rate": 4.064071370640714e-05, "loss": 0.7017, "step": 9196 }, { "epoch": 0.26851770751218945, "grad_norm": 0.5702227419963608, "learning_rate": 4.063909164639092e-05, "loss": 0.5879, "step": 9197 }, { "epoch": 0.2685469037400368, "grad_norm": 0.5745983333626223, "learning_rate": 4.06374695863747e-05, "loss": 0.6472, "step": 9198 }, { "epoch": 0.2685760999678842, "grad_norm": 0.5098286763324437, "learning_rate": 4.063584752635848e-05, "loss": 0.6026, "step": 9199 }, { "epoch": 0.26860529619573154, "grad_norm": 0.5536414705280778, "learning_rate": 4.0634225466342256e-05, "loss": 0.6634, "step": 9200 }, { "epoch": 0.2686344924235789, "grad_norm": 0.5666005369871143, "learning_rate": 4.063260340632604e-05, "loss": 0.7091, "step": 9201 }, { "epoch": 0.26866368865142626, "grad_norm": 0.5364522115225744, "learning_rate": 4.063098134630981e-05, "loss": 0.5447, "step": 9202 }, { "epoch": 0.2686928848792736, "grad_norm": 0.5780369676117437, "learning_rate": 4.0629359286293594e-05, "loss": 0.6938, "step": 9203 }, { "epoch": 0.268722081107121, "grad_norm": 0.5684470910884899, "learning_rate": 4.0627737226277376e-05, "loss": 0.662, "step": 9204 }, { "epoch": 0.26875127733496834, "grad_norm": 0.534474077863263, "learning_rate": 4.062611516626115e-05, "loss": 0.5815, "step": 9205 }, { "epoch": 0.2687804735628157, "grad_norm": 0.5728699607846526, "learning_rate": 4.062449310624493e-05, "loss": 0.7158, "step": 9206 }, { "epoch": 0.26880966979066306, "grad_norm": 0.5244462370066204, "learning_rate": 4.062287104622871e-05, "loss": 0.5956, "step": 9207 }, { "epoch": 0.2688388660185104, "grad_norm": 0.5157914582578881, "learning_rate": 4.062124898621249e-05, "loss": 0.5629, "step": 9208 }, { "epoch": 0.2688680622463578, "grad_norm": 0.5328643952322798, "learning_rate": 4.061962692619627e-05, "loss": 0.5709, "step": 9209 }, { "epoch": 0.26889725847420515, "grad_norm": 0.5416130909134717, "learning_rate": 4.0618004866180046e-05, "loss": 0.6498, "step": 9210 }, { "epoch": 0.2689264547020525, "grad_norm": 0.5378632574468207, "learning_rate": 4.061638280616383e-05, "loss": 0.6139, "step": 9211 }, { "epoch": 0.26895565092989987, "grad_norm": 0.5887569577617788, "learning_rate": 4.061476074614761e-05, "loss": 0.7472, "step": 9212 }, { "epoch": 0.26898484715774723, "grad_norm": 0.5738546909548689, "learning_rate": 4.061313868613139e-05, "loss": 0.7175, "step": 9213 }, { "epoch": 0.2690140433855946, "grad_norm": 0.6117805741514771, "learning_rate": 4.0611516626115173e-05, "loss": 0.6868, "step": 9214 }, { "epoch": 0.26904323961344195, "grad_norm": 0.5263021660543832, "learning_rate": 4.060989456609895e-05, "loss": 0.6058, "step": 9215 }, { "epoch": 0.2690724358412893, "grad_norm": 0.5699844121300719, "learning_rate": 4.060827250608273e-05, "loss": 0.6546, "step": 9216 }, { "epoch": 0.2691016320691367, "grad_norm": 0.6205970681398997, "learning_rate": 4.0606650446066505e-05, "loss": 0.7342, "step": 9217 }, { "epoch": 0.26913082829698404, "grad_norm": 0.5894713023935492, "learning_rate": 4.060502838605029e-05, "loss": 0.6236, "step": 9218 }, { "epoch": 0.2691600245248314, "grad_norm": 0.5441310931561688, "learning_rate": 4.060340632603407e-05, "loss": 0.6999, "step": 9219 }, { "epoch": 0.26918922075267876, "grad_norm": 0.5119098740877954, "learning_rate": 4.0601784266017844e-05, "loss": 0.6191, "step": 9220 }, { "epoch": 0.2692184169805261, "grad_norm": 0.6317322547499491, "learning_rate": 4.0600162206001625e-05, "loss": 0.6287, "step": 9221 }, { "epoch": 0.2692476132083735, "grad_norm": 0.5380920242910223, "learning_rate": 4.05985401459854e-05, "loss": 0.6406, "step": 9222 }, { "epoch": 0.26927680943622084, "grad_norm": 0.507764667261919, "learning_rate": 4.059691808596918e-05, "loss": 0.57, "step": 9223 }, { "epoch": 0.2693060056640682, "grad_norm": 0.5719934232033445, "learning_rate": 4.0595296025952964e-05, "loss": 0.646, "step": 9224 }, { "epoch": 0.26933520189191557, "grad_norm": 0.585187086062133, "learning_rate": 4.059367396593674e-05, "loss": 0.7058, "step": 9225 }, { "epoch": 0.2693643981197629, "grad_norm": 0.5717161709974906, "learning_rate": 4.059205190592052e-05, "loss": 0.6353, "step": 9226 }, { "epoch": 0.2693935943476103, "grad_norm": 0.5675386026583414, "learning_rate": 4.0590429845904296e-05, "loss": 0.6577, "step": 9227 }, { "epoch": 0.26942279057545765, "grad_norm": 0.5510096743385595, "learning_rate": 4.058880778588808e-05, "loss": 0.6167, "step": 9228 }, { "epoch": 0.269451986803305, "grad_norm": 0.5385068956266337, "learning_rate": 4.058718572587186e-05, "loss": 0.6281, "step": 9229 }, { "epoch": 0.26948118303115237, "grad_norm": 0.6175691330121031, "learning_rate": 4.0585563665855634e-05, "loss": 0.6627, "step": 9230 }, { "epoch": 0.26951037925899973, "grad_norm": 0.5731047471507662, "learning_rate": 4.058394160583942e-05, "loss": 0.6511, "step": 9231 }, { "epoch": 0.2695395754868471, "grad_norm": 0.5319922675146312, "learning_rate": 4.05823195458232e-05, "loss": 0.6543, "step": 9232 }, { "epoch": 0.26956877171469446, "grad_norm": 0.5448619134139671, "learning_rate": 4.058069748580698e-05, "loss": 0.6371, "step": 9233 }, { "epoch": 0.2695979679425418, "grad_norm": 0.539476746549846, "learning_rate": 4.057907542579076e-05, "loss": 0.6072, "step": 9234 }, { "epoch": 0.2696271641703892, "grad_norm": 0.6485502713935635, "learning_rate": 4.0577453365774536e-05, "loss": 0.7157, "step": 9235 }, { "epoch": 0.26965636039823654, "grad_norm": 0.5903658560443296, "learning_rate": 4.057583130575832e-05, "loss": 0.6527, "step": 9236 }, { "epoch": 0.2696855566260839, "grad_norm": 0.6331104163968745, "learning_rate": 4.057420924574209e-05, "loss": 0.7809, "step": 9237 }, { "epoch": 0.26971475285393126, "grad_norm": 0.5435535289965227, "learning_rate": 4.0572587185725875e-05, "loss": 0.652, "step": 9238 }, { "epoch": 0.2697439490817786, "grad_norm": 0.5585402032136558, "learning_rate": 4.0570965125709657e-05, "loss": 0.6154, "step": 9239 }, { "epoch": 0.269773145309626, "grad_norm": 0.5306839488605404, "learning_rate": 4.056934306569343e-05, "loss": 0.6504, "step": 9240 }, { "epoch": 0.26980234153747334, "grad_norm": 0.6090782980134051, "learning_rate": 4.056772100567721e-05, "loss": 0.7135, "step": 9241 }, { "epoch": 0.2698315377653207, "grad_norm": 0.5620629756507347, "learning_rate": 4.056609894566099e-05, "loss": 0.6188, "step": 9242 }, { "epoch": 0.26986073399316807, "grad_norm": 0.5512919038158416, "learning_rate": 4.056447688564477e-05, "loss": 0.6691, "step": 9243 }, { "epoch": 0.26988993022101543, "grad_norm": 0.5406351165535678, "learning_rate": 4.056285482562855e-05, "loss": 0.6381, "step": 9244 }, { "epoch": 0.2699191264488628, "grad_norm": 0.601487485833154, "learning_rate": 4.056123276561233e-05, "loss": 0.6584, "step": 9245 }, { "epoch": 0.26994832267671015, "grad_norm": 0.5531036968526665, "learning_rate": 4.055961070559611e-05, "loss": 0.645, "step": 9246 }, { "epoch": 0.2699775189045575, "grad_norm": 0.5368973857862328, "learning_rate": 4.0557988645579884e-05, "loss": 0.599, "step": 9247 }, { "epoch": 0.2700067151324049, "grad_norm": 0.573001890273161, "learning_rate": 4.0556366585563665e-05, "loss": 0.709, "step": 9248 }, { "epoch": 0.27003591136025223, "grad_norm": 0.5549711512669193, "learning_rate": 4.055474452554745e-05, "loss": 0.6582, "step": 9249 }, { "epoch": 0.2700651075880996, "grad_norm": 0.5551454410763169, "learning_rate": 4.055312246553123e-05, "loss": 0.6522, "step": 9250 }, { "epoch": 0.27009430381594696, "grad_norm": 0.6016288804545931, "learning_rate": 4.055150040551501e-05, "loss": 0.6813, "step": 9251 }, { "epoch": 0.2701235000437943, "grad_norm": 0.48039922317571954, "learning_rate": 4.0549878345498786e-05, "loss": 0.5259, "step": 9252 }, { "epoch": 0.2701526962716417, "grad_norm": 0.5538532546817442, "learning_rate": 4.054825628548257e-05, "loss": 0.6517, "step": 9253 }, { "epoch": 0.27018189249948904, "grad_norm": 0.587098518481764, "learning_rate": 4.054663422546634e-05, "loss": 0.7351, "step": 9254 }, { "epoch": 0.2702110887273364, "grad_norm": 0.5573034829795801, "learning_rate": 4.0545012165450124e-05, "loss": 0.628, "step": 9255 }, { "epoch": 0.27024028495518376, "grad_norm": 0.5256107635495161, "learning_rate": 4.0543390105433906e-05, "loss": 0.619, "step": 9256 }, { "epoch": 0.2702694811830312, "grad_norm": 0.5570271591531761, "learning_rate": 4.054176804541768e-05, "loss": 0.6463, "step": 9257 }, { "epoch": 0.27029867741087854, "grad_norm": 0.562508362595416, "learning_rate": 4.054014598540146e-05, "loss": 0.6424, "step": 9258 }, { "epoch": 0.2703278736387259, "grad_norm": 0.5080990406419837, "learning_rate": 4.0538523925385244e-05, "loss": 0.5819, "step": 9259 }, { "epoch": 0.27035706986657326, "grad_norm": 0.5643697489627156, "learning_rate": 4.053690186536902e-05, "loss": 0.7059, "step": 9260 }, { "epoch": 0.2703862660944206, "grad_norm": 0.5635325778199013, "learning_rate": 4.05352798053528e-05, "loss": 0.7036, "step": 9261 }, { "epoch": 0.270415462322268, "grad_norm": 0.6198007202029077, "learning_rate": 4.0533657745336576e-05, "loss": 0.6796, "step": 9262 }, { "epoch": 0.27044465855011535, "grad_norm": 0.5425292763784586, "learning_rate": 4.053203568532036e-05, "loss": 0.5946, "step": 9263 }, { "epoch": 0.2704738547779627, "grad_norm": 0.6016852107913124, "learning_rate": 4.053041362530414e-05, "loss": 0.6591, "step": 9264 }, { "epoch": 0.27050305100581007, "grad_norm": 0.5800504526149272, "learning_rate": 4.0528791565287915e-05, "loss": 0.7077, "step": 9265 }, { "epoch": 0.27053224723365743, "grad_norm": 0.5256726004024661, "learning_rate": 4.0527169505271696e-05, "loss": 0.6055, "step": 9266 }, { "epoch": 0.2705614434615048, "grad_norm": 0.5357136553881703, "learning_rate": 4.052554744525547e-05, "loss": 0.6449, "step": 9267 }, { "epoch": 0.27059063968935215, "grad_norm": 0.5638977823723238, "learning_rate": 4.052392538523925e-05, "loss": 0.6881, "step": 9268 }, { "epoch": 0.2706198359171995, "grad_norm": 0.5544302448759726, "learning_rate": 4.0522303325223035e-05, "loss": 0.6529, "step": 9269 }, { "epoch": 0.2706490321450469, "grad_norm": 0.5476151191181309, "learning_rate": 4.0520681265206817e-05, "loss": 0.5433, "step": 9270 }, { "epoch": 0.27067822837289424, "grad_norm": 0.5438854700562815, "learning_rate": 4.05190592051906e-05, "loss": 0.6123, "step": 9271 }, { "epoch": 0.2707074246007416, "grad_norm": 0.5638007818923539, "learning_rate": 4.051743714517437e-05, "loss": 0.6404, "step": 9272 }, { "epoch": 0.27073662082858896, "grad_norm": 0.5283051636748765, "learning_rate": 4.0515815085158155e-05, "loss": 0.5671, "step": 9273 }, { "epoch": 0.2707658170564363, "grad_norm": 0.5160647267644246, "learning_rate": 4.051419302514193e-05, "loss": 0.5794, "step": 9274 }, { "epoch": 0.2707950132842837, "grad_norm": 0.5590946950130955, "learning_rate": 4.051257096512571e-05, "loss": 0.6129, "step": 9275 }, { "epoch": 0.27082420951213104, "grad_norm": 0.5331995215942786, "learning_rate": 4.0510948905109494e-05, "loss": 0.648, "step": 9276 }, { "epoch": 0.2708534057399784, "grad_norm": 0.5694265462144822, "learning_rate": 4.050932684509327e-05, "loss": 0.6306, "step": 9277 }, { "epoch": 0.27088260196782576, "grad_norm": 0.5878883125569949, "learning_rate": 4.050770478507705e-05, "loss": 0.6645, "step": 9278 }, { "epoch": 0.2709117981956731, "grad_norm": 0.551548006211074, "learning_rate": 4.050608272506083e-05, "loss": 0.6273, "step": 9279 }, { "epoch": 0.2709409944235205, "grad_norm": 0.5891437600314098, "learning_rate": 4.050446066504461e-05, "loss": 0.6856, "step": 9280 }, { "epoch": 0.27097019065136785, "grad_norm": 0.5988144886759073, "learning_rate": 4.050283860502839e-05, "loss": 0.6129, "step": 9281 }, { "epoch": 0.2709993868792152, "grad_norm": 0.5470804804656042, "learning_rate": 4.0501216545012164e-05, "loss": 0.6446, "step": 9282 }, { "epoch": 0.27102858310706257, "grad_norm": 0.5383415875840801, "learning_rate": 4.0499594484995946e-05, "loss": 0.6811, "step": 9283 }, { "epoch": 0.27105777933490993, "grad_norm": 0.5368938678041448, "learning_rate": 4.049797242497973e-05, "loss": 0.6219, "step": 9284 }, { "epoch": 0.2710869755627573, "grad_norm": 0.5744889640189561, "learning_rate": 4.04963503649635e-05, "loss": 0.6526, "step": 9285 }, { "epoch": 0.27111617179060465, "grad_norm": 0.6088381389459867, "learning_rate": 4.0494728304947284e-05, "loss": 0.7281, "step": 9286 }, { "epoch": 0.271145368018452, "grad_norm": 0.5129681952683216, "learning_rate": 4.049310624493106e-05, "loss": 0.5865, "step": 9287 }, { "epoch": 0.2711745642462994, "grad_norm": 0.5740009171173163, "learning_rate": 4.049148418491485e-05, "loss": 0.655, "step": 9288 }, { "epoch": 0.27120376047414674, "grad_norm": 0.5768679419222554, "learning_rate": 4.048986212489862e-05, "loss": 0.606, "step": 9289 }, { "epoch": 0.2712329567019941, "grad_norm": 0.532255262883589, "learning_rate": 4.0488240064882404e-05, "loss": 0.6386, "step": 9290 }, { "epoch": 0.27126215292984146, "grad_norm": 0.5366810078360721, "learning_rate": 4.0486618004866186e-05, "loss": 0.6375, "step": 9291 }, { "epoch": 0.2712913491576888, "grad_norm": 0.5363504831768393, "learning_rate": 4.048499594484996e-05, "loss": 0.6498, "step": 9292 }, { "epoch": 0.2713205453855362, "grad_norm": 0.5363279885128396, "learning_rate": 4.048337388483374e-05, "loss": 0.5956, "step": 9293 }, { "epoch": 0.27134974161338354, "grad_norm": 0.49015509824077946, "learning_rate": 4.048175182481752e-05, "loss": 0.5503, "step": 9294 }, { "epoch": 0.2713789378412309, "grad_norm": 0.552460462618545, "learning_rate": 4.04801297648013e-05, "loss": 0.6492, "step": 9295 }, { "epoch": 0.27140813406907827, "grad_norm": 0.5017456579441175, "learning_rate": 4.047850770478508e-05, "loss": 0.5897, "step": 9296 }, { "epoch": 0.2714373302969256, "grad_norm": 0.5491094320955187, "learning_rate": 4.0476885644768856e-05, "loss": 0.6536, "step": 9297 }, { "epoch": 0.271466526524773, "grad_norm": 0.556469473657803, "learning_rate": 4.047526358475264e-05, "loss": 0.593, "step": 9298 }, { "epoch": 0.27149572275262035, "grad_norm": 0.5526989325901279, "learning_rate": 4.047364152473641e-05, "loss": 0.6273, "step": 9299 }, { "epoch": 0.2715249189804677, "grad_norm": 0.5909232183851435, "learning_rate": 4.0472019464720195e-05, "loss": 0.6831, "step": 9300 }, { "epoch": 0.27155411520831507, "grad_norm": 0.5477494367079009, "learning_rate": 4.047039740470398e-05, "loss": 0.6307, "step": 9301 }, { "epoch": 0.27158331143616243, "grad_norm": 0.5583803590292908, "learning_rate": 4.046877534468775e-05, "loss": 0.6266, "step": 9302 }, { "epoch": 0.2716125076640098, "grad_norm": 0.5732056598998002, "learning_rate": 4.0467153284671533e-05, "loss": 0.7043, "step": 9303 }, { "epoch": 0.27164170389185716, "grad_norm": 0.5405912916159762, "learning_rate": 4.0465531224655315e-05, "loss": 0.6148, "step": 9304 }, { "epoch": 0.2716709001197045, "grad_norm": 0.557144743627693, "learning_rate": 4.046390916463909e-05, "loss": 0.6634, "step": 9305 }, { "epoch": 0.2717000963475519, "grad_norm": 0.5565464781133929, "learning_rate": 4.046228710462288e-05, "loss": 0.6354, "step": 9306 }, { "epoch": 0.27172929257539924, "grad_norm": 0.5145079769130082, "learning_rate": 4.0460665044606654e-05, "loss": 0.5444, "step": 9307 }, { "epoch": 0.2717584888032466, "grad_norm": 0.5467376625405221, "learning_rate": 4.0459042984590435e-05, "loss": 0.6558, "step": 9308 }, { "epoch": 0.27178768503109396, "grad_norm": 0.5198713309549964, "learning_rate": 4.045742092457421e-05, "loss": 0.6196, "step": 9309 }, { "epoch": 0.2718168812589413, "grad_norm": 0.5202905649224615, "learning_rate": 4.045579886455799e-05, "loss": 0.5889, "step": 9310 }, { "epoch": 0.2718460774867887, "grad_norm": 0.5433980668040483, "learning_rate": 4.0454176804541774e-05, "loss": 0.6344, "step": 9311 }, { "epoch": 0.27187527371463605, "grad_norm": 0.6234771506738379, "learning_rate": 4.045255474452555e-05, "loss": 0.6398, "step": 9312 }, { "epoch": 0.2719044699424834, "grad_norm": 0.5598259659383792, "learning_rate": 4.045093268450933e-05, "loss": 0.6868, "step": 9313 }, { "epoch": 0.27193366617033077, "grad_norm": 0.5704338868909444, "learning_rate": 4.0449310624493106e-05, "loss": 0.7584, "step": 9314 }, { "epoch": 0.27196286239817813, "grad_norm": 0.6147111401560215, "learning_rate": 4.044768856447689e-05, "loss": 0.6785, "step": 9315 }, { "epoch": 0.2719920586260255, "grad_norm": 0.6295402742366187, "learning_rate": 4.044606650446067e-05, "loss": 0.727, "step": 9316 }, { "epoch": 0.2720212548538729, "grad_norm": 0.5411301249200696, "learning_rate": 4.0444444444444444e-05, "loss": 0.6473, "step": 9317 }, { "epoch": 0.27205045108172027, "grad_norm": 0.5399956027587332, "learning_rate": 4.0442822384428226e-05, "loss": 0.6367, "step": 9318 }, { "epoch": 0.27207964730956763, "grad_norm": 0.5381623143458134, "learning_rate": 4.0441200324412e-05, "loss": 0.6389, "step": 9319 }, { "epoch": 0.272108843537415, "grad_norm": 0.5147662962851381, "learning_rate": 4.043957826439578e-05, "loss": 0.5679, "step": 9320 }, { "epoch": 0.27213803976526235, "grad_norm": 0.6304918253198422, "learning_rate": 4.0437956204379564e-05, "loss": 0.67, "step": 9321 }, { "epoch": 0.2721672359931097, "grad_norm": 0.5340094066198344, "learning_rate": 4.043633414436334e-05, "loss": 0.6577, "step": 9322 }, { "epoch": 0.2721964322209571, "grad_norm": 0.555276538212795, "learning_rate": 4.043471208434712e-05, "loss": 0.6364, "step": 9323 }, { "epoch": 0.27222562844880444, "grad_norm": 0.5328019069913961, "learning_rate": 4.04330900243309e-05, "loss": 0.635, "step": 9324 }, { "epoch": 0.2722548246766518, "grad_norm": 0.560887311858896, "learning_rate": 4.0431467964314685e-05, "loss": 0.6578, "step": 9325 }, { "epoch": 0.27228402090449916, "grad_norm": 0.5439524768440742, "learning_rate": 4.0429845904298467e-05, "loss": 0.639, "step": 9326 }, { "epoch": 0.2723132171323465, "grad_norm": 0.5634565020439665, "learning_rate": 4.042822384428224e-05, "loss": 0.6792, "step": 9327 }, { "epoch": 0.2723424133601939, "grad_norm": 0.5264454927537657, "learning_rate": 4.042660178426602e-05, "loss": 0.5917, "step": 9328 }, { "epoch": 0.27237160958804124, "grad_norm": 0.5393677897922525, "learning_rate": 4.04249797242498e-05, "loss": 0.6298, "step": 9329 }, { "epoch": 0.2724008058158886, "grad_norm": 0.577385833203926, "learning_rate": 4.042335766423358e-05, "loss": 0.6994, "step": 9330 }, { "epoch": 0.27243000204373596, "grad_norm": 0.5755073628652729, "learning_rate": 4.042173560421736e-05, "loss": 0.6617, "step": 9331 }, { "epoch": 0.2724591982715833, "grad_norm": 0.5634712179806168, "learning_rate": 4.042011354420114e-05, "loss": 0.6369, "step": 9332 }, { "epoch": 0.2724883944994307, "grad_norm": 0.5016672160143567, "learning_rate": 4.041849148418492e-05, "loss": 0.5459, "step": 9333 }, { "epoch": 0.27251759072727805, "grad_norm": 0.5454326152174499, "learning_rate": 4.0416869424168694e-05, "loss": 0.6465, "step": 9334 }, { "epoch": 0.2725467869551254, "grad_norm": 0.5506190650270829, "learning_rate": 4.0415247364152475e-05, "loss": 0.6424, "step": 9335 }, { "epoch": 0.27257598318297277, "grad_norm": 0.5767113608629632, "learning_rate": 4.041362530413626e-05, "loss": 0.7144, "step": 9336 }, { "epoch": 0.27260517941082013, "grad_norm": 0.5066992972642904, "learning_rate": 4.041200324412003e-05, "loss": 0.555, "step": 9337 }, { "epoch": 0.2726343756386675, "grad_norm": 0.6058335553729786, "learning_rate": 4.0410381184103814e-05, "loss": 0.6882, "step": 9338 }, { "epoch": 0.27266357186651485, "grad_norm": 0.549441193093992, "learning_rate": 4.040875912408759e-05, "loss": 0.6689, "step": 9339 }, { "epoch": 0.2726927680943622, "grad_norm": 0.5917881883833014, "learning_rate": 4.040713706407137e-05, "loss": 0.688, "step": 9340 }, { "epoch": 0.2727219643222096, "grad_norm": 0.5083790337046198, "learning_rate": 4.040551500405515e-05, "loss": 0.5989, "step": 9341 }, { "epoch": 0.27275116055005694, "grad_norm": 0.5823198352793811, "learning_rate": 4.040389294403893e-05, "loss": 0.6799, "step": 9342 }, { "epoch": 0.2727803567779043, "grad_norm": 0.610629098614544, "learning_rate": 4.040227088402271e-05, "loss": 0.7244, "step": 9343 }, { "epoch": 0.27280955300575166, "grad_norm": 0.5690361949757498, "learning_rate": 4.040064882400649e-05, "loss": 0.6984, "step": 9344 }, { "epoch": 0.272838749233599, "grad_norm": 0.550602473832227, "learning_rate": 4.039902676399027e-05, "loss": 0.6494, "step": 9345 }, { "epoch": 0.2728679454614464, "grad_norm": 0.5903937377417738, "learning_rate": 4.0397404703974054e-05, "loss": 0.7049, "step": 9346 }, { "epoch": 0.27289714168929374, "grad_norm": 0.5776659657978866, "learning_rate": 4.039578264395783e-05, "loss": 0.7208, "step": 9347 }, { "epoch": 0.2729263379171411, "grad_norm": 0.5189521938249242, "learning_rate": 4.039416058394161e-05, "loss": 0.6139, "step": 9348 }, { "epoch": 0.27295553414498847, "grad_norm": 0.5724425271696064, "learning_rate": 4.0392538523925386e-05, "loss": 0.627, "step": 9349 }, { "epoch": 0.2729847303728358, "grad_norm": 0.5772625300850347, "learning_rate": 4.039091646390917e-05, "loss": 0.7124, "step": 9350 }, { "epoch": 0.2730139266006832, "grad_norm": 0.5438797330160551, "learning_rate": 4.038929440389295e-05, "loss": 0.5799, "step": 9351 }, { "epoch": 0.27304312282853055, "grad_norm": 0.697741812339664, "learning_rate": 4.0387672343876725e-05, "loss": 0.69, "step": 9352 }, { "epoch": 0.2730723190563779, "grad_norm": 0.5589789268825385, "learning_rate": 4.0386050283860506e-05, "loss": 0.6507, "step": 9353 }, { "epoch": 0.27310151528422527, "grad_norm": 0.5675877987763087, "learning_rate": 4.038442822384428e-05, "loss": 0.6948, "step": 9354 }, { "epoch": 0.27313071151207263, "grad_norm": 0.5954033268411573, "learning_rate": 4.038280616382806e-05, "loss": 0.6426, "step": 9355 }, { "epoch": 0.27315990773992, "grad_norm": 0.5248391772446882, "learning_rate": 4.0381184103811845e-05, "loss": 0.596, "step": 9356 }, { "epoch": 0.27318910396776735, "grad_norm": 0.5552844534664978, "learning_rate": 4.037956204379562e-05, "loss": 0.6727, "step": 9357 }, { "epoch": 0.2732183001956147, "grad_norm": 0.6057699752167381, "learning_rate": 4.03779399837794e-05, "loss": 0.7711, "step": 9358 }, { "epoch": 0.2732474964234621, "grad_norm": 0.5089226991761627, "learning_rate": 4.0376317923763177e-05, "loss": 0.5707, "step": 9359 }, { "epoch": 0.27327669265130944, "grad_norm": 0.5114595321802924, "learning_rate": 4.037469586374696e-05, "loss": 0.5853, "step": 9360 }, { "epoch": 0.2733058888791568, "grad_norm": 0.5133972242533985, "learning_rate": 4.037307380373074e-05, "loss": 0.5956, "step": 9361 }, { "epoch": 0.27333508510700416, "grad_norm": 0.5152019857930251, "learning_rate": 4.0371451743714515e-05, "loss": 0.6214, "step": 9362 }, { "epoch": 0.2733642813348515, "grad_norm": 0.5432308319764718, "learning_rate": 4.0369829683698304e-05, "loss": 0.6118, "step": 9363 }, { "epoch": 0.2733934775626989, "grad_norm": 0.5383132217721776, "learning_rate": 4.036820762368208e-05, "loss": 0.6492, "step": 9364 }, { "epoch": 0.27342267379054624, "grad_norm": 0.5469927381233859, "learning_rate": 4.036658556366586e-05, "loss": 0.5938, "step": 9365 }, { "epoch": 0.2734518700183936, "grad_norm": 0.5070514479257493, "learning_rate": 4.0364963503649635e-05, "loss": 0.6177, "step": 9366 }, { "epoch": 0.27348106624624097, "grad_norm": 0.6041042964109834, "learning_rate": 4.036334144363342e-05, "loss": 0.7206, "step": 9367 }, { "epoch": 0.27351026247408833, "grad_norm": 0.560369356847318, "learning_rate": 4.03617193836172e-05, "loss": 0.718, "step": 9368 }, { "epoch": 0.2735394587019357, "grad_norm": 0.5718781217081389, "learning_rate": 4.0360097323600974e-05, "loss": 0.6284, "step": 9369 }, { "epoch": 0.27356865492978305, "grad_norm": 0.5568200674850924, "learning_rate": 4.0358475263584756e-05, "loss": 0.6939, "step": 9370 }, { "epoch": 0.2735978511576304, "grad_norm": 0.5948146670903983, "learning_rate": 4.035685320356854e-05, "loss": 0.736, "step": 9371 }, { "epoch": 0.2736270473854778, "grad_norm": 0.5403735984603046, "learning_rate": 4.035523114355231e-05, "loss": 0.5606, "step": 9372 }, { "epoch": 0.27365624361332513, "grad_norm": 0.5872021990575391, "learning_rate": 4.0353609083536094e-05, "loss": 0.6775, "step": 9373 }, { "epoch": 0.2736854398411725, "grad_norm": 0.5156410743821586, "learning_rate": 4.035198702351987e-05, "loss": 0.6042, "step": 9374 }, { "epoch": 0.27371463606901986, "grad_norm": 0.5517035625660094, "learning_rate": 4.035036496350365e-05, "loss": 0.6357, "step": 9375 }, { "epoch": 0.2737438322968672, "grad_norm": 0.5698282798357996, "learning_rate": 4.034874290348743e-05, "loss": 0.7131, "step": 9376 }, { "epoch": 0.27377302852471463, "grad_norm": 0.5290288248908046, "learning_rate": 4.034712084347121e-05, "loss": 0.6146, "step": 9377 }, { "epoch": 0.273802224752562, "grad_norm": 0.5188480108860664, "learning_rate": 4.034549878345499e-05, "loss": 0.5942, "step": 9378 }, { "epoch": 0.27383142098040936, "grad_norm": 0.5802770660558472, "learning_rate": 4.0343876723438764e-05, "loss": 0.6531, "step": 9379 }, { "epoch": 0.2738606172082567, "grad_norm": 0.5533461431732042, "learning_rate": 4.0342254663422546e-05, "loss": 0.6516, "step": 9380 }, { "epoch": 0.2738898134361041, "grad_norm": 0.5281370966562147, "learning_rate": 4.034063260340633e-05, "loss": 0.5742, "step": 9381 }, { "epoch": 0.27391900966395144, "grad_norm": 0.5790884484284082, "learning_rate": 4.033901054339011e-05, "loss": 0.669, "step": 9382 }, { "epoch": 0.2739482058917988, "grad_norm": 0.5477621832335448, "learning_rate": 4.033738848337389e-05, "loss": 0.6413, "step": 9383 }, { "epoch": 0.27397740211964616, "grad_norm": 0.5810351765955871, "learning_rate": 4.0335766423357666e-05, "loss": 0.6733, "step": 9384 }, { "epoch": 0.2740065983474935, "grad_norm": 0.550121666643325, "learning_rate": 4.033414436334145e-05, "loss": 0.6725, "step": 9385 }, { "epoch": 0.2740357945753409, "grad_norm": 0.5575897482419204, "learning_rate": 4.033252230332522e-05, "loss": 0.6685, "step": 9386 }, { "epoch": 0.27406499080318825, "grad_norm": 0.551359320453615, "learning_rate": 4.0330900243309005e-05, "loss": 0.664, "step": 9387 }, { "epoch": 0.2740941870310356, "grad_norm": 0.5235056584758374, "learning_rate": 4.032927818329279e-05, "loss": 0.5847, "step": 9388 }, { "epoch": 0.27412338325888297, "grad_norm": 0.7328287083195866, "learning_rate": 4.032765612327656e-05, "loss": 0.6792, "step": 9389 }, { "epoch": 0.27415257948673033, "grad_norm": 0.5475857229445853, "learning_rate": 4.0326034063260343e-05, "loss": 0.6399, "step": 9390 }, { "epoch": 0.2741817757145777, "grad_norm": 0.573313696265367, "learning_rate": 4.0324412003244125e-05, "loss": 0.6953, "step": 9391 }, { "epoch": 0.27421097194242505, "grad_norm": 0.5945864554910325, "learning_rate": 4.03227899432279e-05, "loss": 0.7052, "step": 9392 }, { "epoch": 0.2742401681702724, "grad_norm": 0.5314460842870908, "learning_rate": 4.032116788321168e-05, "loss": 0.6202, "step": 9393 }, { "epoch": 0.2742693643981198, "grad_norm": 0.5194036842479927, "learning_rate": 4.031954582319546e-05, "loss": 0.5399, "step": 9394 }, { "epoch": 0.27429856062596714, "grad_norm": 0.5426583202357883, "learning_rate": 4.031792376317924e-05, "loss": 0.6687, "step": 9395 }, { "epoch": 0.2743277568538145, "grad_norm": 0.6203112654866719, "learning_rate": 4.031630170316302e-05, "loss": 0.7256, "step": 9396 }, { "epoch": 0.27435695308166186, "grad_norm": 0.6331549528903105, "learning_rate": 4.0314679643146795e-05, "loss": 0.7225, "step": 9397 }, { "epoch": 0.2743861493095092, "grad_norm": 0.5446696729622159, "learning_rate": 4.031305758313058e-05, "loss": 0.6134, "step": 9398 }, { "epoch": 0.2744153455373566, "grad_norm": 0.5501735635414281, "learning_rate": 4.031143552311435e-05, "loss": 0.653, "step": 9399 }, { "epoch": 0.27444454176520394, "grad_norm": 0.5691485925456321, "learning_rate": 4.0309813463098134e-05, "loss": 0.6346, "step": 9400 }, { "epoch": 0.2744737379930513, "grad_norm": 0.5419778753179368, "learning_rate": 4.0308191403081916e-05, "loss": 0.5888, "step": 9401 }, { "epoch": 0.27450293422089866, "grad_norm": 0.5357205738068541, "learning_rate": 4.03065693430657e-05, "loss": 0.5787, "step": 9402 }, { "epoch": 0.274532130448746, "grad_norm": 0.5884139344058115, "learning_rate": 4.030494728304948e-05, "loss": 0.6186, "step": 9403 }, { "epoch": 0.2745613266765934, "grad_norm": 0.5480592270005287, "learning_rate": 4.0303325223033254e-05, "loss": 0.6646, "step": 9404 }, { "epoch": 0.27459052290444075, "grad_norm": 0.510062799318705, "learning_rate": 4.0301703163017036e-05, "loss": 0.565, "step": 9405 }, { "epoch": 0.2746197191322881, "grad_norm": 0.4922785360836902, "learning_rate": 4.030008110300081e-05, "loss": 0.5417, "step": 9406 }, { "epoch": 0.27464891536013547, "grad_norm": 0.5550379808753947, "learning_rate": 4.029845904298459e-05, "loss": 0.6424, "step": 9407 }, { "epoch": 0.27467811158798283, "grad_norm": 0.582556678646166, "learning_rate": 4.0296836982968374e-05, "loss": 0.6868, "step": 9408 }, { "epoch": 0.2747073078158302, "grad_norm": 0.5588878057778731, "learning_rate": 4.029521492295215e-05, "loss": 0.6923, "step": 9409 }, { "epoch": 0.27473650404367755, "grad_norm": 0.5729607134026602, "learning_rate": 4.029359286293593e-05, "loss": 0.6893, "step": 9410 }, { "epoch": 0.2747657002715249, "grad_norm": 0.5792416062930015, "learning_rate": 4.0291970802919706e-05, "loss": 0.6914, "step": 9411 }, { "epoch": 0.2747948964993723, "grad_norm": 0.5772094789513079, "learning_rate": 4.029034874290349e-05, "loss": 0.6749, "step": 9412 }, { "epoch": 0.27482409272721964, "grad_norm": 0.5400078835598195, "learning_rate": 4.028872668288727e-05, "loss": 0.6418, "step": 9413 }, { "epoch": 0.274853288955067, "grad_norm": 0.4967223860788906, "learning_rate": 4.0287104622871045e-05, "loss": 0.5486, "step": 9414 }, { "epoch": 0.27488248518291436, "grad_norm": 0.5259850755777277, "learning_rate": 4.0285482562854827e-05, "loss": 0.5677, "step": 9415 }, { "epoch": 0.2749116814107617, "grad_norm": 0.6147527291397573, "learning_rate": 4.028386050283861e-05, "loss": 0.7761, "step": 9416 }, { "epoch": 0.2749408776386091, "grad_norm": 0.5775174593096866, "learning_rate": 4.028223844282238e-05, "loss": 0.6661, "step": 9417 }, { "epoch": 0.27497007386645644, "grad_norm": 0.5420948060265752, "learning_rate": 4.0280616382806165e-05, "loss": 0.6294, "step": 9418 }, { "epoch": 0.2749992700943038, "grad_norm": 0.5543932779482471, "learning_rate": 4.027899432278994e-05, "loss": 0.6469, "step": 9419 }, { "epoch": 0.27502846632215117, "grad_norm": 0.6122036240890322, "learning_rate": 4.027737226277373e-05, "loss": 0.7181, "step": 9420 }, { "epoch": 0.2750576625499985, "grad_norm": 0.5792131880904778, "learning_rate": 4.0275750202757504e-05, "loss": 0.6579, "step": 9421 }, { "epoch": 0.2750868587778459, "grad_norm": 0.5434865768215987, "learning_rate": 4.0274128142741285e-05, "loss": 0.5821, "step": 9422 }, { "epoch": 0.27511605500569325, "grad_norm": 0.5403488808915465, "learning_rate": 4.027250608272507e-05, "loss": 0.6048, "step": 9423 }, { "epoch": 0.2751452512335406, "grad_norm": 0.5401271521891569, "learning_rate": 4.027088402270884e-05, "loss": 0.5993, "step": 9424 }, { "epoch": 0.27517444746138797, "grad_norm": 0.5944578627501464, "learning_rate": 4.0269261962692624e-05, "loss": 0.7099, "step": 9425 }, { "epoch": 0.27520364368923533, "grad_norm": 0.6136983661399901, "learning_rate": 4.02676399026764e-05, "loss": 0.7846, "step": 9426 }, { "epoch": 0.2752328399170827, "grad_norm": 0.5850490563426529, "learning_rate": 4.026601784266018e-05, "loss": 0.7471, "step": 9427 }, { "epoch": 0.27526203614493006, "grad_norm": 0.5625877858957941, "learning_rate": 4.026439578264396e-05, "loss": 0.6852, "step": 9428 }, { "epoch": 0.2752912323727774, "grad_norm": 0.5560481315041663, "learning_rate": 4.026277372262774e-05, "loss": 0.63, "step": 9429 }, { "epoch": 0.2753204286006248, "grad_norm": 0.5766520489435523, "learning_rate": 4.026115166261152e-05, "loss": 0.6567, "step": 9430 }, { "epoch": 0.27534962482847214, "grad_norm": 0.5605384686474607, "learning_rate": 4.0259529602595294e-05, "loss": 0.6448, "step": 9431 }, { "epoch": 0.2753788210563195, "grad_norm": 0.5377247042399388, "learning_rate": 4.0257907542579076e-05, "loss": 0.6594, "step": 9432 }, { "epoch": 0.27540801728416686, "grad_norm": 0.5840626029655805, "learning_rate": 4.025628548256286e-05, "loss": 0.7245, "step": 9433 }, { "epoch": 0.2754372135120142, "grad_norm": 0.5566296852496362, "learning_rate": 4.025466342254663e-05, "loss": 0.5824, "step": 9434 }, { "epoch": 0.2754664097398616, "grad_norm": 0.5622181781580622, "learning_rate": 4.0253041362530414e-05, "loss": 0.6233, "step": 9435 }, { "epoch": 0.27549560596770895, "grad_norm": 0.5573887872206913, "learning_rate": 4.0251419302514196e-05, "loss": 0.6255, "step": 9436 }, { "epoch": 0.2755248021955563, "grad_norm": 0.553482701725332, "learning_rate": 4.024979724249797e-05, "loss": 0.6479, "step": 9437 }, { "epoch": 0.2755539984234037, "grad_norm": 0.552806956187592, "learning_rate": 4.024817518248175e-05, "loss": 0.6531, "step": 9438 }, { "epoch": 0.2755831946512511, "grad_norm": 0.5443949882666467, "learning_rate": 4.0246553122465535e-05, "loss": 0.6344, "step": 9439 }, { "epoch": 0.27561239087909845, "grad_norm": 0.5583719214482409, "learning_rate": 4.0244931062449316e-05, "loss": 0.6483, "step": 9440 }, { "epoch": 0.2756415871069458, "grad_norm": 0.5535095690309576, "learning_rate": 4.024330900243309e-05, "loss": 0.6341, "step": 9441 }, { "epoch": 0.27567078333479317, "grad_norm": 0.5322448587665942, "learning_rate": 4.024168694241687e-05, "loss": 0.5562, "step": 9442 }, { "epoch": 0.27569997956264053, "grad_norm": 0.5676975378793702, "learning_rate": 4.0240064882400655e-05, "loss": 0.6667, "step": 9443 }, { "epoch": 0.2757291757904879, "grad_norm": 0.5980849137557336, "learning_rate": 4.023844282238443e-05, "loss": 0.6861, "step": 9444 }, { "epoch": 0.27575837201833525, "grad_norm": 0.602335005977056, "learning_rate": 4.023682076236821e-05, "loss": 0.7225, "step": 9445 }, { "epoch": 0.2757875682461826, "grad_norm": 0.6068263093473029, "learning_rate": 4.0235198702351987e-05, "loss": 0.7392, "step": 9446 }, { "epoch": 0.27581676447403, "grad_norm": 0.6306436368344147, "learning_rate": 4.023357664233577e-05, "loss": 0.7296, "step": 9447 }, { "epoch": 0.27584596070187734, "grad_norm": 0.5330952764636188, "learning_rate": 4.023195458231955e-05, "loss": 0.6469, "step": 9448 }, { "epoch": 0.2758751569297247, "grad_norm": 0.5699982366602133, "learning_rate": 4.0230332522303325e-05, "loss": 0.6781, "step": 9449 }, { "epoch": 0.27590435315757206, "grad_norm": 0.5546314946915484, "learning_rate": 4.022871046228711e-05, "loss": 0.6642, "step": 9450 }, { "epoch": 0.2759335493854194, "grad_norm": 0.5544911179662905, "learning_rate": 4.022708840227088e-05, "loss": 0.6368, "step": 9451 }, { "epoch": 0.2759627456132668, "grad_norm": 0.5912667831364405, "learning_rate": 4.0225466342254664e-05, "loss": 0.6428, "step": 9452 }, { "epoch": 0.27599194184111414, "grad_norm": 0.543550594786674, "learning_rate": 4.0223844282238445e-05, "loss": 0.66, "step": 9453 }, { "epoch": 0.2760211380689615, "grad_norm": 0.5521741773387034, "learning_rate": 4.022222222222222e-05, "loss": 0.6139, "step": 9454 }, { "epoch": 0.27605033429680886, "grad_norm": 0.5547761136931532, "learning_rate": 4.0220600162206e-05, "loss": 0.6641, "step": 9455 }, { "epoch": 0.2760795305246562, "grad_norm": 0.5285082018546066, "learning_rate": 4.021897810218978e-05, "loss": 0.6071, "step": 9456 }, { "epoch": 0.2761087267525036, "grad_norm": 0.5770818109792053, "learning_rate": 4.0217356042173566e-05, "loss": 0.7207, "step": 9457 }, { "epoch": 0.27613792298035095, "grad_norm": 0.5792934799118908, "learning_rate": 4.021573398215735e-05, "loss": 0.6755, "step": 9458 }, { "epoch": 0.2761671192081983, "grad_norm": 0.5880893547382035, "learning_rate": 4.021411192214112e-05, "loss": 0.6897, "step": 9459 }, { "epoch": 0.27619631543604567, "grad_norm": 0.5452160101958339, "learning_rate": 4.0212489862124904e-05, "loss": 0.627, "step": 9460 }, { "epoch": 0.27622551166389303, "grad_norm": 0.5747424762688665, "learning_rate": 4.021086780210868e-05, "loss": 0.6387, "step": 9461 }, { "epoch": 0.2762547078917404, "grad_norm": 0.5497862625377158, "learning_rate": 4.020924574209246e-05, "loss": 0.6155, "step": 9462 }, { "epoch": 0.27628390411958775, "grad_norm": 0.5774026910031155, "learning_rate": 4.020762368207624e-05, "loss": 0.6955, "step": 9463 }, { "epoch": 0.2763131003474351, "grad_norm": 0.5711129892008054, "learning_rate": 4.020600162206002e-05, "loss": 0.6758, "step": 9464 }, { "epoch": 0.2763422965752825, "grad_norm": 0.5837028570358425, "learning_rate": 4.02043795620438e-05, "loss": 0.6796, "step": 9465 }, { "epoch": 0.27637149280312984, "grad_norm": 0.5216109668521711, "learning_rate": 4.0202757502027574e-05, "loss": 0.6028, "step": 9466 }, { "epoch": 0.2764006890309772, "grad_norm": 0.6220631166049393, "learning_rate": 4.0201135442011356e-05, "loss": 0.7386, "step": 9467 }, { "epoch": 0.27642988525882456, "grad_norm": 0.5105318208542531, "learning_rate": 4.019951338199514e-05, "loss": 0.5563, "step": 9468 }, { "epoch": 0.2764590814866719, "grad_norm": 0.5202514332763882, "learning_rate": 4.019789132197891e-05, "loss": 0.6019, "step": 9469 }, { "epoch": 0.2764882777145193, "grad_norm": 0.6006892355157487, "learning_rate": 4.0196269261962695e-05, "loss": 0.6707, "step": 9470 }, { "epoch": 0.27651747394236664, "grad_norm": 0.7281885344061712, "learning_rate": 4.019464720194647e-05, "loss": 0.6909, "step": 9471 }, { "epoch": 0.276546670170214, "grad_norm": 0.5843755007674201, "learning_rate": 4.019302514193025e-05, "loss": 0.7515, "step": 9472 }, { "epoch": 0.27657586639806137, "grad_norm": 0.5451185175301592, "learning_rate": 4.019140308191403e-05, "loss": 0.6818, "step": 9473 }, { "epoch": 0.2766050626259087, "grad_norm": 0.5311048599342576, "learning_rate": 4.018978102189781e-05, "loss": 0.6058, "step": 9474 }, { "epoch": 0.2766342588537561, "grad_norm": 0.5993781789387999, "learning_rate": 4.018815896188159e-05, "loss": 0.7142, "step": 9475 }, { "epoch": 0.27666345508160345, "grad_norm": 0.5370524147641643, "learning_rate": 4.018653690186537e-05, "loss": 0.5932, "step": 9476 }, { "epoch": 0.2766926513094508, "grad_norm": 0.651937203525369, "learning_rate": 4.0184914841849153e-05, "loss": 0.8203, "step": 9477 }, { "epoch": 0.27672184753729817, "grad_norm": 0.5908977667341127, "learning_rate": 4.0183292781832935e-05, "loss": 0.6028, "step": 9478 }, { "epoch": 0.27675104376514553, "grad_norm": 0.5105847094625332, "learning_rate": 4.018167072181671e-05, "loss": 0.6204, "step": 9479 }, { "epoch": 0.2767802399929929, "grad_norm": 0.5121263496504187, "learning_rate": 4.018004866180049e-05, "loss": 0.585, "step": 9480 }, { "epoch": 0.27680943622084025, "grad_norm": 0.5377470179874729, "learning_rate": 4.017842660178427e-05, "loss": 0.6391, "step": 9481 }, { "epoch": 0.2768386324486876, "grad_norm": 0.5801643300216893, "learning_rate": 4.017680454176805e-05, "loss": 0.6919, "step": 9482 }, { "epoch": 0.276867828676535, "grad_norm": 0.5285132180705104, "learning_rate": 4.017518248175183e-05, "loss": 0.577, "step": 9483 }, { "epoch": 0.27689702490438234, "grad_norm": 0.5477213277955817, "learning_rate": 4.0173560421735605e-05, "loss": 0.6557, "step": 9484 }, { "epoch": 0.2769262211322297, "grad_norm": 0.5291093995165154, "learning_rate": 4.017193836171939e-05, "loss": 0.6184, "step": 9485 }, { "epoch": 0.27695541736007706, "grad_norm": 0.5359008307094162, "learning_rate": 4.017031630170316e-05, "loss": 0.5815, "step": 9486 }, { "epoch": 0.2769846135879244, "grad_norm": 0.5400160684770092, "learning_rate": 4.0168694241686944e-05, "loss": 0.6679, "step": 9487 }, { "epoch": 0.2770138098157718, "grad_norm": 0.5555387749619679, "learning_rate": 4.0167072181670726e-05, "loss": 0.677, "step": 9488 }, { "epoch": 0.27704300604361914, "grad_norm": 0.5529804833379232, "learning_rate": 4.01654501216545e-05, "loss": 0.6749, "step": 9489 }, { "epoch": 0.2770722022714665, "grad_norm": 0.5769312935749923, "learning_rate": 4.016382806163828e-05, "loss": 0.7579, "step": 9490 }, { "epoch": 0.27710139849931387, "grad_norm": 0.5478783334329063, "learning_rate": 4.016220600162206e-05, "loss": 0.6185, "step": 9491 }, { "epoch": 0.27713059472716123, "grad_norm": 0.52215021837564, "learning_rate": 4.016058394160584e-05, "loss": 0.576, "step": 9492 }, { "epoch": 0.2771597909550086, "grad_norm": 0.5447552256737975, "learning_rate": 4.015896188158962e-05, "loss": 0.6034, "step": 9493 }, { "epoch": 0.27718898718285595, "grad_norm": 0.5588594507949715, "learning_rate": 4.0157339821573396e-05, "loss": 0.6399, "step": 9494 }, { "epoch": 0.2772181834107033, "grad_norm": 0.569625893035317, "learning_rate": 4.0155717761557185e-05, "loss": 0.7078, "step": 9495 }, { "epoch": 0.2772473796385507, "grad_norm": 0.5171969512486724, "learning_rate": 4.015409570154096e-05, "loss": 0.6018, "step": 9496 }, { "epoch": 0.27727657586639803, "grad_norm": 0.535861152815548, "learning_rate": 4.015247364152474e-05, "loss": 0.6, "step": 9497 }, { "epoch": 0.27730577209424545, "grad_norm": 0.5498806700340002, "learning_rate": 4.0150851581508516e-05, "loss": 0.6378, "step": 9498 }, { "epoch": 0.2773349683220928, "grad_norm": 0.555100911033564, "learning_rate": 4.01492295214923e-05, "loss": 0.668, "step": 9499 }, { "epoch": 0.2773641645499402, "grad_norm": 0.5857603312642046, "learning_rate": 4.014760746147608e-05, "loss": 0.5877, "step": 9500 }, { "epoch": 0.27739336077778753, "grad_norm": 0.5891467325419244, "learning_rate": 4.0145985401459855e-05, "loss": 0.7136, "step": 9501 }, { "epoch": 0.2774225570056349, "grad_norm": 0.5334987590553534, "learning_rate": 4.0144363341443637e-05, "loss": 0.6531, "step": 9502 }, { "epoch": 0.27745175323348226, "grad_norm": 0.5520370586893641, "learning_rate": 4.014274128142742e-05, "loss": 0.6599, "step": 9503 }, { "epoch": 0.2774809494613296, "grad_norm": 0.5997145876076246, "learning_rate": 4.014111922141119e-05, "loss": 0.7119, "step": 9504 }, { "epoch": 0.277510145689177, "grad_norm": 0.5837369684339079, "learning_rate": 4.0139497161394975e-05, "loss": 0.7604, "step": 9505 }, { "epoch": 0.27753934191702434, "grad_norm": 0.5597118236118833, "learning_rate": 4.013787510137875e-05, "loss": 0.632, "step": 9506 }, { "epoch": 0.2775685381448717, "grad_norm": 0.5462753477494051, "learning_rate": 4.013625304136253e-05, "loss": 0.6473, "step": 9507 }, { "epoch": 0.27759773437271906, "grad_norm": 0.5537172145494884, "learning_rate": 4.0134630981346314e-05, "loss": 0.6461, "step": 9508 }, { "epoch": 0.2776269306005664, "grad_norm": 0.5433912930557578, "learning_rate": 4.013300892133009e-05, "loss": 0.6409, "step": 9509 }, { "epoch": 0.2776561268284138, "grad_norm": 0.5374503370943738, "learning_rate": 4.013138686131387e-05, "loss": 0.6294, "step": 9510 }, { "epoch": 0.27768532305626115, "grad_norm": 0.6021787803607473, "learning_rate": 4.0129764801297645e-05, "loss": 0.6546, "step": 9511 }, { "epoch": 0.2777145192841085, "grad_norm": 0.5778571514495757, "learning_rate": 4.012814274128143e-05, "loss": 0.7289, "step": 9512 }, { "epoch": 0.27774371551195587, "grad_norm": 0.9146411413680465, "learning_rate": 4.012652068126521e-05, "loss": 0.6345, "step": 9513 }, { "epoch": 0.27777291173980323, "grad_norm": 0.5854619105266035, "learning_rate": 4.012489862124899e-05, "loss": 0.7144, "step": 9514 }, { "epoch": 0.2778021079676506, "grad_norm": 0.5276355553382984, "learning_rate": 4.012327656123277e-05, "loss": 0.5782, "step": 9515 }, { "epoch": 0.27783130419549795, "grad_norm": 0.6060233537339298, "learning_rate": 4.012165450121655e-05, "loss": 0.695, "step": 9516 }, { "epoch": 0.2778605004233453, "grad_norm": 0.6394588986233705, "learning_rate": 4.012003244120033e-05, "loss": 0.7379, "step": 9517 }, { "epoch": 0.2778896966511927, "grad_norm": 0.5597020847452262, "learning_rate": 4.0118410381184104e-05, "loss": 0.6854, "step": 9518 }, { "epoch": 0.27791889287904004, "grad_norm": 0.5590827952266227, "learning_rate": 4.0116788321167886e-05, "loss": 0.6007, "step": 9519 }, { "epoch": 0.2779480891068874, "grad_norm": 0.6019964092031107, "learning_rate": 4.011516626115167e-05, "loss": 0.7107, "step": 9520 }, { "epoch": 0.27797728533473476, "grad_norm": 0.5803591125757906, "learning_rate": 4.011354420113544e-05, "loss": 0.7037, "step": 9521 }, { "epoch": 0.2780064815625821, "grad_norm": 0.5212547826069073, "learning_rate": 4.0111922141119224e-05, "loss": 0.5965, "step": 9522 }, { "epoch": 0.2780356777904295, "grad_norm": 0.5795240737231169, "learning_rate": 4.0110300081103006e-05, "loss": 0.6599, "step": 9523 }, { "epoch": 0.27806487401827684, "grad_norm": 0.5790262856466449, "learning_rate": 4.010867802108678e-05, "loss": 0.6875, "step": 9524 }, { "epoch": 0.2780940702461242, "grad_norm": 0.5343811634289045, "learning_rate": 4.010705596107056e-05, "loss": 0.6146, "step": 9525 }, { "epoch": 0.27812326647397156, "grad_norm": 0.5410102073069187, "learning_rate": 4.010543390105434e-05, "loss": 0.5782, "step": 9526 }, { "epoch": 0.2781524627018189, "grad_norm": 0.5205481558430382, "learning_rate": 4.010381184103812e-05, "loss": 0.6115, "step": 9527 }, { "epoch": 0.2781816589296663, "grad_norm": 0.5620836388196118, "learning_rate": 4.01021897810219e-05, "loss": 0.6481, "step": 9528 }, { "epoch": 0.27821085515751365, "grad_norm": 0.5333311600901391, "learning_rate": 4.0100567721005676e-05, "loss": 0.6457, "step": 9529 }, { "epoch": 0.278240051385361, "grad_norm": 0.5310866030432964, "learning_rate": 4.009894566098946e-05, "loss": 0.6306, "step": 9530 }, { "epoch": 0.27826924761320837, "grad_norm": 0.5274584116255903, "learning_rate": 4.009732360097323e-05, "loss": 0.5645, "step": 9531 }, { "epoch": 0.27829844384105573, "grad_norm": 0.5300461970656645, "learning_rate": 4.0095701540957015e-05, "loss": 0.5931, "step": 9532 }, { "epoch": 0.2783276400689031, "grad_norm": 0.5934831315201183, "learning_rate": 4.0094079480940797e-05, "loss": 0.6987, "step": 9533 }, { "epoch": 0.27835683629675045, "grad_norm": 0.577380609242838, "learning_rate": 4.009245742092458e-05, "loss": 0.7093, "step": 9534 }, { "epoch": 0.2783860325245978, "grad_norm": 0.5400903104875643, "learning_rate": 4.009083536090836e-05, "loss": 0.6641, "step": 9535 }, { "epoch": 0.2784152287524452, "grad_norm": 0.5414670341183668, "learning_rate": 4.0089213300892135e-05, "loss": 0.6157, "step": 9536 }, { "epoch": 0.27844442498029254, "grad_norm": 0.7444306309814634, "learning_rate": 4.008759124087592e-05, "loss": 0.695, "step": 9537 }, { "epoch": 0.2784736212081399, "grad_norm": 0.5231656416180458, "learning_rate": 4.008596918085969e-05, "loss": 0.5879, "step": 9538 }, { "epoch": 0.27850281743598726, "grad_norm": 0.5210094030816487, "learning_rate": 4.0084347120843474e-05, "loss": 0.5817, "step": 9539 }, { "epoch": 0.2785320136638346, "grad_norm": 0.6064444815806426, "learning_rate": 4.0082725060827255e-05, "loss": 0.6994, "step": 9540 }, { "epoch": 0.278561209891682, "grad_norm": 0.5665351812147931, "learning_rate": 4.008110300081103e-05, "loss": 0.6682, "step": 9541 }, { "epoch": 0.27859040611952934, "grad_norm": 0.5313116766689083, "learning_rate": 4.007948094079481e-05, "loss": 0.6359, "step": 9542 }, { "epoch": 0.2786196023473767, "grad_norm": 0.5490272701831235, "learning_rate": 4.007785888077859e-05, "loss": 0.6535, "step": 9543 }, { "epoch": 0.27864879857522407, "grad_norm": 0.5348326272682561, "learning_rate": 4.007623682076237e-05, "loss": 0.6039, "step": 9544 }, { "epoch": 0.2786779948030714, "grad_norm": 0.530936123190711, "learning_rate": 4.007461476074615e-05, "loss": 0.6173, "step": 9545 }, { "epoch": 0.2787071910309188, "grad_norm": 0.6367022023137262, "learning_rate": 4.0072992700729926e-05, "loss": 0.6749, "step": 9546 }, { "epoch": 0.27873638725876615, "grad_norm": 0.5509468600066989, "learning_rate": 4.007137064071371e-05, "loss": 0.6299, "step": 9547 }, { "epoch": 0.2787655834866135, "grad_norm": 0.49350273774132386, "learning_rate": 4.006974858069749e-05, "loss": 0.5191, "step": 9548 }, { "epoch": 0.27879477971446087, "grad_norm": 0.6026240160785639, "learning_rate": 4.0068126520681264e-05, "loss": 0.7477, "step": 9549 }, { "epoch": 0.27882397594230823, "grad_norm": 0.5263337915224566, "learning_rate": 4.0066504460665046e-05, "loss": 0.6368, "step": 9550 }, { "epoch": 0.2788531721701556, "grad_norm": 0.5367967413651027, "learning_rate": 4.006488240064882e-05, "loss": 0.6312, "step": 9551 }, { "epoch": 0.27888236839800296, "grad_norm": 0.5363982645574104, "learning_rate": 4.006326034063261e-05, "loss": 0.6247, "step": 9552 }, { "epoch": 0.2789115646258503, "grad_norm": 0.5562197663149506, "learning_rate": 4.0061638280616384e-05, "loss": 0.5744, "step": 9553 }, { "epoch": 0.2789407608536977, "grad_norm": 0.5684197580641063, "learning_rate": 4.0060016220600166e-05, "loss": 0.6983, "step": 9554 }, { "epoch": 0.27896995708154504, "grad_norm": 0.5040164938464522, "learning_rate": 4.005839416058395e-05, "loss": 0.6024, "step": 9555 }, { "epoch": 0.2789991533093924, "grad_norm": 0.5391581911525201, "learning_rate": 4.005677210056772e-05, "loss": 0.6317, "step": 9556 }, { "epoch": 0.27902834953723976, "grad_norm": 0.5514970072054548, "learning_rate": 4.0055150040551505e-05, "loss": 0.6489, "step": 9557 }, { "epoch": 0.2790575457650872, "grad_norm": 0.5375942738912666, "learning_rate": 4.005352798053528e-05, "loss": 0.5827, "step": 9558 }, { "epoch": 0.27908674199293454, "grad_norm": 0.5771026785653143, "learning_rate": 4.005190592051906e-05, "loss": 0.7387, "step": 9559 }, { "epoch": 0.2791159382207819, "grad_norm": 0.4995920769777385, "learning_rate": 4.005028386050284e-05, "loss": 0.5801, "step": 9560 }, { "epoch": 0.27914513444862926, "grad_norm": 0.5933441754881549, "learning_rate": 4.004866180048662e-05, "loss": 0.7235, "step": 9561 }, { "epoch": 0.2791743306764766, "grad_norm": 0.53745011198887, "learning_rate": 4.00470397404704e-05, "loss": 0.6097, "step": 9562 }, { "epoch": 0.279203526904324, "grad_norm": 0.5744873491971684, "learning_rate": 4.0045417680454175e-05, "loss": 0.6778, "step": 9563 }, { "epoch": 0.27923272313217135, "grad_norm": 0.5323077259262385, "learning_rate": 4.004379562043796e-05, "loss": 0.6297, "step": 9564 }, { "epoch": 0.2792619193600187, "grad_norm": 0.5240951107702261, "learning_rate": 4.004217356042174e-05, "loss": 0.5928, "step": 9565 }, { "epoch": 0.27929111558786607, "grad_norm": 0.543936590405809, "learning_rate": 4.0040551500405513e-05, "loss": 0.5951, "step": 9566 }, { "epoch": 0.27932031181571343, "grad_norm": 0.6185048622520923, "learning_rate": 4.0038929440389295e-05, "loss": 0.7643, "step": 9567 }, { "epoch": 0.2793495080435608, "grad_norm": 0.5863843930986602, "learning_rate": 4.003730738037308e-05, "loss": 0.7077, "step": 9568 }, { "epoch": 0.27937870427140815, "grad_norm": 0.5598033974285881, "learning_rate": 4.003568532035685e-05, "loss": 0.643, "step": 9569 }, { "epoch": 0.2794079004992555, "grad_norm": 0.5609785104661122, "learning_rate": 4.0034063260340634e-05, "loss": 0.659, "step": 9570 }, { "epoch": 0.2794370967271029, "grad_norm": 0.5199290379950349, "learning_rate": 4.0032441200324415e-05, "loss": 0.5939, "step": 9571 }, { "epoch": 0.27946629295495024, "grad_norm": 0.5801756409113034, "learning_rate": 4.00308191403082e-05, "loss": 0.6455, "step": 9572 }, { "epoch": 0.2794954891827976, "grad_norm": 0.551315820844856, "learning_rate": 4.002919708029197e-05, "loss": 0.6709, "step": 9573 }, { "epoch": 0.27952468541064496, "grad_norm": 0.5229233406527077, "learning_rate": 4.0027575020275754e-05, "loss": 0.5629, "step": 9574 }, { "epoch": 0.2795538816384923, "grad_norm": 0.5669436398393488, "learning_rate": 4.0025952960259536e-05, "loss": 0.6195, "step": 9575 }, { "epoch": 0.2795830778663397, "grad_norm": 0.6186332857644704, "learning_rate": 4.002433090024331e-05, "loss": 0.7098, "step": 9576 }, { "epoch": 0.27961227409418704, "grad_norm": 0.5671885884225002, "learning_rate": 4.002270884022709e-05, "loss": 0.6275, "step": 9577 }, { "epoch": 0.2796414703220344, "grad_norm": 0.5248855658684848, "learning_rate": 4.002108678021087e-05, "loss": 0.5802, "step": 9578 }, { "epoch": 0.27967066654988176, "grad_norm": 0.5766693002388933, "learning_rate": 4.001946472019465e-05, "loss": 0.7171, "step": 9579 }, { "epoch": 0.2796998627777291, "grad_norm": 0.5828890643460178, "learning_rate": 4.001784266017843e-05, "loss": 0.6961, "step": 9580 }, { "epoch": 0.2797290590055765, "grad_norm": 0.5691461086494969, "learning_rate": 4.0016220600162206e-05, "loss": 0.7106, "step": 9581 }, { "epoch": 0.27975825523342385, "grad_norm": 0.5911493489648179, "learning_rate": 4.001459854014599e-05, "loss": 0.7047, "step": 9582 }, { "epoch": 0.2797874514612712, "grad_norm": 0.5306727427764669, "learning_rate": 4.001297648012976e-05, "loss": 0.6095, "step": 9583 }, { "epoch": 0.27981664768911857, "grad_norm": 0.5254141946755001, "learning_rate": 4.0011354420113544e-05, "loss": 0.636, "step": 9584 }, { "epoch": 0.27984584391696593, "grad_norm": 0.5417229767458948, "learning_rate": 4.0009732360097326e-05, "loss": 0.5979, "step": 9585 }, { "epoch": 0.2798750401448133, "grad_norm": 0.5847183986770298, "learning_rate": 4.00081103000811e-05, "loss": 0.6987, "step": 9586 }, { "epoch": 0.27990423637266065, "grad_norm": 0.5877295100504644, "learning_rate": 4.000648824006488e-05, "loss": 0.6587, "step": 9587 }, { "epoch": 0.279933432600508, "grad_norm": 0.5410000836409085, "learning_rate": 4.000486618004866e-05, "loss": 0.6362, "step": 9588 }, { "epoch": 0.2799626288283554, "grad_norm": 0.5357969326122037, "learning_rate": 4.000324412003244e-05, "loss": 0.631, "step": 9589 }, { "epoch": 0.27999182505620274, "grad_norm": 0.5560142210721901, "learning_rate": 4.000162206001623e-05, "loss": 0.6319, "step": 9590 }, { "epoch": 0.2800210212840501, "grad_norm": 0.5092127539441861, "learning_rate": 4e-05, "loss": 0.5334, "step": 9591 }, { "epoch": 0.28005021751189746, "grad_norm": 0.5682539606570153, "learning_rate": 3.9998377939983785e-05, "loss": 0.6725, "step": 9592 }, { "epoch": 0.2800794137397448, "grad_norm": 0.5856471450781011, "learning_rate": 3.999675587996756e-05, "loss": 0.6996, "step": 9593 }, { "epoch": 0.2801086099675922, "grad_norm": 0.5053285563110907, "learning_rate": 3.999513381995134e-05, "loss": 0.5971, "step": 9594 }, { "epoch": 0.28013780619543954, "grad_norm": 0.5643873353005355, "learning_rate": 3.9993511759935124e-05, "loss": 0.6206, "step": 9595 }, { "epoch": 0.2801670024232869, "grad_norm": 0.6437568939844424, "learning_rate": 3.99918896999189e-05, "loss": 0.6596, "step": 9596 }, { "epoch": 0.28019619865113427, "grad_norm": 0.5375693665641164, "learning_rate": 3.999026763990268e-05, "loss": 0.6408, "step": 9597 }, { "epoch": 0.2802253948789816, "grad_norm": 0.6097579843486242, "learning_rate": 3.9988645579886455e-05, "loss": 0.6738, "step": 9598 }, { "epoch": 0.280254591106829, "grad_norm": 0.5547274799466367, "learning_rate": 3.998702351987024e-05, "loss": 0.6334, "step": 9599 }, { "epoch": 0.28028378733467635, "grad_norm": 0.5466896512108362, "learning_rate": 3.998540145985402e-05, "loss": 0.6368, "step": 9600 }, { "epoch": 0.2803129835625237, "grad_norm": 0.5387029206237794, "learning_rate": 3.9983779399837794e-05, "loss": 0.611, "step": 9601 }, { "epoch": 0.28034217979037107, "grad_norm": 0.5732068778095332, "learning_rate": 3.9982157339821576e-05, "loss": 0.6574, "step": 9602 }, { "epoch": 0.28037137601821843, "grad_norm": 0.5432327118697758, "learning_rate": 3.998053527980535e-05, "loss": 0.6303, "step": 9603 }, { "epoch": 0.2804005722460658, "grad_norm": 0.5426232198188987, "learning_rate": 3.997891321978913e-05, "loss": 0.6152, "step": 9604 }, { "epoch": 0.28042976847391315, "grad_norm": 0.5834091466871787, "learning_rate": 3.9977291159772914e-05, "loss": 0.7123, "step": 9605 }, { "epoch": 0.2804589647017605, "grad_norm": 0.6010877629294877, "learning_rate": 3.997566909975669e-05, "loss": 0.6485, "step": 9606 }, { "epoch": 0.2804881609296079, "grad_norm": 0.5724540985402983, "learning_rate": 3.997404703974047e-05, "loss": 0.6793, "step": 9607 }, { "epoch": 0.28051735715745524, "grad_norm": 0.5341682660278185, "learning_rate": 3.997242497972425e-05, "loss": 0.631, "step": 9608 }, { "epoch": 0.2805465533853026, "grad_norm": 0.5988924612864507, "learning_rate": 3.9970802919708034e-05, "loss": 0.7135, "step": 9609 }, { "epoch": 0.28057574961314996, "grad_norm": 0.5096828067585588, "learning_rate": 3.996918085969181e-05, "loss": 0.5185, "step": 9610 }, { "epoch": 0.2806049458409973, "grad_norm": 0.6598292125387927, "learning_rate": 3.996755879967559e-05, "loss": 0.6614, "step": 9611 }, { "epoch": 0.2806341420688447, "grad_norm": 0.6049773480462545, "learning_rate": 3.996593673965937e-05, "loss": 0.7396, "step": 9612 }, { "epoch": 0.28066333829669204, "grad_norm": 0.5504390643863603, "learning_rate": 3.996431467964315e-05, "loss": 0.6273, "step": 9613 }, { "epoch": 0.2806925345245394, "grad_norm": 0.5312182042952438, "learning_rate": 3.996269261962693e-05, "loss": 0.6431, "step": 9614 }, { "epoch": 0.28072173075238677, "grad_norm": 0.5652317222443328, "learning_rate": 3.996107055961071e-05, "loss": 0.7006, "step": 9615 }, { "epoch": 0.28075092698023413, "grad_norm": 0.5918043118045353, "learning_rate": 3.9959448499594486e-05, "loss": 0.6315, "step": 9616 }, { "epoch": 0.2807801232080815, "grad_norm": 0.5682796277738834, "learning_rate": 3.995782643957827e-05, "loss": 0.6851, "step": 9617 }, { "epoch": 0.28080931943592885, "grad_norm": 0.5733765915168061, "learning_rate": 3.995620437956204e-05, "loss": 0.6968, "step": 9618 }, { "epoch": 0.28083851566377627, "grad_norm": 0.5543856652401332, "learning_rate": 3.9954582319545825e-05, "loss": 0.6881, "step": 9619 }, { "epoch": 0.28086771189162363, "grad_norm": 0.5288764956777658, "learning_rate": 3.9952960259529607e-05, "loss": 0.6151, "step": 9620 }, { "epoch": 0.280896908119471, "grad_norm": 0.5849278478817975, "learning_rate": 3.995133819951338e-05, "loss": 0.694, "step": 9621 }, { "epoch": 0.28092610434731835, "grad_norm": 0.5479901321459903, "learning_rate": 3.994971613949716e-05, "loss": 0.6082, "step": 9622 }, { "epoch": 0.2809553005751657, "grad_norm": 0.999008494478608, "learning_rate": 3.994809407948094e-05, "loss": 0.6735, "step": 9623 }, { "epoch": 0.2809844968030131, "grad_norm": 0.5652756593637082, "learning_rate": 3.994647201946472e-05, "loss": 0.6526, "step": 9624 }, { "epoch": 0.28101369303086043, "grad_norm": 0.5717511355937533, "learning_rate": 3.99448499594485e-05, "loss": 0.7126, "step": 9625 }, { "epoch": 0.2810428892587078, "grad_norm": 0.5648253469129247, "learning_rate": 3.994322789943228e-05, "loss": 0.6493, "step": 9626 }, { "epoch": 0.28107208548655516, "grad_norm": 0.5967474634144904, "learning_rate": 3.9941605839416065e-05, "loss": 0.6977, "step": 9627 }, { "epoch": 0.2811012817144025, "grad_norm": 0.530513982825468, "learning_rate": 3.993998377939984e-05, "loss": 0.5993, "step": 9628 }, { "epoch": 0.2811304779422499, "grad_norm": 0.6574071496822732, "learning_rate": 3.993836171938362e-05, "loss": 0.7283, "step": 9629 }, { "epoch": 0.28115967417009724, "grad_norm": 0.5826809559588858, "learning_rate": 3.99367396593674e-05, "loss": 0.6485, "step": 9630 }, { "epoch": 0.2811888703979446, "grad_norm": 0.5683576141989991, "learning_rate": 3.993511759935118e-05, "loss": 0.6655, "step": 9631 }, { "epoch": 0.28121806662579196, "grad_norm": 0.551286345192859, "learning_rate": 3.993349553933496e-05, "loss": 0.6642, "step": 9632 }, { "epoch": 0.2812472628536393, "grad_norm": 0.5419715368154804, "learning_rate": 3.9931873479318736e-05, "loss": 0.5869, "step": 9633 }, { "epoch": 0.2812764590814867, "grad_norm": 0.536108062193973, "learning_rate": 3.993025141930252e-05, "loss": 0.6395, "step": 9634 }, { "epoch": 0.28130565530933405, "grad_norm": 0.5619219702423596, "learning_rate": 3.99286293592863e-05, "loss": 0.7119, "step": 9635 }, { "epoch": 0.2813348515371814, "grad_norm": 0.5918352166783284, "learning_rate": 3.9927007299270074e-05, "loss": 0.6882, "step": 9636 }, { "epoch": 0.28136404776502877, "grad_norm": 0.537383873283351, "learning_rate": 3.9925385239253856e-05, "loss": 0.6288, "step": 9637 }, { "epoch": 0.28139324399287613, "grad_norm": 0.4947273824326828, "learning_rate": 3.992376317923763e-05, "loss": 0.5617, "step": 9638 }, { "epoch": 0.2814224402207235, "grad_norm": 0.5786921822163765, "learning_rate": 3.992214111922141e-05, "loss": 0.6835, "step": 9639 }, { "epoch": 0.28145163644857085, "grad_norm": 0.5868871972256661, "learning_rate": 3.9920519059205194e-05, "loss": 0.7115, "step": 9640 }, { "epoch": 0.2814808326764182, "grad_norm": 0.5566096114091433, "learning_rate": 3.991889699918897e-05, "loss": 0.6388, "step": 9641 }, { "epoch": 0.2815100289042656, "grad_norm": 0.5373632013553544, "learning_rate": 3.991727493917275e-05, "loss": 0.6227, "step": 9642 }, { "epoch": 0.28153922513211294, "grad_norm": 0.5815771674832978, "learning_rate": 3.9915652879156526e-05, "loss": 0.6742, "step": 9643 }, { "epoch": 0.2815684213599603, "grad_norm": 0.5850021966614525, "learning_rate": 3.991403081914031e-05, "loss": 0.6972, "step": 9644 }, { "epoch": 0.28159761758780766, "grad_norm": 0.5532488210361699, "learning_rate": 3.991240875912409e-05, "loss": 0.6248, "step": 9645 }, { "epoch": 0.281626813815655, "grad_norm": 0.5553281649649298, "learning_rate": 3.991078669910787e-05, "loss": 0.6334, "step": 9646 }, { "epoch": 0.2816560100435024, "grad_norm": 0.5573877302105241, "learning_rate": 3.990916463909165e-05, "loss": 0.6021, "step": 9647 }, { "epoch": 0.28168520627134974, "grad_norm": 0.5380437465385647, "learning_rate": 3.990754257907543e-05, "loss": 0.5908, "step": 9648 }, { "epoch": 0.2817144024991971, "grad_norm": 0.5296714674319886, "learning_rate": 3.990592051905921e-05, "loss": 0.5885, "step": 9649 }, { "epoch": 0.28174359872704446, "grad_norm": 0.5296280390816885, "learning_rate": 3.9904298459042985e-05, "loss": 0.6129, "step": 9650 }, { "epoch": 0.2817727949548918, "grad_norm": 0.5273899009759082, "learning_rate": 3.990267639902677e-05, "loss": 0.6287, "step": 9651 }, { "epoch": 0.2818019911827392, "grad_norm": 0.5287814455236808, "learning_rate": 3.990105433901055e-05, "loss": 0.628, "step": 9652 }, { "epoch": 0.28183118741058655, "grad_norm": 0.6280655813813432, "learning_rate": 3.9899432278994323e-05, "loss": 0.6697, "step": 9653 }, { "epoch": 0.2818603836384339, "grad_norm": 0.5091342582057065, "learning_rate": 3.9897810218978105e-05, "loss": 0.5806, "step": 9654 }, { "epoch": 0.28188957986628127, "grad_norm": 0.6349713051755057, "learning_rate": 3.989618815896188e-05, "loss": 0.7139, "step": 9655 }, { "epoch": 0.28191877609412863, "grad_norm": 0.5469970563677018, "learning_rate": 3.989456609894566e-05, "loss": 0.6442, "step": 9656 }, { "epoch": 0.281947972321976, "grad_norm": 0.5489089942142109, "learning_rate": 3.9892944038929444e-05, "loss": 0.6401, "step": 9657 }, { "epoch": 0.28197716854982335, "grad_norm": 0.5430056285006134, "learning_rate": 3.989132197891322e-05, "loss": 0.6529, "step": 9658 }, { "epoch": 0.2820063647776707, "grad_norm": 0.5199777614166283, "learning_rate": 3.9889699918897e-05, "loss": 0.5846, "step": 9659 }, { "epoch": 0.2820355610055181, "grad_norm": 0.5619535296047716, "learning_rate": 3.988807785888078e-05, "loss": 0.7091, "step": 9660 }, { "epoch": 0.28206475723336544, "grad_norm": 0.5199348590391596, "learning_rate": 3.988645579886456e-05, "loss": 0.5842, "step": 9661 }, { "epoch": 0.2820939534612128, "grad_norm": 0.5408059180519559, "learning_rate": 3.988483373884834e-05, "loss": 0.6567, "step": 9662 }, { "epoch": 0.28212314968906016, "grad_norm": 0.5548447214864903, "learning_rate": 3.9883211678832114e-05, "loss": 0.6544, "step": 9663 }, { "epoch": 0.2821523459169075, "grad_norm": 0.548942804539879, "learning_rate": 3.9881589618815896e-05, "loss": 0.6189, "step": 9664 }, { "epoch": 0.2821815421447549, "grad_norm": 0.5442481096535436, "learning_rate": 3.987996755879968e-05, "loss": 0.6366, "step": 9665 }, { "epoch": 0.28221073837260224, "grad_norm": 0.5739703215117804, "learning_rate": 3.987834549878346e-05, "loss": 0.6811, "step": 9666 }, { "epoch": 0.2822399346004496, "grad_norm": 0.5412374805495254, "learning_rate": 3.987672343876724e-05, "loss": 0.6184, "step": 9667 }, { "epoch": 0.28226913082829697, "grad_norm": 0.5667353382449383, "learning_rate": 3.9875101378751016e-05, "loss": 0.6267, "step": 9668 }, { "epoch": 0.2822983270561443, "grad_norm": 0.49102272172289785, "learning_rate": 3.98734793187348e-05, "loss": 0.5384, "step": 9669 }, { "epoch": 0.2823275232839917, "grad_norm": 0.6063700665844518, "learning_rate": 3.987185725871857e-05, "loss": 0.699, "step": 9670 }, { "epoch": 0.28235671951183905, "grad_norm": 0.5255831323582776, "learning_rate": 3.9870235198702355e-05, "loss": 0.588, "step": 9671 }, { "epoch": 0.2823859157396864, "grad_norm": 0.511064264590831, "learning_rate": 3.9868613138686136e-05, "loss": 0.5456, "step": 9672 }, { "epoch": 0.28241511196753377, "grad_norm": 0.5175556241050118, "learning_rate": 3.986699107866991e-05, "loss": 0.5836, "step": 9673 }, { "epoch": 0.28244430819538113, "grad_norm": 0.537583887656665, "learning_rate": 3.986536901865369e-05, "loss": 0.6138, "step": 9674 }, { "epoch": 0.2824735044232285, "grad_norm": 0.5713304339755986, "learning_rate": 3.986374695863747e-05, "loss": 0.6419, "step": 9675 }, { "epoch": 0.28250270065107586, "grad_norm": 0.5398116519512272, "learning_rate": 3.986212489862125e-05, "loss": 0.5971, "step": 9676 }, { "epoch": 0.2825318968789232, "grad_norm": 0.6199906845853047, "learning_rate": 3.986050283860503e-05, "loss": 0.7376, "step": 9677 }, { "epoch": 0.2825610931067706, "grad_norm": 0.5744566233030212, "learning_rate": 3.9858880778588807e-05, "loss": 0.6959, "step": 9678 }, { "epoch": 0.282590289334618, "grad_norm": 0.5735603057387056, "learning_rate": 3.985725871857259e-05, "loss": 0.6839, "step": 9679 }, { "epoch": 0.28261948556246536, "grad_norm": 0.5629471736571936, "learning_rate": 3.985563665855637e-05, "loss": 0.6553, "step": 9680 }, { "epoch": 0.2826486817903127, "grad_norm": 0.5502184150173612, "learning_rate": 3.9854014598540145e-05, "loss": 0.5979, "step": 9681 }, { "epoch": 0.2826778780181601, "grad_norm": 0.5346874000392221, "learning_rate": 3.985239253852393e-05, "loss": 0.6591, "step": 9682 }, { "epoch": 0.28270707424600744, "grad_norm": 0.5579869810603939, "learning_rate": 3.98507704785077e-05, "loss": 0.6423, "step": 9683 }, { "epoch": 0.2827362704738548, "grad_norm": 0.6710218350835399, "learning_rate": 3.984914841849149e-05, "loss": 0.717, "step": 9684 }, { "epoch": 0.28276546670170216, "grad_norm": 0.5263976970212222, "learning_rate": 3.9847526358475265e-05, "loss": 0.5764, "step": 9685 }, { "epoch": 0.2827946629295495, "grad_norm": 0.6286609137407674, "learning_rate": 3.984590429845905e-05, "loss": 0.7434, "step": 9686 }, { "epoch": 0.2828238591573969, "grad_norm": 0.5190258543813949, "learning_rate": 3.984428223844283e-05, "loss": 0.5542, "step": 9687 }, { "epoch": 0.28285305538524425, "grad_norm": 0.5392297281371131, "learning_rate": 3.9842660178426604e-05, "loss": 0.6344, "step": 9688 }, { "epoch": 0.2828822516130916, "grad_norm": 0.587084574033765, "learning_rate": 3.9841038118410386e-05, "loss": 0.6025, "step": 9689 }, { "epoch": 0.28291144784093897, "grad_norm": 0.5348556832380743, "learning_rate": 3.983941605839416e-05, "loss": 0.5937, "step": 9690 }, { "epoch": 0.28294064406878633, "grad_norm": 0.5680159986719343, "learning_rate": 3.983779399837794e-05, "loss": 0.6949, "step": 9691 }, { "epoch": 0.2829698402966337, "grad_norm": 0.5678636186304167, "learning_rate": 3.9836171938361724e-05, "loss": 0.654, "step": 9692 }, { "epoch": 0.28299903652448105, "grad_norm": 0.602590033794397, "learning_rate": 3.98345498783455e-05, "loss": 0.7041, "step": 9693 }, { "epoch": 0.2830282327523284, "grad_norm": 0.5825178373076657, "learning_rate": 3.983292781832928e-05, "loss": 0.6915, "step": 9694 }, { "epoch": 0.2830574289801758, "grad_norm": 0.5996572191070548, "learning_rate": 3.9831305758313056e-05, "loss": 0.7062, "step": 9695 }, { "epoch": 0.28308662520802313, "grad_norm": 0.6029908947340534, "learning_rate": 3.982968369829684e-05, "loss": 0.7684, "step": 9696 }, { "epoch": 0.2831158214358705, "grad_norm": 0.5637130370881938, "learning_rate": 3.982806163828062e-05, "loss": 0.6904, "step": 9697 }, { "epoch": 0.28314501766371786, "grad_norm": 0.5103030140116295, "learning_rate": 3.9826439578264394e-05, "loss": 0.5702, "step": 9698 }, { "epoch": 0.2831742138915652, "grad_norm": 0.5614814200612939, "learning_rate": 3.9824817518248176e-05, "loss": 0.6847, "step": 9699 }, { "epoch": 0.2832034101194126, "grad_norm": 0.5743792140289581, "learning_rate": 3.982319545823195e-05, "loss": 0.6987, "step": 9700 }, { "epoch": 0.28323260634725994, "grad_norm": 0.5546206674439046, "learning_rate": 3.982157339821573e-05, "loss": 0.654, "step": 9701 }, { "epoch": 0.2832618025751073, "grad_norm": 0.5303404739095464, "learning_rate": 3.9819951338199515e-05, "loss": 0.6416, "step": 9702 }, { "epoch": 0.28329099880295466, "grad_norm": 0.5591592533095255, "learning_rate": 3.9818329278183296e-05, "loss": 0.6634, "step": 9703 }, { "epoch": 0.283320195030802, "grad_norm": 0.5434331959172205, "learning_rate": 3.981670721816708e-05, "loss": 0.6238, "step": 9704 }, { "epoch": 0.2833493912586494, "grad_norm": 0.6012140067468128, "learning_rate": 3.981508515815085e-05, "loss": 0.6492, "step": 9705 }, { "epoch": 0.28337858748649675, "grad_norm": 0.7900983739959314, "learning_rate": 3.9813463098134635e-05, "loss": 0.73, "step": 9706 }, { "epoch": 0.2834077837143441, "grad_norm": 0.5438275166191345, "learning_rate": 3.981184103811842e-05, "loss": 0.6101, "step": 9707 }, { "epoch": 0.28343697994219147, "grad_norm": 0.5241232435595367, "learning_rate": 3.981021897810219e-05, "loss": 0.5697, "step": 9708 }, { "epoch": 0.28346617617003883, "grad_norm": 0.5763134957385714, "learning_rate": 3.980859691808597e-05, "loss": 0.6719, "step": 9709 }, { "epoch": 0.2834953723978862, "grad_norm": 0.5308030049009227, "learning_rate": 3.980697485806975e-05, "loss": 0.5941, "step": 9710 }, { "epoch": 0.28352456862573355, "grad_norm": 0.5551511817195284, "learning_rate": 3.980535279805353e-05, "loss": 0.6428, "step": 9711 }, { "epoch": 0.2835537648535809, "grad_norm": 0.5728809510824902, "learning_rate": 3.980373073803731e-05, "loss": 0.6986, "step": 9712 }, { "epoch": 0.2835829610814283, "grad_norm": 0.5324713440963514, "learning_rate": 3.980210867802109e-05, "loss": 0.5871, "step": 9713 }, { "epoch": 0.28361215730927564, "grad_norm": 0.5797408458433122, "learning_rate": 3.980048661800487e-05, "loss": 0.6586, "step": 9714 }, { "epoch": 0.283641353537123, "grad_norm": 0.5458428902704938, "learning_rate": 3.9798864557988644e-05, "loss": 0.6295, "step": 9715 }, { "epoch": 0.28367054976497036, "grad_norm": 0.5756904694684135, "learning_rate": 3.9797242497972425e-05, "loss": 0.7117, "step": 9716 }, { "epoch": 0.2836997459928177, "grad_norm": 0.5159344391479791, "learning_rate": 3.979562043795621e-05, "loss": 0.6153, "step": 9717 }, { "epoch": 0.2837289422206651, "grad_norm": 0.6032180301843315, "learning_rate": 3.979399837793998e-05, "loss": 0.7943, "step": 9718 }, { "epoch": 0.28375813844851244, "grad_norm": 0.5256463922819143, "learning_rate": 3.9792376317923764e-05, "loss": 0.606, "step": 9719 }, { "epoch": 0.2837873346763598, "grad_norm": 0.5703059945820049, "learning_rate": 3.979075425790754e-05, "loss": 0.6523, "step": 9720 }, { "epoch": 0.28381653090420716, "grad_norm": 0.5440163666026433, "learning_rate": 3.978913219789132e-05, "loss": 0.6937, "step": 9721 }, { "epoch": 0.2838457271320545, "grad_norm": 0.6242658408656125, "learning_rate": 3.978751013787511e-05, "loss": 0.6696, "step": 9722 }, { "epoch": 0.2838749233599019, "grad_norm": 0.5116569473689305, "learning_rate": 3.9785888077858884e-05, "loss": 0.5902, "step": 9723 }, { "epoch": 0.28390411958774925, "grad_norm": 0.5845998338255494, "learning_rate": 3.9784266017842666e-05, "loss": 0.7275, "step": 9724 }, { "epoch": 0.2839333158155966, "grad_norm": 0.5413008004233368, "learning_rate": 3.978264395782644e-05, "loss": 0.6092, "step": 9725 }, { "epoch": 0.28396251204344397, "grad_norm": 0.5705566629549547, "learning_rate": 3.978102189781022e-05, "loss": 0.6831, "step": 9726 }, { "epoch": 0.28399170827129133, "grad_norm": 0.53039047777063, "learning_rate": 3.9779399837794004e-05, "loss": 0.5883, "step": 9727 }, { "epoch": 0.2840209044991387, "grad_norm": 0.577151669135521, "learning_rate": 3.977777777777778e-05, "loss": 0.7315, "step": 9728 }, { "epoch": 0.28405010072698605, "grad_norm": 0.5513666825235702, "learning_rate": 3.977615571776156e-05, "loss": 0.6499, "step": 9729 }, { "epoch": 0.2840792969548334, "grad_norm": 0.5518974941826156, "learning_rate": 3.9774533657745336e-05, "loss": 0.6255, "step": 9730 }, { "epoch": 0.2841084931826808, "grad_norm": 0.5993486180712321, "learning_rate": 3.977291159772912e-05, "loss": 0.7472, "step": 9731 }, { "epoch": 0.28413768941052814, "grad_norm": 0.5667336299442848, "learning_rate": 3.97712895377129e-05, "loss": 0.7143, "step": 9732 }, { "epoch": 0.2841668856383755, "grad_norm": 0.5502700163035738, "learning_rate": 3.9769667477696675e-05, "loss": 0.6919, "step": 9733 }, { "epoch": 0.28419608186622286, "grad_norm": 0.5619090816731778, "learning_rate": 3.9768045417680456e-05, "loss": 0.6538, "step": 9734 }, { "epoch": 0.2842252780940702, "grad_norm": 0.4982875791639435, "learning_rate": 3.976642335766423e-05, "loss": 0.534, "step": 9735 }, { "epoch": 0.2842544743219176, "grad_norm": 0.5318249419892831, "learning_rate": 3.976480129764801e-05, "loss": 0.5805, "step": 9736 }, { "epoch": 0.28428367054976494, "grad_norm": 0.5913297484697518, "learning_rate": 3.9763179237631795e-05, "loss": 0.6359, "step": 9737 }, { "epoch": 0.2843128667776123, "grad_norm": 0.6173922875008998, "learning_rate": 3.976155717761557e-05, "loss": 0.7351, "step": 9738 }, { "epoch": 0.2843420630054597, "grad_norm": 0.5856970595952741, "learning_rate": 3.975993511759935e-05, "loss": 0.6579, "step": 9739 }, { "epoch": 0.2843712592333071, "grad_norm": 0.5555564746731266, "learning_rate": 3.975831305758313e-05, "loss": 0.6458, "step": 9740 }, { "epoch": 0.28440045546115444, "grad_norm": 0.5424232833152413, "learning_rate": 3.9756690997566915e-05, "loss": 0.6371, "step": 9741 }, { "epoch": 0.2844296516890018, "grad_norm": 0.5595590224750309, "learning_rate": 3.975506893755069e-05, "loss": 0.7192, "step": 9742 }, { "epoch": 0.28445884791684917, "grad_norm": 0.5326764260884952, "learning_rate": 3.975344687753447e-05, "loss": 0.6216, "step": 9743 }, { "epoch": 0.28448804414469653, "grad_norm": 0.5681634252082652, "learning_rate": 3.9751824817518254e-05, "loss": 0.7254, "step": 9744 }, { "epoch": 0.2845172403725439, "grad_norm": 0.5950573344316089, "learning_rate": 3.975020275750203e-05, "loss": 0.7327, "step": 9745 }, { "epoch": 0.28454643660039125, "grad_norm": 0.5546917691828552, "learning_rate": 3.974858069748581e-05, "loss": 0.6749, "step": 9746 }, { "epoch": 0.2845756328282386, "grad_norm": 0.5264183510169149, "learning_rate": 3.974695863746959e-05, "loss": 0.6118, "step": 9747 }, { "epoch": 0.284604829056086, "grad_norm": 0.5741585320123107, "learning_rate": 3.974533657745337e-05, "loss": 0.6898, "step": 9748 }, { "epoch": 0.28463402528393333, "grad_norm": 0.5371617196765613, "learning_rate": 3.974371451743715e-05, "loss": 0.6189, "step": 9749 }, { "epoch": 0.2846632215117807, "grad_norm": 0.5291578085354208, "learning_rate": 3.9742092457420924e-05, "loss": 0.652, "step": 9750 }, { "epoch": 0.28469241773962806, "grad_norm": 0.5755225227786986, "learning_rate": 3.9740470397404706e-05, "loss": 0.652, "step": 9751 }, { "epoch": 0.2847216139674754, "grad_norm": 0.5443836076939645, "learning_rate": 3.973884833738849e-05, "loss": 0.6211, "step": 9752 }, { "epoch": 0.2847508101953228, "grad_norm": 0.5562011450074317, "learning_rate": 3.973722627737226e-05, "loss": 0.6498, "step": 9753 }, { "epoch": 0.28478000642317014, "grad_norm": 0.5839113714917986, "learning_rate": 3.9735604217356044e-05, "loss": 0.6857, "step": 9754 }, { "epoch": 0.2848092026510175, "grad_norm": 0.5079612300472207, "learning_rate": 3.973398215733982e-05, "loss": 0.5667, "step": 9755 }, { "epoch": 0.28483839887886486, "grad_norm": 0.5525797257654548, "learning_rate": 3.97323600973236e-05, "loss": 0.6735, "step": 9756 }, { "epoch": 0.2848675951067122, "grad_norm": 0.5336642994079703, "learning_rate": 3.973073803730738e-05, "loss": 0.609, "step": 9757 }, { "epoch": 0.2848967913345596, "grad_norm": 0.506261613355285, "learning_rate": 3.972911597729116e-05, "loss": 0.5478, "step": 9758 }, { "epoch": 0.28492598756240695, "grad_norm": 0.566549656068114, "learning_rate": 3.9727493917274946e-05, "loss": 0.6198, "step": 9759 }, { "epoch": 0.2849551837902543, "grad_norm": 0.5453160494390388, "learning_rate": 3.972587185725872e-05, "loss": 0.6072, "step": 9760 }, { "epoch": 0.28498438001810167, "grad_norm": 0.5459309063044955, "learning_rate": 3.97242497972425e-05, "loss": 0.6324, "step": 9761 }, { "epoch": 0.28501357624594903, "grad_norm": 0.5646647892700395, "learning_rate": 3.972262773722628e-05, "loss": 0.6438, "step": 9762 }, { "epoch": 0.2850427724737964, "grad_norm": 0.5043546150714359, "learning_rate": 3.972100567721006e-05, "loss": 0.5189, "step": 9763 }, { "epoch": 0.28507196870164375, "grad_norm": 0.5909356760227866, "learning_rate": 3.971938361719384e-05, "loss": 0.6751, "step": 9764 }, { "epoch": 0.2851011649294911, "grad_norm": 0.5098030020464851, "learning_rate": 3.9717761557177617e-05, "loss": 0.5697, "step": 9765 }, { "epoch": 0.2851303611573385, "grad_norm": 0.588707494042938, "learning_rate": 3.97161394971614e-05, "loss": 0.7143, "step": 9766 }, { "epoch": 0.28515955738518584, "grad_norm": 0.5116458314556377, "learning_rate": 3.971451743714518e-05, "loss": 0.5605, "step": 9767 }, { "epoch": 0.2851887536130332, "grad_norm": 0.5755466291881216, "learning_rate": 3.9712895377128955e-05, "loss": 0.6529, "step": 9768 }, { "epoch": 0.28521794984088056, "grad_norm": 0.6391289621066716, "learning_rate": 3.971127331711274e-05, "loss": 0.7748, "step": 9769 }, { "epoch": 0.2852471460687279, "grad_norm": 0.5972104353275023, "learning_rate": 3.970965125709651e-05, "loss": 0.6578, "step": 9770 }, { "epoch": 0.2852763422965753, "grad_norm": 0.541595759794679, "learning_rate": 3.9708029197080294e-05, "loss": 0.6434, "step": 9771 }, { "epoch": 0.28530553852442264, "grad_norm": 0.5176281119771307, "learning_rate": 3.9706407137064075e-05, "loss": 0.5726, "step": 9772 }, { "epoch": 0.28533473475227, "grad_norm": 0.5313221360267872, "learning_rate": 3.970478507704785e-05, "loss": 0.5946, "step": 9773 }, { "epoch": 0.28536393098011736, "grad_norm": 0.5259641392685858, "learning_rate": 3.970316301703163e-05, "loss": 0.6456, "step": 9774 }, { "epoch": 0.2853931272079647, "grad_norm": 0.520072072398595, "learning_rate": 3.970154095701541e-05, "loss": 0.5669, "step": 9775 }, { "epoch": 0.2854223234358121, "grad_norm": 0.5537815363227205, "learning_rate": 3.969991889699919e-05, "loss": 0.6524, "step": 9776 }, { "epoch": 0.28545151966365945, "grad_norm": 0.5422714402590836, "learning_rate": 3.969829683698297e-05, "loss": 0.6606, "step": 9777 }, { "epoch": 0.2854807158915068, "grad_norm": 0.5562488578493052, "learning_rate": 3.969667477696675e-05, "loss": 0.6729, "step": 9778 }, { "epoch": 0.28550991211935417, "grad_norm": 0.5678383433110012, "learning_rate": 3.9695052716950534e-05, "loss": 0.7038, "step": 9779 }, { "epoch": 0.28553910834720153, "grad_norm": 0.5629234218486895, "learning_rate": 3.969343065693431e-05, "loss": 0.701, "step": 9780 }, { "epoch": 0.2855683045750489, "grad_norm": 0.5357888942847593, "learning_rate": 3.969180859691809e-05, "loss": 0.6513, "step": 9781 }, { "epoch": 0.28559750080289625, "grad_norm": 0.5035671612105491, "learning_rate": 3.9690186536901866e-05, "loss": 0.5585, "step": 9782 }, { "epoch": 0.2856266970307436, "grad_norm": 0.5581213502529067, "learning_rate": 3.968856447688565e-05, "loss": 0.6658, "step": 9783 }, { "epoch": 0.285655893258591, "grad_norm": 0.5481867437813225, "learning_rate": 3.968694241686943e-05, "loss": 0.589, "step": 9784 }, { "epoch": 0.28568508948643834, "grad_norm": 0.550353121504291, "learning_rate": 3.9685320356853204e-05, "loss": 0.607, "step": 9785 }, { "epoch": 0.2857142857142857, "grad_norm": 0.5862167726035238, "learning_rate": 3.9683698296836986e-05, "loss": 0.7102, "step": 9786 }, { "epoch": 0.28574348194213306, "grad_norm": 0.5508719934960076, "learning_rate": 3.968207623682076e-05, "loss": 0.6659, "step": 9787 }, { "epoch": 0.2857726781699804, "grad_norm": 0.5323094528964325, "learning_rate": 3.968045417680454e-05, "loss": 0.6082, "step": 9788 }, { "epoch": 0.2858018743978278, "grad_norm": 0.5816078241516845, "learning_rate": 3.9678832116788325e-05, "loss": 0.6777, "step": 9789 }, { "epoch": 0.28583107062567514, "grad_norm": 0.5297733719054935, "learning_rate": 3.96772100567721e-05, "loss": 0.6166, "step": 9790 }, { "epoch": 0.2858602668535225, "grad_norm": 0.5505447087223345, "learning_rate": 3.967558799675588e-05, "loss": 0.6024, "step": 9791 }, { "epoch": 0.28588946308136987, "grad_norm": 0.5230089577176432, "learning_rate": 3.967396593673966e-05, "loss": 0.6429, "step": 9792 }, { "epoch": 0.2859186593092172, "grad_norm": 0.5143143559727581, "learning_rate": 3.967234387672344e-05, "loss": 0.5827, "step": 9793 }, { "epoch": 0.2859478555370646, "grad_norm": 0.544668063384766, "learning_rate": 3.967072181670722e-05, "loss": 0.664, "step": 9794 }, { "epoch": 0.28597705176491195, "grad_norm": 0.5671020614738721, "learning_rate": 3.9669099756690995e-05, "loss": 0.623, "step": 9795 }, { "epoch": 0.2860062479927593, "grad_norm": 0.5449233458796822, "learning_rate": 3.9667477696674777e-05, "loss": 0.6366, "step": 9796 }, { "epoch": 0.28603544422060667, "grad_norm": 0.5352620828638466, "learning_rate": 3.966585563665856e-05, "loss": 0.6085, "step": 9797 }, { "epoch": 0.28606464044845403, "grad_norm": 0.5384996833096651, "learning_rate": 3.966423357664234e-05, "loss": 0.6168, "step": 9798 }, { "epoch": 0.28609383667630145, "grad_norm": 0.5929356703031184, "learning_rate": 3.966261151662612e-05, "loss": 0.6365, "step": 9799 }, { "epoch": 0.2861230329041488, "grad_norm": 0.5466922233262237, "learning_rate": 3.96609894566099e-05, "loss": 0.627, "step": 9800 }, { "epoch": 0.28615222913199617, "grad_norm": 0.5466337128863806, "learning_rate": 3.965936739659368e-05, "loss": 0.5948, "step": 9801 }, { "epoch": 0.28618142535984353, "grad_norm": 0.4931632641501497, "learning_rate": 3.9657745336577454e-05, "loss": 0.5591, "step": 9802 }, { "epoch": 0.2862106215876909, "grad_norm": 0.5244992531278555, "learning_rate": 3.9656123276561235e-05, "loss": 0.5974, "step": 9803 }, { "epoch": 0.28623981781553826, "grad_norm": 0.5563025109882651, "learning_rate": 3.965450121654502e-05, "loss": 0.6344, "step": 9804 }, { "epoch": 0.2862690140433856, "grad_norm": 0.5634486924795411, "learning_rate": 3.965287915652879e-05, "loss": 0.6502, "step": 9805 }, { "epoch": 0.286298210271233, "grad_norm": 0.5734626085327044, "learning_rate": 3.9651257096512574e-05, "loss": 0.6794, "step": 9806 }, { "epoch": 0.28632740649908034, "grad_norm": 0.5816237795693787, "learning_rate": 3.964963503649635e-05, "loss": 0.7056, "step": 9807 }, { "epoch": 0.2863566027269277, "grad_norm": 0.5321713076048792, "learning_rate": 3.964801297648013e-05, "loss": 0.6108, "step": 9808 }, { "epoch": 0.28638579895477506, "grad_norm": 0.5346887236533848, "learning_rate": 3.964639091646391e-05, "loss": 0.6255, "step": 9809 }, { "epoch": 0.2864149951826224, "grad_norm": 0.5470583787820742, "learning_rate": 3.964476885644769e-05, "loss": 0.6371, "step": 9810 }, { "epoch": 0.2864441914104698, "grad_norm": 0.5522968714182374, "learning_rate": 3.964314679643147e-05, "loss": 0.6153, "step": 9811 }, { "epoch": 0.28647338763831715, "grad_norm": 0.5218581118893153, "learning_rate": 3.964152473641525e-05, "loss": 0.5829, "step": 9812 }, { "epoch": 0.2865025838661645, "grad_norm": 0.5446401955221291, "learning_rate": 3.9639902676399026e-05, "loss": 0.6641, "step": 9813 }, { "epoch": 0.28653178009401187, "grad_norm": 0.5165043293441132, "learning_rate": 3.963828061638281e-05, "loss": 0.6061, "step": 9814 }, { "epoch": 0.28656097632185923, "grad_norm": 0.5246336060407281, "learning_rate": 3.963665855636658e-05, "loss": 0.5883, "step": 9815 }, { "epoch": 0.2865901725497066, "grad_norm": 0.5886841108974187, "learning_rate": 3.963503649635037e-05, "loss": 0.6374, "step": 9816 }, { "epoch": 0.28661936877755395, "grad_norm": 0.5611019581170408, "learning_rate": 3.9633414436334146e-05, "loss": 0.7157, "step": 9817 }, { "epoch": 0.2866485650054013, "grad_norm": 0.5483799065435273, "learning_rate": 3.963179237631793e-05, "loss": 0.6187, "step": 9818 }, { "epoch": 0.2866777612332487, "grad_norm": 0.548938843420763, "learning_rate": 3.963017031630171e-05, "loss": 0.6734, "step": 9819 }, { "epoch": 0.28670695746109603, "grad_norm": 0.5913497550950615, "learning_rate": 3.9628548256285485e-05, "loss": 0.7344, "step": 9820 }, { "epoch": 0.2867361536889434, "grad_norm": 0.5569175511361344, "learning_rate": 3.9626926196269266e-05, "loss": 0.6381, "step": 9821 }, { "epoch": 0.28676534991679076, "grad_norm": 0.5179496521169613, "learning_rate": 3.962530413625304e-05, "loss": 0.6124, "step": 9822 }, { "epoch": 0.2867945461446381, "grad_norm": 0.5190601397981728, "learning_rate": 3.962368207623682e-05, "loss": 0.59, "step": 9823 }, { "epoch": 0.2868237423724855, "grad_norm": 0.5681074754173883, "learning_rate": 3.9622060016220605e-05, "loss": 0.6225, "step": 9824 }, { "epoch": 0.28685293860033284, "grad_norm": 0.5342253682328968, "learning_rate": 3.962043795620438e-05, "loss": 0.6064, "step": 9825 }, { "epoch": 0.2868821348281802, "grad_norm": 0.6067421466076004, "learning_rate": 3.961881589618816e-05, "loss": 0.6445, "step": 9826 }, { "epoch": 0.28691133105602756, "grad_norm": 0.5628176165471407, "learning_rate": 3.961719383617194e-05, "loss": 0.6416, "step": 9827 }, { "epoch": 0.2869405272838749, "grad_norm": 0.5760850538086356, "learning_rate": 3.961557177615572e-05, "loss": 0.6977, "step": 9828 }, { "epoch": 0.2869697235117223, "grad_norm": 0.5441360957199851, "learning_rate": 3.96139497161395e-05, "loss": 0.6186, "step": 9829 }, { "epoch": 0.28699891973956965, "grad_norm": 0.5481728610953468, "learning_rate": 3.9612327656123275e-05, "loss": 0.6232, "step": 9830 }, { "epoch": 0.287028115967417, "grad_norm": 0.5836078340295099, "learning_rate": 3.961070559610706e-05, "loss": 0.686, "step": 9831 }, { "epoch": 0.28705731219526437, "grad_norm": 0.5748524210784909, "learning_rate": 3.960908353609083e-05, "loss": 0.6643, "step": 9832 }, { "epoch": 0.28708650842311173, "grad_norm": 0.5305078133278205, "learning_rate": 3.9607461476074614e-05, "loss": 0.6334, "step": 9833 }, { "epoch": 0.2871157046509591, "grad_norm": 0.55257918368238, "learning_rate": 3.9605839416058395e-05, "loss": 0.6222, "step": 9834 }, { "epoch": 0.28714490087880645, "grad_norm": 0.6110391795245426, "learning_rate": 3.960421735604218e-05, "loss": 0.6947, "step": 9835 }, { "epoch": 0.2871740971066538, "grad_norm": 0.5474053795934989, "learning_rate": 3.960259529602596e-05, "loss": 0.6248, "step": 9836 }, { "epoch": 0.2872032933345012, "grad_norm": 0.5376223392572914, "learning_rate": 3.9600973236009734e-05, "loss": 0.6517, "step": 9837 }, { "epoch": 0.28723248956234854, "grad_norm": 0.6920962220122281, "learning_rate": 3.9599351175993516e-05, "loss": 0.6566, "step": 9838 }, { "epoch": 0.2872616857901959, "grad_norm": 0.5370373687729718, "learning_rate": 3.95977291159773e-05, "loss": 0.6408, "step": 9839 }, { "epoch": 0.28729088201804326, "grad_norm": 0.6053086367507685, "learning_rate": 3.959610705596107e-05, "loss": 0.6781, "step": 9840 }, { "epoch": 0.2873200782458906, "grad_norm": 0.5871100892978163, "learning_rate": 3.9594484995944854e-05, "loss": 0.6977, "step": 9841 }, { "epoch": 0.287349274473738, "grad_norm": 0.5760233615478791, "learning_rate": 3.959286293592863e-05, "loss": 0.7207, "step": 9842 }, { "epoch": 0.28737847070158534, "grad_norm": 0.5449145212640699, "learning_rate": 3.959124087591241e-05, "loss": 0.6297, "step": 9843 }, { "epoch": 0.2874076669294327, "grad_norm": 0.5996727203977301, "learning_rate": 3.958961881589619e-05, "loss": 0.6568, "step": 9844 }, { "epoch": 0.28743686315728006, "grad_norm": 0.49722959539641925, "learning_rate": 3.958799675587997e-05, "loss": 0.5609, "step": 9845 }, { "epoch": 0.2874660593851274, "grad_norm": 0.5442589612886696, "learning_rate": 3.958637469586375e-05, "loss": 0.6193, "step": 9846 }, { "epoch": 0.2874952556129748, "grad_norm": 0.5624377145060234, "learning_rate": 3.9584752635847524e-05, "loss": 0.5645, "step": 9847 }, { "epoch": 0.28752445184082215, "grad_norm": 0.5729736453772816, "learning_rate": 3.9583130575831306e-05, "loss": 0.6405, "step": 9848 }, { "epoch": 0.2875536480686695, "grad_norm": 0.5672309621566577, "learning_rate": 3.958150851581509e-05, "loss": 0.6772, "step": 9849 }, { "epoch": 0.28758284429651687, "grad_norm": 0.5308088810656117, "learning_rate": 3.957988645579886e-05, "loss": 0.5943, "step": 9850 }, { "epoch": 0.28761204052436423, "grad_norm": 0.5818910193052668, "learning_rate": 3.9578264395782645e-05, "loss": 0.7188, "step": 9851 }, { "epoch": 0.2876412367522116, "grad_norm": 0.5309514780669797, "learning_rate": 3.957664233576642e-05, "loss": 0.5351, "step": 9852 }, { "epoch": 0.28767043298005895, "grad_norm": 0.514361706096432, "learning_rate": 3.95750202757502e-05, "loss": 0.5845, "step": 9853 }, { "epoch": 0.2876996292079063, "grad_norm": 0.5597118284677862, "learning_rate": 3.957339821573398e-05, "loss": 0.6489, "step": 9854 }, { "epoch": 0.2877288254357537, "grad_norm": 0.5594763201328198, "learning_rate": 3.9571776155717765e-05, "loss": 0.6474, "step": 9855 }, { "epoch": 0.28775802166360104, "grad_norm": 0.5343282445411052, "learning_rate": 3.957015409570155e-05, "loss": 0.5704, "step": 9856 }, { "epoch": 0.2877872178914484, "grad_norm": 0.6342034693121861, "learning_rate": 3.956853203568532e-05, "loss": 0.8013, "step": 9857 }, { "epoch": 0.28781641411929576, "grad_norm": 0.5143345890409339, "learning_rate": 3.9566909975669104e-05, "loss": 0.5734, "step": 9858 }, { "epoch": 0.2878456103471431, "grad_norm": 0.5322238558841852, "learning_rate": 3.9565287915652885e-05, "loss": 0.6017, "step": 9859 }, { "epoch": 0.28787480657499054, "grad_norm": 0.5600391186880895, "learning_rate": 3.956366585563666e-05, "loss": 0.6759, "step": 9860 }, { "epoch": 0.2879040028028379, "grad_norm": 0.5550267668298583, "learning_rate": 3.956204379562044e-05, "loss": 0.6472, "step": 9861 }, { "epoch": 0.28793319903068526, "grad_norm": 0.6078542099161606, "learning_rate": 3.956042173560422e-05, "loss": 0.7088, "step": 9862 }, { "epoch": 0.2879623952585326, "grad_norm": 0.5022200433274455, "learning_rate": 3.9558799675588e-05, "loss": 0.5305, "step": 9863 }, { "epoch": 0.28799159148638, "grad_norm": 0.551634862034528, "learning_rate": 3.955717761557178e-05, "loss": 0.6461, "step": 9864 }, { "epoch": 0.28802078771422734, "grad_norm": 0.572761096008923, "learning_rate": 3.9555555555555556e-05, "loss": 0.6494, "step": 9865 }, { "epoch": 0.2880499839420747, "grad_norm": 0.5912310782083852, "learning_rate": 3.955393349553934e-05, "loss": 0.6236, "step": 9866 }, { "epoch": 0.28807918016992207, "grad_norm": 0.5795762544850936, "learning_rate": 3.955231143552311e-05, "loss": 0.6543, "step": 9867 }, { "epoch": 0.28810837639776943, "grad_norm": 0.5322858400136665, "learning_rate": 3.9550689375506894e-05, "loss": 0.6312, "step": 9868 }, { "epoch": 0.2881375726256168, "grad_norm": 0.5536382888983759, "learning_rate": 3.9549067315490676e-05, "loss": 0.6421, "step": 9869 }, { "epoch": 0.28816676885346415, "grad_norm": 0.562949195851888, "learning_rate": 3.954744525547445e-05, "loss": 0.6404, "step": 9870 }, { "epoch": 0.2881959650813115, "grad_norm": 0.6048238941790383, "learning_rate": 3.954582319545823e-05, "loss": 0.5243, "step": 9871 }, { "epoch": 0.2882251613091589, "grad_norm": 0.5500847377404058, "learning_rate": 3.954420113544201e-05, "loss": 0.6601, "step": 9872 }, { "epoch": 0.28825435753700623, "grad_norm": 16.07487477559816, "learning_rate": 3.9542579075425796e-05, "loss": 1.0645, "step": 9873 }, { "epoch": 0.2882835537648536, "grad_norm": 0.5498267273370827, "learning_rate": 3.954095701540957e-05, "loss": 0.6472, "step": 9874 }, { "epoch": 0.28831274999270096, "grad_norm": 0.5682214923434279, "learning_rate": 3.953933495539335e-05, "loss": 0.6892, "step": 9875 }, { "epoch": 0.2883419462205483, "grad_norm": 0.5771480457357765, "learning_rate": 3.9537712895377135e-05, "loss": 0.6793, "step": 9876 }, { "epoch": 0.2883711424483957, "grad_norm": 0.5411315485184238, "learning_rate": 3.953609083536091e-05, "loss": 0.636, "step": 9877 }, { "epoch": 0.28840033867624304, "grad_norm": 0.5584732345834277, "learning_rate": 3.953446877534469e-05, "loss": 0.6293, "step": 9878 }, { "epoch": 0.2884295349040904, "grad_norm": 0.7399828201447782, "learning_rate": 3.953284671532847e-05, "loss": 0.6872, "step": 9879 }, { "epoch": 0.28845873113193776, "grad_norm": 0.6434654139875692, "learning_rate": 3.953122465531225e-05, "loss": 0.7411, "step": 9880 }, { "epoch": 0.2884879273597851, "grad_norm": 0.5353352936260379, "learning_rate": 3.952960259529603e-05, "loss": 0.5794, "step": 9881 }, { "epoch": 0.2885171235876325, "grad_norm": 0.5109467009831725, "learning_rate": 3.9527980535279805e-05, "loss": 0.5569, "step": 9882 }, { "epoch": 0.28854631981547985, "grad_norm": 0.4971582014725692, "learning_rate": 3.952635847526359e-05, "loss": 0.5708, "step": 9883 }, { "epoch": 0.2885755160433272, "grad_norm": 0.60650582096909, "learning_rate": 3.952473641524737e-05, "loss": 0.7219, "step": 9884 }, { "epoch": 0.28860471227117457, "grad_norm": 0.57211724460038, "learning_rate": 3.952311435523114e-05, "loss": 0.6636, "step": 9885 }, { "epoch": 0.28863390849902193, "grad_norm": 0.5739978989254775, "learning_rate": 3.9521492295214925e-05, "loss": 0.6996, "step": 9886 }, { "epoch": 0.2886631047268693, "grad_norm": 1.7851696264945203, "learning_rate": 3.95198702351987e-05, "loss": 0.696, "step": 9887 }, { "epoch": 0.28869230095471665, "grad_norm": 0.5244145338718618, "learning_rate": 3.951824817518248e-05, "loss": 0.5773, "step": 9888 }, { "epoch": 0.288721497182564, "grad_norm": 0.5638673140612563, "learning_rate": 3.9516626115166264e-05, "loss": 0.6663, "step": 9889 }, { "epoch": 0.2887506934104114, "grad_norm": 0.5427083889901929, "learning_rate": 3.951500405515004e-05, "loss": 0.6044, "step": 9890 }, { "epoch": 0.28877988963825874, "grad_norm": 0.5831090404466898, "learning_rate": 3.951338199513382e-05, "loss": 0.6742, "step": 9891 }, { "epoch": 0.2888090858661061, "grad_norm": 0.7971643926498881, "learning_rate": 3.95117599351176e-05, "loss": 0.6978, "step": 9892 }, { "epoch": 0.28883828209395346, "grad_norm": 0.5131733616536515, "learning_rate": 3.9510137875101384e-05, "loss": 0.5429, "step": 9893 }, { "epoch": 0.2888674783218008, "grad_norm": 0.5667418686113719, "learning_rate": 3.950851581508516e-05, "loss": 0.6296, "step": 9894 }, { "epoch": 0.2888966745496482, "grad_norm": 0.5632434392230942, "learning_rate": 3.950689375506894e-05, "loss": 0.6494, "step": 9895 }, { "epoch": 0.28892587077749554, "grad_norm": 0.6422740165058181, "learning_rate": 3.950527169505272e-05, "loss": 0.6685, "step": 9896 }, { "epoch": 0.2889550670053429, "grad_norm": 0.5903872624177136, "learning_rate": 3.95036496350365e-05, "loss": 0.7194, "step": 9897 }, { "epoch": 0.28898426323319026, "grad_norm": 0.610430687142352, "learning_rate": 3.950202757502028e-05, "loss": 0.7523, "step": 9898 }, { "epoch": 0.2890134594610376, "grad_norm": 0.5543939703690977, "learning_rate": 3.9500405515004054e-05, "loss": 0.6678, "step": 9899 }, { "epoch": 0.289042655688885, "grad_norm": 0.5824778214343108, "learning_rate": 3.9498783454987836e-05, "loss": 0.7379, "step": 9900 }, { "epoch": 0.28907185191673235, "grad_norm": 0.7001804201229417, "learning_rate": 3.949716139497162e-05, "loss": 0.6816, "step": 9901 }, { "epoch": 0.2891010481445797, "grad_norm": 0.5274524342822837, "learning_rate": 3.949553933495539e-05, "loss": 0.5707, "step": 9902 }, { "epoch": 0.28913024437242707, "grad_norm": 0.5642404365140135, "learning_rate": 3.9493917274939174e-05, "loss": 0.671, "step": 9903 }, { "epoch": 0.28915944060027443, "grad_norm": 0.5482995432329838, "learning_rate": 3.9492295214922956e-05, "loss": 0.6243, "step": 9904 }, { "epoch": 0.2891886368281218, "grad_norm": 0.5725722598932779, "learning_rate": 3.949067315490673e-05, "loss": 0.6212, "step": 9905 }, { "epoch": 0.28921783305596915, "grad_norm": 0.574652983406461, "learning_rate": 3.948905109489051e-05, "loss": 0.6818, "step": 9906 }, { "epoch": 0.2892470292838165, "grad_norm": 0.5755005471398895, "learning_rate": 3.948742903487429e-05, "loss": 0.6763, "step": 9907 }, { "epoch": 0.2892762255116639, "grad_norm": 0.621851027978248, "learning_rate": 3.948580697485807e-05, "loss": 0.7331, "step": 9908 }, { "epoch": 0.28930542173951124, "grad_norm": 0.580074603146898, "learning_rate": 3.948418491484185e-05, "loss": 0.6409, "step": 9909 }, { "epoch": 0.2893346179673586, "grad_norm": 0.5684817885452992, "learning_rate": 3.9482562854825626e-05, "loss": 0.6935, "step": 9910 }, { "epoch": 0.28936381419520596, "grad_norm": 0.5901522280993526, "learning_rate": 3.9480940794809415e-05, "loss": 0.749, "step": 9911 }, { "epoch": 0.2893930104230533, "grad_norm": 0.5766184053834723, "learning_rate": 3.947931873479319e-05, "loss": 0.6688, "step": 9912 }, { "epoch": 0.2894222066509007, "grad_norm": 0.5144140881016398, "learning_rate": 3.947769667477697e-05, "loss": 0.563, "step": 9913 }, { "epoch": 0.28945140287874804, "grad_norm": 0.5156823577251074, "learning_rate": 3.947607461476075e-05, "loss": 0.6029, "step": 9914 }, { "epoch": 0.2894805991065954, "grad_norm": 0.5489923317918521, "learning_rate": 3.947445255474453e-05, "loss": 0.6473, "step": 9915 }, { "epoch": 0.28950979533444277, "grad_norm": 0.5993484718651763, "learning_rate": 3.947283049472831e-05, "loss": 0.692, "step": 9916 }, { "epoch": 0.2895389915622901, "grad_norm": 0.5078324764780328, "learning_rate": 3.9471208434712085e-05, "loss": 0.5578, "step": 9917 }, { "epoch": 0.2895681877901375, "grad_norm": 0.5328279317503107, "learning_rate": 3.946958637469587e-05, "loss": 0.5866, "step": 9918 }, { "epoch": 0.28959738401798485, "grad_norm": 0.5822315503105743, "learning_rate": 3.946796431467964e-05, "loss": 0.6563, "step": 9919 }, { "epoch": 0.28962658024583227, "grad_norm": 0.601745301287256, "learning_rate": 3.9466342254663424e-05, "loss": 0.7571, "step": 9920 }, { "epoch": 0.2896557764736796, "grad_norm": 0.5535012166564309, "learning_rate": 3.9464720194647205e-05, "loss": 0.6415, "step": 9921 }, { "epoch": 0.289684972701527, "grad_norm": 0.632757104365118, "learning_rate": 3.946309813463098e-05, "loss": 0.6226, "step": 9922 }, { "epoch": 0.28971416892937435, "grad_norm": 0.5664996396502836, "learning_rate": 3.946147607461476e-05, "loss": 0.6406, "step": 9923 }, { "epoch": 0.2897433651572217, "grad_norm": 0.5463743189383381, "learning_rate": 3.9459854014598544e-05, "loss": 0.604, "step": 9924 }, { "epoch": 0.28977256138506907, "grad_norm": 0.5105065223505117, "learning_rate": 3.945823195458232e-05, "loss": 0.5729, "step": 9925 }, { "epoch": 0.28980175761291643, "grad_norm": 0.5344608597629256, "learning_rate": 3.94566098945661e-05, "loss": 0.6466, "step": 9926 }, { "epoch": 0.2898309538407638, "grad_norm": 0.5682966497782881, "learning_rate": 3.9454987834549876e-05, "loss": 0.6707, "step": 9927 }, { "epoch": 0.28986015006861116, "grad_norm": 0.6325799186245645, "learning_rate": 3.945336577453366e-05, "loss": 0.7148, "step": 9928 }, { "epoch": 0.2898893462964585, "grad_norm": 0.5195177770516162, "learning_rate": 3.945174371451744e-05, "loss": 0.5802, "step": 9929 }, { "epoch": 0.2899185425243059, "grad_norm": 0.5781375446325049, "learning_rate": 3.945012165450122e-05, "loss": 0.6686, "step": 9930 }, { "epoch": 0.28994773875215324, "grad_norm": 0.5348036986667561, "learning_rate": 3.9448499594485e-05, "loss": 0.6247, "step": 9931 }, { "epoch": 0.2899769349800006, "grad_norm": 0.5664409846179665, "learning_rate": 3.944687753446878e-05, "loss": 0.6611, "step": 9932 }, { "epoch": 0.29000613120784796, "grad_norm": 0.6019044923248172, "learning_rate": 3.944525547445256e-05, "loss": 0.7005, "step": 9933 }, { "epoch": 0.2900353274356953, "grad_norm": 0.5295079781850986, "learning_rate": 3.9443633414436335e-05, "loss": 0.5759, "step": 9934 }, { "epoch": 0.2900645236635427, "grad_norm": 0.5531600265970033, "learning_rate": 3.9442011354420116e-05, "loss": 0.6505, "step": 9935 }, { "epoch": 0.29009371989139004, "grad_norm": 0.5327331203070313, "learning_rate": 3.94403892944039e-05, "loss": 0.6677, "step": 9936 }, { "epoch": 0.2901229161192374, "grad_norm": 0.5480562577281877, "learning_rate": 3.943876723438767e-05, "loss": 0.6032, "step": 9937 }, { "epoch": 0.29015211234708477, "grad_norm": 0.5593971280800234, "learning_rate": 3.9437145174371455e-05, "loss": 0.6804, "step": 9938 }, { "epoch": 0.29018130857493213, "grad_norm": 0.56225191107422, "learning_rate": 3.943552311435523e-05, "loss": 0.607, "step": 9939 }, { "epoch": 0.2902105048027795, "grad_norm": 0.5833621036156452, "learning_rate": 3.943390105433901e-05, "loss": 0.7089, "step": 9940 }, { "epoch": 0.29023970103062685, "grad_norm": 0.6080760422358915, "learning_rate": 3.943227899432279e-05, "loss": 0.7682, "step": 9941 }, { "epoch": 0.2902688972584742, "grad_norm": 0.5327331229020748, "learning_rate": 3.943065693430657e-05, "loss": 0.6648, "step": 9942 }, { "epoch": 0.2902980934863216, "grad_norm": 0.5981688966168637, "learning_rate": 3.942903487429035e-05, "loss": 0.6486, "step": 9943 }, { "epoch": 0.29032728971416893, "grad_norm": 0.5504931996407498, "learning_rate": 3.9427412814274125e-05, "loss": 0.6412, "step": 9944 }, { "epoch": 0.2903564859420163, "grad_norm": 0.5784937475970242, "learning_rate": 3.942579075425791e-05, "loss": 0.6648, "step": 9945 }, { "epoch": 0.29038568216986366, "grad_norm": 0.5579937669903455, "learning_rate": 3.942416869424169e-05, "loss": 0.6529, "step": 9946 }, { "epoch": 0.290414878397711, "grad_norm": 0.5759814742893699, "learning_rate": 3.9422546634225464e-05, "loss": 0.664, "step": 9947 }, { "epoch": 0.2904440746255584, "grad_norm": 0.5412058055675458, "learning_rate": 3.942092457420925e-05, "loss": 0.6444, "step": 9948 }, { "epoch": 0.29047327085340574, "grad_norm": 0.6255694131025963, "learning_rate": 3.941930251419303e-05, "loss": 0.7517, "step": 9949 }, { "epoch": 0.2905024670812531, "grad_norm": 0.548362717787525, "learning_rate": 3.941768045417681e-05, "loss": 0.633, "step": 9950 }, { "epoch": 0.29053166330910046, "grad_norm": 0.5518071623374183, "learning_rate": 3.941605839416059e-05, "loss": 0.6306, "step": 9951 }, { "epoch": 0.2905608595369478, "grad_norm": 0.5635775568950604, "learning_rate": 3.9414436334144366e-05, "loss": 0.6202, "step": 9952 }, { "epoch": 0.2905900557647952, "grad_norm": 0.5642075467515324, "learning_rate": 3.941281427412815e-05, "loss": 0.6939, "step": 9953 }, { "epoch": 0.29061925199264255, "grad_norm": 0.5294555353549901, "learning_rate": 3.941119221411192e-05, "loss": 0.6087, "step": 9954 }, { "epoch": 0.2906484482204899, "grad_norm": 0.5394251703771441, "learning_rate": 3.9409570154095704e-05, "loss": 0.6145, "step": 9955 }, { "epoch": 0.29067764444833727, "grad_norm": 0.5229869053525786, "learning_rate": 3.9407948094079486e-05, "loss": 0.5684, "step": 9956 }, { "epoch": 0.29070684067618463, "grad_norm": 0.5175551838289187, "learning_rate": 3.940632603406326e-05, "loss": 0.571, "step": 9957 }, { "epoch": 0.290736036904032, "grad_norm": 0.7526413880366022, "learning_rate": 3.940470397404704e-05, "loss": 0.6766, "step": 9958 }, { "epoch": 0.29076523313187935, "grad_norm": 0.5532369563209676, "learning_rate": 3.940308191403082e-05, "loss": 0.6244, "step": 9959 }, { "epoch": 0.2907944293597267, "grad_norm": 0.6182784032058224, "learning_rate": 3.94014598540146e-05, "loss": 0.7628, "step": 9960 }, { "epoch": 0.2908236255875741, "grad_norm": 0.5700365358981877, "learning_rate": 3.939983779399838e-05, "loss": 0.6556, "step": 9961 }, { "epoch": 0.29085282181542144, "grad_norm": 0.5919660731033903, "learning_rate": 3.9398215733982156e-05, "loss": 0.6969, "step": 9962 }, { "epoch": 0.2908820180432688, "grad_norm": 0.5447946771119871, "learning_rate": 3.939659367396594e-05, "loss": 0.6262, "step": 9963 }, { "epoch": 0.29091121427111616, "grad_norm": 0.5414085225964221, "learning_rate": 3.939497161394971e-05, "loss": 0.6252, "step": 9964 }, { "epoch": 0.2909404104989635, "grad_norm": 0.5175548191062909, "learning_rate": 3.9393349553933495e-05, "loss": 0.6176, "step": 9965 }, { "epoch": 0.2909696067268109, "grad_norm": 0.5294291350455123, "learning_rate": 3.9391727493917276e-05, "loss": 0.6026, "step": 9966 }, { "epoch": 0.29099880295465824, "grad_norm": 0.5831977081068103, "learning_rate": 3.939010543390106e-05, "loss": 0.6554, "step": 9967 }, { "epoch": 0.2910279991825056, "grad_norm": 0.5659292675646653, "learning_rate": 3.938848337388484e-05, "loss": 0.6022, "step": 9968 }, { "epoch": 0.29105719541035296, "grad_norm": 0.5859649112543618, "learning_rate": 3.9386861313868615e-05, "loss": 0.7201, "step": 9969 }, { "epoch": 0.2910863916382003, "grad_norm": 0.5122939555521264, "learning_rate": 3.93852392538524e-05, "loss": 0.5884, "step": 9970 }, { "epoch": 0.2911155878660477, "grad_norm": 0.5870088236148986, "learning_rate": 3.938361719383618e-05, "loss": 0.6524, "step": 9971 }, { "epoch": 0.29114478409389505, "grad_norm": 0.621456386585595, "learning_rate": 3.938199513381995e-05, "loss": 0.762, "step": 9972 }, { "epoch": 0.2911739803217424, "grad_norm": 0.5332541713108692, "learning_rate": 3.9380373073803735e-05, "loss": 0.6462, "step": 9973 }, { "epoch": 0.29120317654958977, "grad_norm": 0.5676732476352758, "learning_rate": 3.937875101378751e-05, "loss": 0.7072, "step": 9974 }, { "epoch": 0.29123237277743713, "grad_norm": 0.5196167387382538, "learning_rate": 3.937712895377129e-05, "loss": 0.6166, "step": 9975 }, { "epoch": 0.2912615690052845, "grad_norm": 0.501110714819606, "learning_rate": 3.9375506893755074e-05, "loss": 0.5687, "step": 9976 }, { "epoch": 0.29129076523313185, "grad_norm": 0.5675669095457416, "learning_rate": 3.937388483373885e-05, "loss": 0.7156, "step": 9977 }, { "epoch": 0.2913199614609792, "grad_norm": 0.5863677718140086, "learning_rate": 3.937226277372263e-05, "loss": 0.6954, "step": 9978 }, { "epoch": 0.2913491576888266, "grad_norm": 0.5645681924986834, "learning_rate": 3.9370640713706405e-05, "loss": 0.6966, "step": 9979 }, { "epoch": 0.291378353916674, "grad_norm": 0.5379957424486166, "learning_rate": 3.936901865369019e-05, "loss": 0.5985, "step": 9980 }, { "epoch": 0.29140755014452135, "grad_norm": 0.5374477744220775, "learning_rate": 3.936739659367397e-05, "loss": 0.5673, "step": 9981 }, { "epoch": 0.2914367463723687, "grad_norm": 0.5571766964997013, "learning_rate": 3.9365774533657744e-05, "loss": 0.6219, "step": 9982 }, { "epoch": 0.2914659426002161, "grad_norm": 0.5666977045033288, "learning_rate": 3.9364152473641526e-05, "loss": 0.7, "step": 9983 }, { "epoch": 0.29149513882806344, "grad_norm": 0.5663216241297843, "learning_rate": 3.93625304136253e-05, "loss": 0.6642, "step": 9984 }, { "epoch": 0.2915243350559108, "grad_norm": 0.5329221888715414, "learning_rate": 3.936090835360908e-05, "loss": 0.6666, "step": 9985 }, { "epoch": 0.29155353128375816, "grad_norm": 0.5538615484336336, "learning_rate": 3.9359286293592864e-05, "loss": 0.6832, "step": 9986 }, { "epoch": 0.2915827275116055, "grad_norm": 0.6000534919991946, "learning_rate": 3.9357664233576646e-05, "loss": 0.6804, "step": 9987 }, { "epoch": 0.2916119237394529, "grad_norm": 0.6079482400454187, "learning_rate": 3.935604217356043e-05, "loss": 0.6926, "step": 9988 }, { "epoch": 0.29164111996730024, "grad_norm": 0.5545452909601034, "learning_rate": 3.93544201135442e-05, "loss": 0.6668, "step": 9989 }, { "epoch": 0.2916703161951476, "grad_norm": 0.5628741932837994, "learning_rate": 3.9352798053527984e-05, "loss": 0.6754, "step": 9990 }, { "epoch": 0.29169951242299497, "grad_norm": 0.536047610977856, "learning_rate": 3.9351175993511766e-05, "loss": 0.6778, "step": 9991 }, { "epoch": 0.2917287086508423, "grad_norm": 0.5674841956081003, "learning_rate": 3.934955393349554e-05, "loss": 0.6644, "step": 9992 }, { "epoch": 0.2917579048786897, "grad_norm": 0.5872665216158068, "learning_rate": 3.934793187347932e-05, "loss": 0.7345, "step": 9993 }, { "epoch": 0.29178710110653705, "grad_norm": 0.5556780421307512, "learning_rate": 3.93463098134631e-05, "loss": 0.6506, "step": 9994 }, { "epoch": 0.2918162973343844, "grad_norm": 0.5823847602525692, "learning_rate": 3.934468775344688e-05, "loss": 0.6528, "step": 9995 }, { "epoch": 0.2918454935622318, "grad_norm": 0.6308333668009664, "learning_rate": 3.934306569343066e-05, "loss": 0.743, "step": 9996 }, { "epoch": 0.29187468979007913, "grad_norm": 0.5225072877338691, "learning_rate": 3.9341443633414436e-05, "loss": 0.5972, "step": 9997 }, { "epoch": 0.2919038860179265, "grad_norm": 0.6898915037648591, "learning_rate": 3.933982157339822e-05, "loss": 0.7039, "step": 9998 }, { "epoch": 0.29193308224577386, "grad_norm": 0.5374107814171779, "learning_rate": 3.933819951338199e-05, "loss": 0.6182, "step": 9999 }, { "epoch": 0.2919622784736212, "grad_norm": 0.5724026324752611, "learning_rate": 3.9336577453365775e-05, "loss": 0.698, "step": 10000 }, { "epoch": 0.2919914747014686, "grad_norm": 0.5630220291493386, "learning_rate": 3.933495539334956e-05, "loss": 0.5751, "step": 10001 }, { "epoch": 0.29202067092931594, "grad_norm": 0.5249484483212835, "learning_rate": 3.933333333333333e-05, "loss": 0.5803, "step": 10002 }, { "epoch": 0.2920498671571633, "grad_norm": 0.5873964558993168, "learning_rate": 3.9331711273317113e-05, "loss": 0.7416, "step": 10003 }, { "epoch": 0.29207906338501066, "grad_norm": 0.5519556904722046, "learning_rate": 3.933008921330089e-05, "loss": 0.6673, "step": 10004 }, { "epoch": 0.292108259612858, "grad_norm": 0.5588160472356198, "learning_rate": 3.932846715328468e-05, "loss": 0.6654, "step": 10005 }, { "epoch": 0.2921374558407054, "grad_norm": 0.49656710640225166, "learning_rate": 3.932684509326845e-05, "loss": 0.5551, "step": 10006 }, { "epoch": 0.29216665206855275, "grad_norm": 0.5176575772088398, "learning_rate": 3.9325223033252234e-05, "loss": 0.6125, "step": 10007 }, { "epoch": 0.2921958482964001, "grad_norm": 0.5730049194696887, "learning_rate": 3.9323600973236015e-05, "loss": 0.7005, "step": 10008 }, { "epoch": 0.29222504452424747, "grad_norm": 0.5381559837538403, "learning_rate": 3.932197891321979e-05, "loss": 0.6345, "step": 10009 }, { "epoch": 0.29225424075209483, "grad_norm": 0.5432036078022728, "learning_rate": 3.932035685320357e-05, "loss": 0.6224, "step": 10010 }, { "epoch": 0.2922834369799422, "grad_norm": 0.5174424901231977, "learning_rate": 3.9318734793187354e-05, "loss": 0.5748, "step": 10011 }, { "epoch": 0.29231263320778955, "grad_norm": 0.5657166514694887, "learning_rate": 3.931711273317113e-05, "loss": 0.6337, "step": 10012 }, { "epoch": 0.2923418294356369, "grad_norm": 0.5491127889471662, "learning_rate": 3.931549067315491e-05, "loss": 0.6729, "step": 10013 }, { "epoch": 0.2923710256634843, "grad_norm": 0.544292403691201, "learning_rate": 3.9313868613138686e-05, "loss": 0.6051, "step": 10014 }, { "epoch": 0.29240022189133164, "grad_norm": 0.547842880582305, "learning_rate": 3.931224655312247e-05, "loss": 0.562, "step": 10015 }, { "epoch": 0.292429418119179, "grad_norm": 0.5469195483034567, "learning_rate": 3.931062449310625e-05, "loss": 0.6545, "step": 10016 }, { "epoch": 0.29245861434702636, "grad_norm": 0.5194135679370308, "learning_rate": 3.9309002433090024e-05, "loss": 0.5997, "step": 10017 }, { "epoch": 0.2924878105748737, "grad_norm": 0.5553238329459267, "learning_rate": 3.9307380373073806e-05, "loss": 0.7078, "step": 10018 }, { "epoch": 0.2925170068027211, "grad_norm": 0.5534881161635424, "learning_rate": 3.930575831305758e-05, "loss": 0.6523, "step": 10019 }, { "epoch": 0.29254620303056844, "grad_norm": 0.5585700987023595, "learning_rate": 3.930413625304136e-05, "loss": 0.6636, "step": 10020 }, { "epoch": 0.2925753992584158, "grad_norm": 0.6885410832723386, "learning_rate": 3.9302514193025145e-05, "loss": 0.6909, "step": 10021 }, { "epoch": 0.29260459548626316, "grad_norm": 0.8955843831742852, "learning_rate": 3.930089213300892e-05, "loss": 0.6562, "step": 10022 }, { "epoch": 0.2926337917141105, "grad_norm": 0.56808029248207, "learning_rate": 3.92992700729927e-05, "loss": 0.6568, "step": 10023 }, { "epoch": 0.2926629879419579, "grad_norm": 0.5683274406947246, "learning_rate": 3.929764801297648e-05, "loss": 0.7, "step": 10024 }, { "epoch": 0.29269218416980525, "grad_norm": 0.5441742186073638, "learning_rate": 3.9296025952960265e-05, "loss": 0.5837, "step": 10025 }, { "epoch": 0.2927213803976526, "grad_norm": 0.8972626886375388, "learning_rate": 3.929440389294404e-05, "loss": 0.6125, "step": 10026 }, { "epoch": 0.29275057662549997, "grad_norm": 0.5519716844362486, "learning_rate": 3.929278183292782e-05, "loss": 0.6628, "step": 10027 }, { "epoch": 0.29277977285334733, "grad_norm": 0.5476914592038484, "learning_rate": 3.92911597729116e-05, "loss": 0.6619, "step": 10028 }, { "epoch": 0.2928089690811947, "grad_norm": 0.6110985207025798, "learning_rate": 3.928953771289538e-05, "loss": 0.6969, "step": 10029 }, { "epoch": 0.29283816530904205, "grad_norm": 0.5273284344755257, "learning_rate": 3.928791565287916e-05, "loss": 0.6305, "step": 10030 }, { "epoch": 0.2928673615368894, "grad_norm": 0.5275915977133351, "learning_rate": 3.9286293592862935e-05, "loss": 0.545, "step": 10031 }, { "epoch": 0.2928965577647368, "grad_norm": 0.5460362728665462, "learning_rate": 3.928467153284672e-05, "loss": 0.637, "step": 10032 }, { "epoch": 0.29292575399258414, "grad_norm": 0.5046102449471036, "learning_rate": 3.92830494728305e-05, "loss": 0.5685, "step": 10033 }, { "epoch": 0.2929549502204315, "grad_norm": 0.5222969064916007, "learning_rate": 3.9281427412814274e-05, "loss": 0.5819, "step": 10034 }, { "epoch": 0.29298414644827886, "grad_norm": 0.5447317943494907, "learning_rate": 3.9279805352798055e-05, "loss": 0.5365, "step": 10035 }, { "epoch": 0.2930133426761262, "grad_norm": 0.5204793245760672, "learning_rate": 3.927818329278184e-05, "loss": 0.5703, "step": 10036 }, { "epoch": 0.2930425389039736, "grad_norm": 0.5593268926516787, "learning_rate": 3.927656123276561e-05, "loss": 0.7008, "step": 10037 }, { "epoch": 0.29307173513182094, "grad_norm": 0.5773135489219248, "learning_rate": 3.9274939172749394e-05, "loss": 0.6717, "step": 10038 }, { "epoch": 0.2931009313596683, "grad_norm": 0.5314153499323581, "learning_rate": 3.927331711273317e-05, "loss": 0.6403, "step": 10039 }, { "epoch": 0.29313012758751567, "grad_norm": 0.6419871179878085, "learning_rate": 3.927169505271695e-05, "loss": 0.7528, "step": 10040 }, { "epoch": 0.2931593238153631, "grad_norm": 0.5608843910682073, "learning_rate": 3.927007299270073e-05, "loss": 0.6796, "step": 10041 }, { "epoch": 0.29318852004321044, "grad_norm": 0.5313030117854475, "learning_rate": 3.926845093268451e-05, "loss": 0.6149, "step": 10042 }, { "epoch": 0.2932177162710578, "grad_norm": 0.6195994068879712, "learning_rate": 3.9266828872668296e-05, "loss": 0.7972, "step": 10043 }, { "epoch": 0.29324691249890517, "grad_norm": 0.5666909799147241, "learning_rate": 3.926520681265207e-05, "loss": 0.7152, "step": 10044 }, { "epoch": 0.2932761087267525, "grad_norm": 0.5172230767217747, "learning_rate": 3.926358475263585e-05, "loss": 0.6189, "step": 10045 }, { "epoch": 0.2933053049545999, "grad_norm": 0.5526370231503777, "learning_rate": 3.926196269261963e-05, "loss": 0.6993, "step": 10046 }, { "epoch": 0.29333450118244725, "grad_norm": 0.585989875011047, "learning_rate": 3.926034063260341e-05, "loss": 0.663, "step": 10047 }, { "epoch": 0.2933636974102946, "grad_norm": 0.574126937681269, "learning_rate": 3.925871857258719e-05, "loss": 0.6746, "step": 10048 }, { "epoch": 0.29339289363814197, "grad_norm": 0.5673274936381408, "learning_rate": 3.9257096512570966e-05, "loss": 0.6802, "step": 10049 }, { "epoch": 0.29342208986598933, "grad_norm": 0.5467239059616446, "learning_rate": 3.925547445255475e-05, "loss": 0.648, "step": 10050 }, { "epoch": 0.2934512860938367, "grad_norm": 0.5235957728555451, "learning_rate": 3.925385239253852e-05, "loss": 0.6416, "step": 10051 }, { "epoch": 0.29348048232168406, "grad_norm": 0.5348966642202981, "learning_rate": 3.9252230332522305e-05, "loss": 0.6036, "step": 10052 }, { "epoch": 0.2935096785495314, "grad_norm": 0.5393813075876943, "learning_rate": 3.9250608272506086e-05, "loss": 0.6599, "step": 10053 }, { "epoch": 0.2935388747773788, "grad_norm": 0.5714407863675378, "learning_rate": 3.924898621248986e-05, "loss": 0.5997, "step": 10054 }, { "epoch": 0.29356807100522614, "grad_norm": 0.6344247505129684, "learning_rate": 3.924736415247364e-05, "loss": 0.7135, "step": 10055 }, { "epoch": 0.2935972672330735, "grad_norm": 0.5328444915662967, "learning_rate": 3.9245742092457425e-05, "loss": 0.5745, "step": 10056 }, { "epoch": 0.29362646346092086, "grad_norm": 0.5425372702328637, "learning_rate": 3.92441200324412e-05, "loss": 0.646, "step": 10057 }, { "epoch": 0.2936556596887682, "grad_norm": 0.5720271670338404, "learning_rate": 3.924249797242498e-05, "loss": 0.6268, "step": 10058 }, { "epoch": 0.2936848559166156, "grad_norm": 0.5540147079720095, "learning_rate": 3.9240875912408757e-05, "loss": 0.6354, "step": 10059 }, { "epoch": 0.29371405214446294, "grad_norm": 0.5993769749264861, "learning_rate": 3.923925385239254e-05, "loss": 0.7085, "step": 10060 }, { "epoch": 0.2937432483723103, "grad_norm": 0.5257887404387885, "learning_rate": 3.923763179237632e-05, "loss": 0.5901, "step": 10061 }, { "epoch": 0.29377244460015767, "grad_norm": 0.5693994470838287, "learning_rate": 3.92360097323601e-05, "loss": 0.6599, "step": 10062 }, { "epoch": 0.29380164082800503, "grad_norm": 0.5469193705980094, "learning_rate": 3.9234387672343884e-05, "loss": 0.6319, "step": 10063 }, { "epoch": 0.2938308370558524, "grad_norm": 0.5057959459980725, "learning_rate": 3.923276561232766e-05, "loss": 0.6221, "step": 10064 }, { "epoch": 0.29386003328369975, "grad_norm": 0.5670123029049605, "learning_rate": 3.923114355231144e-05, "loss": 0.6471, "step": 10065 }, { "epoch": 0.2938892295115471, "grad_norm": 0.5862472024651008, "learning_rate": 3.9229521492295215e-05, "loss": 0.6336, "step": 10066 }, { "epoch": 0.2939184257393945, "grad_norm": 0.5841896805566414, "learning_rate": 3.9227899432279e-05, "loss": 0.7078, "step": 10067 }, { "epoch": 0.29394762196724183, "grad_norm": 0.5974420951594951, "learning_rate": 3.922627737226278e-05, "loss": 0.6974, "step": 10068 }, { "epoch": 0.2939768181950892, "grad_norm": 0.5364304969068122, "learning_rate": 3.9224655312246554e-05, "loss": 0.5956, "step": 10069 }, { "epoch": 0.29400601442293656, "grad_norm": 0.5456283004546117, "learning_rate": 3.9223033252230336e-05, "loss": 0.6586, "step": 10070 }, { "epoch": 0.2940352106507839, "grad_norm": 0.5950918572262485, "learning_rate": 3.922141119221411e-05, "loss": 0.7054, "step": 10071 }, { "epoch": 0.2940644068786313, "grad_norm": 0.6043018214643604, "learning_rate": 3.921978913219789e-05, "loss": 0.7162, "step": 10072 }, { "epoch": 0.29409360310647864, "grad_norm": 0.6456492847464063, "learning_rate": 3.9218167072181674e-05, "loss": 0.7757, "step": 10073 }, { "epoch": 0.294122799334326, "grad_norm": 0.538842758787196, "learning_rate": 3.921654501216545e-05, "loss": 0.6102, "step": 10074 }, { "epoch": 0.29415199556217336, "grad_norm": 0.9296025668365024, "learning_rate": 3.921492295214923e-05, "loss": 0.5884, "step": 10075 }, { "epoch": 0.2941811917900207, "grad_norm": 0.5177567512952773, "learning_rate": 3.9213300892133006e-05, "loss": 0.5638, "step": 10076 }, { "epoch": 0.2942103880178681, "grad_norm": 0.5832241943159245, "learning_rate": 3.921167883211679e-05, "loss": 0.661, "step": 10077 }, { "epoch": 0.29423958424571545, "grad_norm": 0.5376221376797782, "learning_rate": 3.921005677210057e-05, "loss": 0.6556, "step": 10078 }, { "epoch": 0.2942687804735628, "grad_norm": 0.5358604386608403, "learning_rate": 3.9208434712084344e-05, "loss": 0.6285, "step": 10079 }, { "epoch": 0.29429797670141017, "grad_norm": 0.5330917403457017, "learning_rate": 3.920681265206813e-05, "loss": 0.6169, "step": 10080 }, { "epoch": 0.29432717292925753, "grad_norm": 0.549348290563102, "learning_rate": 3.920519059205191e-05, "loss": 0.6393, "step": 10081 }, { "epoch": 0.2943563691571049, "grad_norm": 0.6183783538317892, "learning_rate": 3.920356853203569e-05, "loss": 0.6636, "step": 10082 }, { "epoch": 0.29438556538495225, "grad_norm": 0.663300300972716, "learning_rate": 3.920194647201947e-05, "loss": 0.8155, "step": 10083 }, { "epoch": 0.2944147616127996, "grad_norm": 0.511124600618318, "learning_rate": 3.9200324412003246e-05, "loss": 0.609, "step": 10084 }, { "epoch": 0.294443957840647, "grad_norm": 0.5239730822241353, "learning_rate": 3.919870235198703e-05, "loss": 0.6244, "step": 10085 }, { "epoch": 0.29447315406849434, "grad_norm": 0.5481644717211465, "learning_rate": 3.91970802919708e-05, "loss": 0.6204, "step": 10086 }, { "epoch": 0.2945023502963417, "grad_norm": 0.6870057080081612, "learning_rate": 3.9195458231954585e-05, "loss": 0.7269, "step": 10087 }, { "epoch": 0.29453154652418906, "grad_norm": 0.590638767930795, "learning_rate": 3.919383617193837e-05, "loss": 0.6931, "step": 10088 }, { "epoch": 0.2945607427520364, "grad_norm": 0.5417617483910683, "learning_rate": 3.919221411192214e-05, "loss": 0.5975, "step": 10089 }, { "epoch": 0.2945899389798838, "grad_norm": 0.591421672631181, "learning_rate": 3.9190592051905923e-05, "loss": 0.6449, "step": 10090 }, { "epoch": 0.29461913520773114, "grad_norm": 0.5774521529354506, "learning_rate": 3.91889699918897e-05, "loss": 0.7198, "step": 10091 }, { "epoch": 0.2946483314355785, "grad_norm": 0.546312387320711, "learning_rate": 3.918734793187348e-05, "loss": 0.6471, "step": 10092 }, { "epoch": 0.29467752766342586, "grad_norm": 0.5368699921085401, "learning_rate": 3.918572587185726e-05, "loss": 0.6015, "step": 10093 }, { "epoch": 0.2947067238912732, "grad_norm": 0.5427661816664076, "learning_rate": 3.918410381184104e-05, "loss": 0.6738, "step": 10094 }, { "epoch": 0.2947359201191206, "grad_norm": 0.5809792806076013, "learning_rate": 3.918248175182482e-05, "loss": 0.6287, "step": 10095 }, { "epoch": 0.29476511634696795, "grad_norm": 0.5567987723819341, "learning_rate": 3.9180859691808594e-05, "loss": 0.6107, "step": 10096 }, { "epoch": 0.2947943125748153, "grad_norm": 0.586597234600934, "learning_rate": 3.9179237631792375e-05, "loss": 0.6707, "step": 10097 }, { "epoch": 0.29482350880266267, "grad_norm": 0.5627622511043961, "learning_rate": 3.917761557177616e-05, "loss": 0.5932, "step": 10098 }, { "epoch": 0.29485270503051003, "grad_norm": 0.5069556067024461, "learning_rate": 3.917599351175994e-05, "loss": 0.5792, "step": 10099 }, { "epoch": 0.2948819012583574, "grad_norm": 0.5706717009971818, "learning_rate": 3.917437145174372e-05, "loss": 0.6952, "step": 10100 }, { "epoch": 0.2949110974862048, "grad_norm": 0.6064127239610113, "learning_rate": 3.9172749391727496e-05, "loss": 0.748, "step": 10101 }, { "epoch": 0.29494029371405217, "grad_norm": 0.5437437547828007, "learning_rate": 3.917112733171128e-05, "loss": 0.6143, "step": 10102 }, { "epoch": 0.29496948994189953, "grad_norm": 0.5614563710149246, "learning_rate": 3.916950527169506e-05, "loss": 0.6635, "step": 10103 }, { "epoch": 0.2949986861697469, "grad_norm": 0.5977338645655343, "learning_rate": 3.9167883211678834e-05, "loss": 0.691, "step": 10104 }, { "epoch": 0.29502788239759425, "grad_norm": 0.569169380597648, "learning_rate": 3.9166261151662616e-05, "loss": 0.7021, "step": 10105 }, { "epoch": 0.2950570786254416, "grad_norm": 0.590695312410481, "learning_rate": 3.916463909164639e-05, "loss": 0.6911, "step": 10106 }, { "epoch": 0.295086274853289, "grad_norm": 0.5568618416688632, "learning_rate": 3.916301703163017e-05, "loss": 0.6783, "step": 10107 }, { "epoch": 0.29511547108113634, "grad_norm": 0.5252515921868685, "learning_rate": 3.9161394971613955e-05, "loss": 0.6024, "step": 10108 }, { "epoch": 0.2951446673089837, "grad_norm": 0.6028649221186301, "learning_rate": 3.915977291159773e-05, "loss": 0.7199, "step": 10109 }, { "epoch": 0.29517386353683106, "grad_norm": 0.5691183971697655, "learning_rate": 3.915815085158151e-05, "loss": 0.6575, "step": 10110 }, { "epoch": 0.2952030597646784, "grad_norm": 0.5633240311308881, "learning_rate": 3.9156528791565286e-05, "loss": 0.5868, "step": 10111 }, { "epoch": 0.2952322559925258, "grad_norm": 0.5609431479189348, "learning_rate": 3.915490673154907e-05, "loss": 0.666, "step": 10112 }, { "epoch": 0.29526145222037314, "grad_norm": 0.5534906502859928, "learning_rate": 3.915328467153285e-05, "loss": 0.6142, "step": 10113 }, { "epoch": 0.2952906484482205, "grad_norm": 0.540483854939288, "learning_rate": 3.9151662611516625e-05, "loss": 0.6625, "step": 10114 }, { "epoch": 0.29531984467606787, "grad_norm": 0.5391413351484471, "learning_rate": 3.9150040551500407e-05, "loss": 0.5974, "step": 10115 }, { "epoch": 0.2953490409039152, "grad_norm": 0.5064847879831698, "learning_rate": 3.914841849148418e-05, "loss": 0.5717, "step": 10116 }, { "epoch": 0.2953782371317626, "grad_norm": 0.5381505737941702, "learning_rate": 3.914679643146796e-05, "loss": 0.6896, "step": 10117 }, { "epoch": 0.29540743335960995, "grad_norm": 0.5411749559688797, "learning_rate": 3.9145174371451745e-05, "loss": 0.6263, "step": 10118 }, { "epoch": 0.2954366295874573, "grad_norm": 0.5377701552804327, "learning_rate": 3.914355231143553e-05, "loss": 0.638, "step": 10119 }, { "epoch": 0.2954658258153047, "grad_norm": 0.5567936281221125, "learning_rate": 3.914193025141931e-05, "loss": 0.6634, "step": 10120 }, { "epoch": 0.29549502204315203, "grad_norm": 0.5142553974873726, "learning_rate": 3.9140308191403084e-05, "loss": 0.5784, "step": 10121 }, { "epoch": 0.2955242182709994, "grad_norm": 0.5672903592886225, "learning_rate": 3.9138686131386865e-05, "loss": 0.7125, "step": 10122 }, { "epoch": 0.29555341449884676, "grad_norm": 0.5489169703392559, "learning_rate": 3.913706407137065e-05, "loss": 0.6425, "step": 10123 }, { "epoch": 0.2955826107266941, "grad_norm": 0.5978262076189053, "learning_rate": 3.913544201135442e-05, "loss": 0.7328, "step": 10124 }, { "epoch": 0.2956118069545415, "grad_norm": 0.5213264128200897, "learning_rate": 3.9133819951338204e-05, "loss": 0.5858, "step": 10125 }, { "epoch": 0.29564100318238884, "grad_norm": 0.5385250038686983, "learning_rate": 3.913219789132198e-05, "loss": 0.6382, "step": 10126 }, { "epoch": 0.2956701994102362, "grad_norm": 0.5471540636213464, "learning_rate": 3.913057583130576e-05, "loss": 0.6085, "step": 10127 }, { "epoch": 0.29569939563808356, "grad_norm": 0.6080679526784104, "learning_rate": 3.912895377128954e-05, "loss": 0.7275, "step": 10128 }, { "epoch": 0.2957285918659309, "grad_norm": 0.5927485596948717, "learning_rate": 3.912733171127332e-05, "loss": 0.7092, "step": 10129 }, { "epoch": 0.2957577880937783, "grad_norm": 0.5529768653518199, "learning_rate": 3.91257096512571e-05, "loss": 0.6655, "step": 10130 }, { "epoch": 0.29578698432162565, "grad_norm": 0.5188637702641705, "learning_rate": 3.9124087591240874e-05, "loss": 0.5593, "step": 10131 }, { "epoch": 0.295816180549473, "grad_norm": 0.5818263891726909, "learning_rate": 3.9122465531224656e-05, "loss": 0.7024, "step": 10132 }, { "epoch": 0.29584537677732037, "grad_norm": 0.5401305223572349, "learning_rate": 3.912084347120844e-05, "loss": 0.6185, "step": 10133 }, { "epoch": 0.29587457300516773, "grad_norm": 0.5621368375056514, "learning_rate": 3.911922141119221e-05, "loss": 0.6643, "step": 10134 }, { "epoch": 0.2959037692330151, "grad_norm": 0.5345819983858404, "learning_rate": 3.9117599351175994e-05, "loss": 0.5934, "step": 10135 }, { "epoch": 0.29593296546086245, "grad_norm": 0.5205576125248814, "learning_rate": 3.911597729115977e-05, "loss": 0.5299, "step": 10136 }, { "epoch": 0.2959621616887098, "grad_norm": 0.5673471737191241, "learning_rate": 3.911435523114356e-05, "loss": 0.6611, "step": 10137 }, { "epoch": 0.2959913579165572, "grad_norm": 0.6032798007131034, "learning_rate": 3.911273317112733e-05, "loss": 0.672, "step": 10138 }, { "epoch": 0.29602055414440454, "grad_norm": 0.5697726570760608, "learning_rate": 3.9111111111111115e-05, "loss": 0.6752, "step": 10139 }, { "epoch": 0.2960497503722519, "grad_norm": 0.5626636773944849, "learning_rate": 3.9109489051094896e-05, "loss": 0.6806, "step": 10140 }, { "epoch": 0.29607894660009926, "grad_norm": 0.5291084821588604, "learning_rate": 3.910786699107867e-05, "loss": 0.6476, "step": 10141 }, { "epoch": 0.2961081428279466, "grad_norm": 0.5856073079277047, "learning_rate": 3.910624493106245e-05, "loss": 0.7168, "step": 10142 }, { "epoch": 0.296137339055794, "grad_norm": 0.610621590300088, "learning_rate": 3.910462287104623e-05, "loss": 0.6748, "step": 10143 }, { "epoch": 0.29616653528364134, "grad_norm": 0.5976589374604642, "learning_rate": 3.910300081103001e-05, "loss": 0.6364, "step": 10144 }, { "epoch": 0.2961957315114887, "grad_norm": 0.5189004369631265, "learning_rate": 3.910137875101379e-05, "loss": 0.5966, "step": 10145 }, { "epoch": 0.29622492773933606, "grad_norm": 0.6714316861837685, "learning_rate": 3.909975669099757e-05, "loss": 0.6738, "step": 10146 }, { "epoch": 0.2962541239671834, "grad_norm": 0.5966313668736652, "learning_rate": 3.909813463098135e-05, "loss": 0.8261, "step": 10147 }, { "epoch": 0.2962833201950308, "grad_norm": 0.5810577698236518, "learning_rate": 3.909651257096513e-05, "loss": 0.6839, "step": 10148 }, { "epoch": 0.29631251642287815, "grad_norm": 0.5658593179738193, "learning_rate": 3.9094890510948905e-05, "loss": 0.6156, "step": 10149 }, { "epoch": 0.2963417126507255, "grad_norm": 0.5367199413113497, "learning_rate": 3.909326845093269e-05, "loss": 0.6319, "step": 10150 }, { "epoch": 0.29637090887857287, "grad_norm": 0.5412978680167413, "learning_rate": 3.909164639091646e-05, "loss": 0.6656, "step": 10151 }, { "epoch": 0.29640010510642023, "grad_norm": 0.5268307435021573, "learning_rate": 3.9090024330900244e-05, "loss": 0.5871, "step": 10152 }, { "epoch": 0.2964293013342676, "grad_norm": 0.5329543064484805, "learning_rate": 3.9088402270884025e-05, "loss": 0.6069, "step": 10153 }, { "epoch": 0.29645849756211495, "grad_norm": 0.5159762882815953, "learning_rate": 3.90867802108678e-05, "loss": 0.5566, "step": 10154 }, { "epoch": 0.2964876937899623, "grad_norm": 0.5625079672560189, "learning_rate": 3.908515815085158e-05, "loss": 0.6312, "step": 10155 }, { "epoch": 0.2965168900178097, "grad_norm": 0.6100520969423431, "learning_rate": 3.9083536090835364e-05, "loss": 0.6503, "step": 10156 }, { "epoch": 0.29654608624565704, "grad_norm": 0.5741231336859666, "learning_rate": 3.9081914030819146e-05, "loss": 0.6771, "step": 10157 }, { "epoch": 0.2965752824735044, "grad_norm": 0.5929286472505048, "learning_rate": 3.908029197080292e-05, "loss": 0.633, "step": 10158 }, { "epoch": 0.29660447870135176, "grad_norm": 0.5570874255361903, "learning_rate": 3.90786699107867e-05, "loss": 0.6435, "step": 10159 }, { "epoch": 0.2966336749291991, "grad_norm": 0.5723031972414873, "learning_rate": 3.9077047850770484e-05, "loss": 0.7105, "step": 10160 }, { "epoch": 0.29666287115704654, "grad_norm": 0.5445440444631611, "learning_rate": 3.907542579075426e-05, "loss": 0.6716, "step": 10161 }, { "epoch": 0.2966920673848939, "grad_norm": 0.5987567456439713, "learning_rate": 3.907380373073804e-05, "loss": 0.6499, "step": 10162 }, { "epoch": 0.29672126361274126, "grad_norm": 0.5399953119431103, "learning_rate": 3.9072181670721816e-05, "loss": 0.585, "step": 10163 }, { "epoch": 0.2967504598405886, "grad_norm": 0.5734315955289315, "learning_rate": 3.90705596107056e-05, "loss": 0.696, "step": 10164 }, { "epoch": 0.296779656068436, "grad_norm": 0.5607652230205099, "learning_rate": 3.906893755068938e-05, "loss": 0.6079, "step": 10165 }, { "epoch": 0.29680885229628334, "grad_norm": 0.6092976425041626, "learning_rate": 3.9067315490673154e-05, "loss": 0.7104, "step": 10166 }, { "epoch": 0.2968380485241307, "grad_norm": 0.5191781221281283, "learning_rate": 3.9065693430656936e-05, "loss": 0.5995, "step": 10167 }, { "epoch": 0.29686724475197807, "grad_norm": 0.5815580652060616, "learning_rate": 3.906407137064072e-05, "loss": 0.6586, "step": 10168 }, { "epoch": 0.2968964409798254, "grad_norm": 0.6280976411172156, "learning_rate": 3.906244931062449e-05, "loss": 0.8379, "step": 10169 }, { "epoch": 0.2969256372076728, "grad_norm": 0.5289547250956965, "learning_rate": 3.9060827250608275e-05, "loss": 0.6329, "step": 10170 }, { "epoch": 0.29695483343552015, "grad_norm": 0.5624189913387463, "learning_rate": 3.905920519059205e-05, "loss": 0.6852, "step": 10171 }, { "epoch": 0.2969840296633675, "grad_norm": 0.5772334762648258, "learning_rate": 3.905758313057583e-05, "loss": 0.635, "step": 10172 }, { "epoch": 0.29701322589121487, "grad_norm": 0.5112871641852937, "learning_rate": 3.905596107055961e-05, "loss": 0.5587, "step": 10173 }, { "epoch": 0.29704242211906223, "grad_norm": 0.5577329523850775, "learning_rate": 3.905433901054339e-05, "loss": 0.6452, "step": 10174 }, { "epoch": 0.2970716183469096, "grad_norm": 0.5794002443268151, "learning_rate": 3.905271695052718e-05, "loss": 0.7397, "step": 10175 }, { "epoch": 0.29710081457475696, "grad_norm": 0.511356516025696, "learning_rate": 3.905109489051095e-05, "loss": 0.6198, "step": 10176 }, { "epoch": 0.2971300108026043, "grad_norm": 0.6173175132865855, "learning_rate": 3.9049472830494733e-05, "loss": 0.8008, "step": 10177 }, { "epoch": 0.2971592070304517, "grad_norm": 0.5673478980857132, "learning_rate": 3.904785077047851e-05, "loss": 0.6617, "step": 10178 }, { "epoch": 0.29718840325829904, "grad_norm": 0.5664843242402968, "learning_rate": 3.904622871046229e-05, "loss": 0.6943, "step": 10179 }, { "epoch": 0.2972175994861464, "grad_norm": 0.5572833963403893, "learning_rate": 3.904460665044607e-05, "loss": 0.6316, "step": 10180 }, { "epoch": 0.29724679571399376, "grad_norm": 0.5289836809599681, "learning_rate": 3.904298459042985e-05, "loss": 0.6466, "step": 10181 }, { "epoch": 0.2972759919418411, "grad_norm": 0.5899991994255834, "learning_rate": 3.904136253041363e-05, "loss": 0.7248, "step": 10182 }, { "epoch": 0.2973051881696885, "grad_norm": 0.6257962139625154, "learning_rate": 3.9039740470397404e-05, "loss": 0.7668, "step": 10183 }, { "epoch": 0.29733438439753584, "grad_norm": 0.5812244327518717, "learning_rate": 3.9038118410381185e-05, "loss": 0.7212, "step": 10184 }, { "epoch": 0.2973635806253832, "grad_norm": 0.5470593938493367, "learning_rate": 3.903649635036497e-05, "loss": 0.6714, "step": 10185 }, { "epoch": 0.29739277685323057, "grad_norm": 0.522745407856514, "learning_rate": 3.903487429034874e-05, "loss": 0.6099, "step": 10186 }, { "epoch": 0.29742197308107793, "grad_norm": 0.521699182216461, "learning_rate": 3.9033252230332524e-05, "loss": 0.6382, "step": 10187 }, { "epoch": 0.2974511693089253, "grad_norm": 0.5414988699574994, "learning_rate": 3.90316301703163e-05, "loss": 0.6763, "step": 10188 }, { "epoch": 0.29748036553677265, "grad_norm": 0.7044514290917349, "learning_rate": 3.903000811030008e-05, "loss": 0.7193, "step": 10189 }, { "epoch": 0.29750956176462, "grad_norm": 0.5496734834029158, "learning_rate": 3.902838605028386e-05, "loss": 0.5996, "step": 10190 }, { "epoch": 0.2975387579924674, "grad_norm": 0.546235796366668, "learning_rate": 3.902676399026764e-05, "loss": 0.5885, "step": 10191 }, { "epoch": 0.29756795422031473, "grad_norm": 0.5683823962477329, "learning_rate": 3.902514193025142e-05, "loss": 0.6549, "step": 10192 }, { "epoch": 0.2975971504481621, "grad_norm": 0.5567368031207666, "learning_rate": 3.90235198702352e-05, "loss": 0.6729, "step": 10193 }, { "epoch": 0.29762634667600946, "grad_norm": 0.5508143863895881, "learning_rate": 3.902189781021898e-05, "loss": 0.6702, "step": 10194 }, { "epoch": 0.2976555429038568, "grad_norm": 0.6212239257042727, "learning_rate": 3.9020275750202765e-05, "loss": 0.7897, "step": 10195 }, { "epoch": 0.2976847391317042, "grad_norm": 0.6198558247099837, "learning_rate": 3.901865369018654e-05, "loss": 0.6975, "step": 10196 }, { "epoch": 0.29771393535955154, "grad_norm": 0.5411798253613759, "learning_rate": 3.901703163017032e-05, "loss": 0.6663, "step": 10197 }, { "epoch": 0.2977431315873989, "grad_norm": 0.6657336711773512, "learning_rate": 3.9015409570154096e-05, "loss": 0.7085, "step": 10198 }, { "epoch": 0.29777232781524626, "grad_norm": 0.589282626373865, "learning_rate": 3.901378751013788e-05, "loss": 0.6799, "step": 10199 }, { "epoch": 0.2978015240430936, "grad_norm": 0.547608461764562, "learning_rate": 3.901216545012166e-05, "loss": 0.625, "step": 10200 }, { "epoch": 0.297830720270941, "grad_norm": 0.5539970763206601, "learning_rate": 3.9010543390105435e-05, "loss": 0.6432, "step": 10201 }, { "epoch": 0.29785991649878835, "grad_norm": 0.5228938259045778, "learning_rate": 3.9008921330089217e-05, "loss": 0.5697, "step": 10202 }, { "epoch": 0.2978891127266357, "grad_norm": 0.564848841402978, "learning_rate": 3.900729927007299e-05, "loss": 0.6595, "step": 10203 }, { "epoch": 0.29791830895448307, "grad_norm": 0.5729761382835376, "learning_rate": 3.900567721005677e-05, "loss": 0.6634, "step": 10204 }, { "epoch": 0.29794750518233043, "grad_norm": 0.5401490989375505, "learning_rate": 3.9004055150040555e-05, "loss": 0.6131, "step": 10205 }, { "epoch": 0.2979767014101778, "grad_norm": 0.5441737434772187, "learning_rate": 3.900243309002433e-05, "loss": 0.6243, "step": 10206 }, { "epoch": 0.29800589763802515, "grad_norm": 0.5630215584316001, "learning_rate": 3.900081103000811e-05, "loss": 0.6599, "step": 10207 }, { "epoch": 0.2980350938658725, "grad_norm": 0.5323622626401452, "learning_rate": 3.899918896999189e-05, "loss": 0.6244, "step": 10208 }, { "epoch": 0.2980642900937199, "grad_norm": 0.5524754986381993, "learning_rate": 3.899756690997567e-05, "loss": 0.6202, "step": 10209 }, { "epoch": 0.29809348632156724, "grad_norm": 0.5908851254115641, "learning_rate": 3.899594484995945e-05, "loss": 0.6917, "step": 10210 }, { "epoch": 0.2981226825494146, "grad_norm": 0.5287888218637564, "learning_rate": 3.8994322789943225e-05, "loss": 0.5675, "step": 10211 }, { "epoch": 0.29815187877726196, "grad_norm": 0.5422292612726183, "learning_rate": 3.899270072992701e-05, "loss": 0.5848, "step": 10212 }, { "epoch": 0.2981810750051093, "grad_norm": 0.5046776442305436, "learning_rate": 3.899107866991079e-05, "loss": 0.5522, "step": 10213 }, { "epoch": 0.2982102712329567, "grad_norm": 0.5404671069463701, "learning_rate": 3.898945660989457e-05, "loss": 0.6374, "step": 10214 }, { "epoch": 0.29823946746080404, "grad_norm": 0.5743418299762434, "learning_rate": 3.898783454987835e-05, "loss": 0.6389, "step": 10215 }, { "epoch": 0.2982686636886514, "grad_norm": 0.5339286869852835, "learning_rate": 3.898621248986213e-05, "loss": 0.6143, "step": 10216 }, { "epoch": 0.29829785991649876, "grad_norm": 0.622456221450634, "learning_rate": 3.898459042984591e-05, "loss": 0.7335, "step": 10217 }, { "epoch": 0.2983270561443461, "grad_norm": 0.5317995587165126, "learning_rate": 3.8982968369829684e-05, "loss": 0.611, "step": 10218 }, { "epoch": 0.2983562523721935, "grad_norm": 0.5547836767843388, "learning_rate": 3.8981346309813466e-05, "loss": 0.6028, "step": 10219 }, { "epoch": 0.29838544860004085, "grad_norm": 0.5407511644619901, "learning_rate": 3.897972424979725e-05, "loss": 0.6219, "step": 10220 }, { "epoch": 0.2984146448278882, "grad_norm": 0.5593476494210906, "learning_rate": 3.897810218978102e-05, "loss": 0.6108, "step": 10221 }, { "epoch": 0.2984438410557356, "grad_norm": 0.518114478324848, "learning_rate": 3.8976480129764804e-05, "loss": 0.5913, "step": 10222 }, { "epoch": 0.298473037283583, "grad_norm": 0.5212119250102609, "learning_rate": 3.897485806974858e-05, "loss": 0.5621, "step": 10223 }, { "epoch": 0.29850223351143035, "grad_norm": 0.5056257526153843, "learning_rate": 3.897323600973236e-05, "loss": 0.5514, "step": 10224 }, { "epoch": 0.2985314297392777, "grad_norm": 0.6887825383510273, "learning_rate": 3.897161394971614e-05, "loss": 0.723, "step": 10225 }, { "epoch": 0.29856062596712507, "grad_norm": 0.5852157675583218, "learning_rate": 3.896999188969992e-05, "loss": 0.6674, "step": 10226 }, { "epoch": 0.29858982219497243, "grad_norm": 0.5342988709722442, "learning_rate": 3.89683698296837e-05, "loss": 0.6029, "step": 10227 }, { "epoch": 0.2986190184228198, "grad_norm": 0.5456073523192934, "learning_rate": 3.8966747769667475e-05, "loss": 0.6074, "step": 10228 }, { "epoch": 0.29864821465066715, "grad_norm": 0.48850401172434976, "learning_rate": 3.8965125709651256e-05, "loss": 0.5686, "step": 10229 }, { "epoch": 0.2986774108785145, "grad_norm": 0.530591165671735, "learning_rate": 3.896350364963504e-05, "loss": 0.5897, "step": 10230 }, { "epoch": 0.2987066071063619, "grad_norm": 0.5691173483893888, "learning_rate": 3.896188158961882e-05, "loss": 0.6895, "step": 10231 }, { "epoch": 0.29873580333420924, "grad_norm": 0.5932461374976906, "learning_rate": 3.89602595296026e-05, "loss": 0.7219, "step": 10232 }, { "epoch": 0.2987649995620566, "grad_norm": 0.598671939608239, "learning_rate": 3.895863746958638e-05, "loss": 0.7258, "step": 10233 }, { "epoch": 0.29879419578990396, "grad_norm": 0.5723287755377582, "learning_rate": 3.895701540957016e-05, "loss": 0.7278, "step": 10234 }, { "epoch": 0.2988233920177513, "grad_norm": 0.5144373464618913, "learning_rate": 3.895539334955394e-05, "loss": 0.5649, "step": 10235 }, { "epoch": 0.2988525882455987, "grad_norm": 0.5509571497241919, "learning_rate": 3.8953771289537715e-05, "loss": 0.6029, "step": 10236 }, { "epoch": 0.29888178447344604, "grad_norm": 0.4910798245378549, "learning_rate": 3.89521492295215e-05, "loss": 0.5117, "step": 10237 }, { "epoch": 0.2989109807012934, "grad_norm": 0.5207357003064479, "learning_rate": 3.895052716950527e-05, "loss": 0.5864, "step": 10238 }, { "epoch": 0.29894017692914077, "grad_norm": 0.5846825059455898, "learning_rate": 3.8948905109489054e-05, "loss": 0.7196, "step": 10239 }, { "epoch": 0.2989693731569881, "grad_norm": 0.5553560975112555, "learning_rate": 3.8947283049472835e-05, "loss": 0.6672, "step": 10240 }, { "epoch": 0.2989985693848355, "grad_norm": 0.6535839097181486, "learning_rate": 3.894566098945661e-05, "loss": 0.7359, "step": 10241 }, { "epoch": 0.29902776561268285, "grad_norm": 0.5114076680565328, "learning_rate": 3.894403892944039e-05, "loss": 0.5594, "step": 10242 }, { "epoch": 0.2990569618405302, "grad_norm": 0.5569785901251225, "learning_rate": 3.894241686942417e-05, "loss": 0.6555, "step": 10243 }, { "epoch": 0.29908615806837757, "grad_norm": 0.5593315873587611, "learning_rate": 3.894079480940795e-05, "loss": 0.6618, "step": 10244 }, { "epoch": 0.29911535429622493, "grad_norm": 0.5393918201901152, "learning_rate": 3.893917274939173e-05, "loss": 0.6523, "step": 10245 }, { "epoch": 0.2991445505240723, "grad_norm": 0.5487320356303391, "learning_rate": 3.8937550689375506e-05, "loss": 0.6613, "step": 10246 }, { "epoch": 0.29917374675191966, "grad_norm": 0.49788552673257547, "learning_rate": 3.893592862935929e-05, "loss": 0.5441, "step": 10247 }, { "epoch": 0.299202942979767, "grad_norm": 0.5440832205726617, "learning_rate": 3.893430656934306e-05, "loss": 0.6203, "step": 10248 }, { "epoch": 0.2992321392076144, "grad_norm": 0.5440953373059486, "learning_rate": 3.8932684509326844e-05, "loss": 0.6294, "step": 10249 }, { "epoch": 0.29926133543546174, "grad_norm": 0.5684693447548339, "learning_rate": 3.8931062449310626e-05, "loss": 0.6771, "step": 10250 }, { "epoch": 0.2992905316633091, "grad_norm": 0.5777683658934099, "learning_rate": 3.892944038929441e-05, "loss": 0.6749, "step": 10251 }, { "epoch": 0.29931972789115646, "grad_norm": 0.5379204991925101, "learning_rate": 3.892781832927819e-05, "loss": 0.6567, "step": 10252 }, { "epoch": 0.2993489241190038, "grad_norm": 0.4817224810412578, "learning_rate": 3.8926196269261964e-05, "loss": 0.5226, "step": 10253 }, { "epoch": 0.2993781203468512, "grad_norm": 0.6467866805929867, "learning_rate": 3.8924574209245746e-05, "loss": 0.6391, "step": 10254 }, { "epoch": 0.29940731657469855, "grad_norm": 0.6461359097808449, "learning_rate": 3.892295214922953e-05, "loss": 0.6926, "step": 10255 }, { "epoch": 0.2994365128025459, "grad_norm": 0.5965498282562159, "learning_rate": 3.89213300892133e-05, "loss": 0.6888, "step": 10256 }, { "epoch": 0.29946570903039327, "grad_norm": 0.5206526150113511, "learning_rate": 3.8919708029197085e-05, "loss": 0.6142, "step": 10257 }, { "epoch": 0.29949490525824063, "grad_norm": 0.5901494339652347, "learning_rate": 3.891808596918086e-05, "loss": 0.5856, "step": 10258 }, { "epoch": 0.299524101486088, "grad_norm": 0.5955769638171913, "learning_rate": 3.891646390916464e-05, "loss": 0.7037, "step": 10259 }, { "epoch": 0.29955329771393535, "grad_norm": 0.5346816150426368, "learning_rate": 3.891484184914842e-05, "loss": 0.6164, "step": 10260 }, { "epoch": 0.2995824939417827, "grad_norm": 0.5361454208092639, "learning_rate": 3.89132197891322e-05, "loss": 0.6645, "step": 10261 }, { "epoch": 0.2996116901696301, "grad_norm": 0.5922523927816118, "learning_rate": 3.891159772911598e-05, "loss": 0.6868, "step": 10262 }, { "epoch": 0.29964088639747744, "grad_norm": 0.5062618677861784, "learning_rate": 3.8909975669099755e-05, "loss": 0.5378, "step": 10263 }, { "epoch": 0.2996700826253248, "grad_norm": 0.5773129635588253, "learning_rate": 3.890835360908354e-05, "loss": 0.6618, "step": 10264 }, { "epoch": 0.29969927885317216, "grad_norm": 0.5284298489568302, "learning_rate": 3.890673154906732e-05, "loss": 0.6057, "step": 10265 }, { "epoch": 0.2997284750810195, "grad_norm": 0.5594507025177602, "learning_rate": 3.8905109489051093e-05, "loss": 0.6678, "step": 10266 }, { "epoch": 0.2997576713088669, "grad_norm": 0.5862737280031781, "learning_rate": 3.8903487429034875e-05, "loss": 0.7183, "step": 10267 }, { "epoch": 0.29978686753671424, "grad_norm": 0.5731378605389005, "learning_rate": 3.890186536901865e-05, "loss": 0.6814, "step": 10268 }, { "epoch": 0.2998160637645616, "grad_norm": 0.6733524321554935, "learning_rate": 3.890024330900244e-05, "loss": 0.6626, "step": 10269 }, { "epoch": 0.29984525999240896, "grad_norm": 0.5420025376443351, "learning_rate": 3.8898621248986214e-05, "loss": 0.6501, "step": 10270 }, { "epoch": 0.2998744562202563, "grad_norm": 0.545782566561573, "learning_rate": 3.8896999188969995e-05, "loss": 0.6573, "step": 10271 }, { "epoch": 0.2999036524481037, "grad_norm": 0.5595285746187271, "learning_rate": 3.889537712895378e-05, "loss": 0.6774, "step": 10272 }, { "epoch": 0.29993284867595105, "grad_norm": 0.5737615286372207, "learning_rate": 3.889375506893755e-05, "loss": 0.7062, "step": 10273 }, { "epoch": 0.2999620449037984, "grad_norm": 0.5304816424833058, "learning_rate": 3.8892133008921334e-05, "loss": 0.6052, "step": 10274 }, { "epoch": 0.29999124113164577, "grad_norm": 0.5037714031646603, "learning_rate": 3.889051094890511e-05, "loss": 0.5121, "step": 10275 }, { "epoch": 0.30002043735949313, "grad_norm": 0.5330553656097553, "learning_rate": 3.888888888888889e-05, "loss": 0.5887, "step": 10276 }, { "epoch": 0.3000496335873405, "grad_norm": 0.5692247329724037, "learning_rate": 3.888726682887267e-05, "loss": 0.663, "step": 10277 }, { "epoch": 0.30007882981518785, "grad_norm": 0.5938390394349189, "learning_rate": 3.888564476885645e-05, "loss": 0.7248, "step": 10278 }, { "epoch": 0.3001080260430352, "grad_norm": 0.48788240175579006, "learning_rate": 3.888402270884023e-05, "loss": 0.5666, "step": 10279 }, { "epoch": 0.3001372222708826, "grad_norm": 0.5478627498487854, "learning_rate": 3.888240064882401e-05, "loss": 0.6222, "step": 10280 }, { "epoch": 0.30016641849872994, "grad_norm": 0.5799636074504266, "learning_rate": 3.8880778588807786e-05, "loss": 0.7033, "step": 10281 }, { "epoch": 0.30019561472657735, "grad_norm": 0.533795313774998, "learning_rate": 3.887915652879157e-05, "loss": 0.6438, "step": 10282 }, { "epoch": 0.3002248109544247, "grad_norm": 0.5672926732189787, "learning_rate": 3.887753446877534e-05, "loss": 0.6697, "step": 10283 }, { "epoch": 0.3002540071822721, "grad_norm": 0.5696329930525397, "learning_rate": 3.8875912408759125e-05, "loss": 0.7036, "step": 10284 }, { "epoch": 0.30028320341011944, "grad_norm": 0.5412704615329962, "learning_rate": 3.8874290348742906e-05, "loss": 0.6582, "step": 10285 }, { "epoch": 0.3003123996379668, "grad_norm": 0.5949220042305439, "learning_rate": 3.887266828872668e-05, "loss": 0.6586, "step": 10286 }, { "epoch": 0.30034159586581416, "grad_norm": 0.5301444761819172, "learning_rate": 3.887104622871046e-05, "loss": 0.5594, "step": 10287 }, { "epoch": 0.3003707920936615, "grad_norm": 0.5601877369164674, "learning_rate": 3.8869424168694245e-05, "loss": 0.6733, "step": 10288 }, { "epoch": 0.3003999883215089, "grad_norm": 0.5771673730456697, "learning_rate": 3.8867802108678027e-05, "loss": 0.712, "step": 10289 }, { "epoch": 0.30042918454935624, "grad_norm": 0.520773131354116, "learning_rate": 3.88661800486618e-05, "loss": 0.6086, "step": 10290 }, { "epoch": 0.3004583807772036, "grad_norm": 0.6571919680658118, "learning_rate": 3.886455798864558e-05, "loss": 0.6613, "step": 10291 }, { "epoch": 0.30048757700505097, "grad_norm": 0.5481410266408033, "learning_rate": 3.8862935928629365e-05, "loss": 0.6387, "step": 10292 }, { "epoch": 0.3005167732328983, "grad_norm": 0.5498939987106044, "learning_rate": 3.886131386861314e-05, "loss": 0.6729, "step": 10293 }, { "epoch": 0.3005459694607457, "grad_norm": 0.5738063926499989, "learning_rate": 3.885969180859692e-05, "loss": 0.7037, "step": 10294 }, { "epoch": 0.30057516568859305, "grad_norm": 0.5982368112054935, "learning_rate": 3.88580697485807e-05, "loss": 0.6669, "step": 10295 }, { "epoch": 0.3006043619164404, "grad_norm": 0.5492685716549965, "learning_rate": 3.885644768856448e-05, "loss": 0.6503, "step": 10296 }, { "epoch": 0.30063355814428777, "grad_norm": 0.5720516533652262, "learning_rate": 3.885482562854826e-05, "loss": 0.675, "step": 10297 }, { "epoch": 0.30066275437213513, "grad_norm": 0.5295480992729202, "learning_rate": 3.8853203568532035e-05, "loss": 0.6081, "step": 10298 }, { "epoch": 0.3006919505999825, "grad_norm": 0.5394492324916079, "learning_rate": 3.885158150851582e-05, "loss": 0.6794, "step": 10299 }, { "epoch": 0.30072114682782985, "grad_norm": 0.5386496677334665, "learning_rate": 3.88499594484996e-05, "loss": 0.6691, "step": 10300 }, { "epoch": 0.3007503430556772, "grad_norm": 0.5613809547679176, "learning_rate": 3.8848337388483374e-05, "loss": 0.6387, "step": 10301 }, { "epoch": 0.3007795392835246, "grad_norm": 0.517669626990422, "learning_rate": 3.8846715328467156e-05, "loss": 0.5989, "step": 10302 }, { "epoch": 0.30080873551137194, "grad_norm": 0.5359285047970787, "learning_rate": 3.884509326845093e-05, "loss": 0.6097, "step": 10303 }, { "epoch": 0.3008379317392193, "grad_norm": 0.5298684134805696, "learning_rate": 3.884347120843471e-05, "loss": 0.6215, "step": 10304 }, { "epoch": 0.30086712796706666, "grad_norm": 0.5898246325343619, "learning_rate": 3.8841849148418494e-05, "loss": 0.648, "step": 10305 }, { "epoch": 0.300896324194914, "grad_norm": 0.5555706291551882, "learning_rate": 3.884022708840227e-05, "loss": 0.6547, "step": 10306 }, { "epoch": 0.3009255204227614, "grad_norm": 0.5273029032406906, "learning_rate": 3.883860502838606e-05, "loss": 0.5809, "step": 10307 }, { "epoch": 0.30095471665060874, "grad_norm": 0.5530224307210219, "learning_rate": 3.883698296836983e-05, "loss": 0.6316, "step": 10308 }, { "epoch": 0.3009839128784561, "grad_norm": 0.526343186720331, "learning_rate": 3.8835360908353614e-05, "loss": 0.61, "step": 10309 }, { "epoch": 0.30101310910630347, "grad_norm": 0.6024104041925814, "learning_rate": 3.883373884833739e-05, "loss": 0.7022, "step": 10310 }, { "epoch": 0.30104230533415083, "grad_norm": 0.5377195884188629, "learning_rate": 3.883211678832117e-05, "loss": 0.6168, "step": 10311 }, { "epoch": 0.3010715015619982, "grad_norm": 0.565254860582763, "learning_rate": 3.883049472830495e-05, "loss": 0.6356, "step": 10312 }, { "epoch": 0.30110069778984555, "grad_norm": 0.5773915203594518, "learning_rate": 3.882887266828873e-05, "loss": 0.6477, "step": 10313 }, { "epoch": 0.3011298940176929, "grad_norm": 0.5562407557918989, "learning_rate": 3.882725060827251e-05, "loss": 0.6891, "step": 10314 }, { "epoch": 0.3011590902455403, "grad_norm": 0.533415236740489, "learning_rate": 3.8825628548256285e-05, "loss": 0.6036, "step": 10315 }, { "epoch": 0.30118828647338763, "grad_norm": 0.5617454745641332, "learning_rate": 3.8824006488240066e-05, "loss": 0.6515, "step": 10316 }, { "epoch": 0.301217482701235, "grad_norm": 0.5440527362577107, "learning_rate": 3.882238442822385e-05, "loss": 0.6459, "step": 10317 }, { "epoch": 0.30124667892908236, "grad_norm": 0.5760203270988316, "learning_rate": 3.882076236820762e-05, "loss": 0.7088, "step": 10318 }, { "epoch": 0.3012758751569297, "grad_norm": 0.590183129785662, "learning_rate": 3.8819140308191405e-05, "loss": 0.663, "step": 10319 }, { "epoch": 0.3013050713847771, "grad_norm": 0.5201868540518318, "learning_rate": 3.881751824817518e-05, "loss": 0.6088, "step": 10320 }, { "epoch": 0.30133426761262444, "grad_norm": 0.5764735266669552, "learning_rate": 3.881589618815896e-05, "loss": 0.7549, "step": 10321 }, { "epoch": 0.3013634638404718, "grad_norm": 0.5717249917571614, "learning_rate": 3.881427412814274e-05, "loss": 0.7027, "step": 10322 }, { "epoch": 0.30139266006831916, "grad_norm": 0.6238204500138316, "learning_rate": 3.881265206812652e-05, "loss": 0.6343, "step": 10323 }, { "epoch": 0.3014218562961665, "grad_norm": 0.5303309472005443, "learning_rate": 3.88110300081103e-05, "loss": 0.6118, "step": 10324 }, { "epoch": 0.3014510525240139, "grad_norm": 0.5363583710287523, "learning_rate": 3.880940794809408e-05, "loss": 0.6188, "step": 10325 }, { "epoch": 0.30148024875186125, "grad_norm": 0.5469754094702505, "learning_rate": 3.8807785888077864e-05, "loss": 0.6729, "step": 10326 }, { "epoch": 0.3015094449797086, "grad_norm": 0.5627463509713353, "learning_rate": 3.8806163828061645e-05, "loss": 0.6675, "step": 10327 }, { "epoch": 0.30153864120755597, "grad_norm": 0.5287833790085631, "learning_rate": 3.880454176804542e-05, "loss": 0.6092, "step": 10328 }, { "epoch": 0.30156783743540333, "grad_norm": 0.5920842146583375, "learning_rate": 3.88029197080292e-05, "loss": 0.6923, "step": 10329 }, { "epoch": 0.3015970336632507, "grad_norm": 0.5777991233195303, "learning_rate": 3.880129764801298e-05, "loss": 0.6862, "step": 10330 }, { "epoch": 0.30162622989109805, "grad_norm": 0.5189080897558414, "learning_rate": 3.879967558799676e-05, "loss": 0.6171, "step": 10331 }, { "epoch": 0.3016554261189454, "grad_norm": 0.5303325878625803, "learning_rate": 3.879805352798054e-05, "loss": 0.6471, "step": 10332 }, { "epoch": 0.3016846223467928, "grad_norm": 0.5801681457496503, "learning_rate": 3.8796431467964316e-05, "loss": 0.6672, "step": 10333 }, { "epoch": 0.30171381857464014, "grad_norm": 0.580872986551202, "learning_rate": 3.87948094079481e-05, "loss": 0.698, "step": 10334 }, { "epoch": 0.3017430148024875, "grad_norm": 0.528356516490407, "learning_rate": 3.879318734793187e-05, "loss": 0.6255, "step": 10335 }, { "epoch": 0.30177221103033486, "grad_norm": 0.5953328060889858, "learning_rate": 3.8791565287915654e-05, "loss": 0.7307, "step": 10336 }, { "epoch": 0.3018014072581822, "grad_norm": 0.555754477598537, "learning_rate": 3.8789943227899436e-05, "loss": 0.6982, "step": 10337 }, { "epoch": 0.3018306034860296, "grad_norm": 0.5650147860690962, "learning_rate": 3.878832116788321e-05, "loss": 0.7008, "step": 10338 }, { "epoch": 0.30185979971387694, "grad_norm": 0.5925530357194125, "learning_rate": 3.878669910786699e-05, "loss": 0.6968, "step": 10339 }, { "epoch": 0.3018889959417243, "grad_norm": 0.5648577099122672, "learning_rate": 3.878507704785077e-05, "loss": 0.6106, "step": 10340 }, { "epoch": 0.30191819216957166, "grad_norm": 0.516695211395424, "learning_rate": 3.878345498783455e-05, "loss": 0.5904, "step": 10341 }, { "epoch": 0.3019473883974191, "grad_norm": 0.6073799824533421, "learning_rate": 3.878183292781833e-05, "loss": 0.7121, "step": 10342 }, { "epoch": 0.30197658462526644, "grad_norm": 0.5537209098354383, "learning_rate": 3.8780210867802106e-05, "loss": 0.662, "step": 10343 }, { "epoch": 0.3020057808531138, "grad_norm": 0.5294645476723074, "learning_rate": 3.877858880778589e-05, "loss": 0.5866, "step": 10344 }, { "epoch": 0.30203497708096116, "grad_norm": 0.614560084610254, "learning_rate": 3.877696674776967e-05, "loss": 0.7936, "step": 10345 }, { "epoch": 0.3020641733088085, "grad_norm": 0.5733462672872129, "learning_rate": 3.877534468775345e-05, "loss": 0.7095, "step": 10346 }, { "epoch": 0.3020933695366559, "grad_norm": 0.5065377938678035, "learning_rate": 3.877372262773723e-05, "loss": 0.5708, "step": 10347 }, { "epoch": 0.30212256576450325, "grad_norm": 0.5367905763494467, "learning_rate": 3.877210056772101e-05, "loss": 0.6042, "step": 10348 }, { "epoch": 0.3021517619923506, "grad_norm": 0.5383496427397392, "learning_rate": 3.877047850770479e-05, "loss": 0.6358, "step": 10349 }, { "epoch": 0.30218095822019797, "grad_norm": 0.5350800399983955, "learning_rate": 3.8768856447688565e-05, "loss": 0.6076, "step": 10350 }, { "epoch": 0.30221015444804533, "grad_norm": 0.5246328916422116, "learning_rate": 3.876723438767235e-05, "loss": 0.5647, "step": 10351 }, { "epoch": 0.3022393506758927, "grad_norm": 0.6040796398087692, "learning_rate": 3.876561232765613e-05, "loss": 0.631, "step": 10352 }, { "epoch": 0.30226854690374005, "grad_norm": 0.5176879334363085, "learning_rate": 3.8763990267639903e-05, "loss": 0.5892, "step": 10353 }, { "epoch": 0.3022977431315874, "grad_norm": 0.563444096289748, "learning_rate": 3.8762368207623685e-05, "loss": 0.6411, "step": 10354 }, { "epoch": 0.3023269393594348, "grad_norm": 0.5681824088995119, "learning_rate": 3.876074614760746e-05, "loss": 0.6893, "step": 10355 }, { "epoch": 0.30235613558728214, "grad_norm": 0.541919637841291, "learning_rate": 3.875912408759124e-05, "loss": 0.6675, "step": 10356 }, { "epoch": 0.3023853318151295, "grad_norm": 0.5838207317125863, "learning_rate": 3.8757502027575024e-05, "loss": 0.7174, "step": 10357 }, { "epoch": 0.30241452804297686, "grad_norm": 0.558791967335637, "learning_rate": 3.87558799675588e-05, "loss": 0.6515, "step": 10358 }, { "epoch": 0.3024437242708242, "grad_norm": 0.5234314633887098, "learning_rate": 3.875425790754258e-05, "loss": 0.6506, "step": 10359 }, { "epoch": 0.3024729204986716, "grad_norm": 0.5238089327468978, "learning_rate": 3.8752635847526355e-05, "loss": 0.583, "step": 10360 }, { "epoch": 0.30250211672651894, "grad_norm": 0.5595410615941736, "learning_rate": 3.875101378751014e-05, "loss": 0.6255, "step": 10361 }, { "epoch": 0.3025313129543663, "grad_norm": 0.5654517771345557, "learning_rate": 3.874939172749392e-05, "loss": 0.6205, "step": 10362 }, { "epoch": 0.30256050918221367, "grad_norm": 0.566325290206208, "learning_rate": 3.8747769667477694e-05, "loss": 0.6374, "step": 10363 }, { "epoch": 0.302589705410061, "grad_norm": 0.6230240848704509, "learning_rate": 3.874614760746148e-05, "loss": 0.7402, "step": 10364 }, { "epoch": 0.3026189016379084, "grad_norm": 0.5249809035760211, "learning_rate": 3.874452554744526e-05, "loss": 0.6308, "step": 10365 }, { "epoch": 0.30264809786575575, "grad_norm": 0.5913953691143975, "learning_rate": 3.874290348742904e-05, "loss": 0.754, "step": 10366 }, { "epoch": 0.3026772940936031, "grad_norm": 0.5460146075570836, "learning_rate": 3.874128142741282e-05, "loss": 0.6106, "step": 10367 }, { "epoch": 0.30270649032145047, "grad_norm": 0.526456257887933, "learning_rate": 3.8739659367396596e-05, "loss": 0.6428, "step": 10368 }, { "epoch": 0.30273568654929783, "grad_norm": 0.5063594446261291, "learning_rate": 3.873803730738038e-05, "loss": 0.5532, "step": 10369 }, { "epoch": 0.3027648827771452, "grad_norm": 0.49933773817627897, "learning_rate": 3.873641524736415e-05, "loss": 0.5434, "step": 10370 }, { "epoch": 0.30279407900499256, "grad_norm": 0.5596671677113053, "learning_rate": 3.8734793187347935e-05, "loss": 0.6424, "step": 10371 }, { "epoch": 0.3028232752328399, "grad_norm": 0.5950245784672153, "learning_rate": 3.8733171127331716e-05, "loss": 0.7147, "step": 10372 }, { "epoch": 0.3028524714606873, "grad_norm": 0.5784234366296084, "learning_rate": 3.873154906731549e-05, "loss": 0.7375, "step": 10373 }, { "epoch": 0.30288166768853464, "grad_norm": 0.579634395215435, "learning_rate": 3.872992700729927e-05, "loss": 0.7317, "step": 10374 }, { "epoch": 0.302910863916382, "grad_norm": 0.6017758517868261, "learning_rate": 3.872830494728305e-05, "loss": 0.6982, "step": 10375 }, { "epoch": 0.30294006014422936, "grad_norm": 0.5669023820493058, "learning_rate": 3.872668288726683e-05, "loss": 0.6588, "step": 10376 }, { "epoch": 0.3029692563720767, "grad_norm": 0.5121778971044638, "learning_rate": 3.872506082725061e-05, "loss": 0.5641, "step": 10377 }, { "epoch": 0.3029984525999241, "grad_norm": 0.5606262018397588, "learning_rate": 3.8723438767234387e-05, "loss": 0.6853, "step": 10378 }, { "epoch": 0.30302764882777145, "grad_norm": 0.5868957375475773, "learning_rate": 3.872181670721817e-05, "loss": 0.6666, "step": 10379 }, { "epoch": 0.3030568450556188, "grad_norm": 0.5259863476669266, "learning_rate": 3.872019464720194e-05, "loss": 0.6449, "step": 10380 }, { "epoch": 0.30308604128346617, "grad_norm": 0.5433442778265295, "learning_rate": 3.8718572587185725e-05, "loss": 0.6512, "step": 10381 }, { "epoch": 0.30311523751131353, "grad_norm": 0.5571576606759745, "learning_rate": 3.871695052716951e-05, "loss": 0.7215, "step": 10382 }, { "epoch": 0.3031444337391609, "grad_norm": 0.69198316130568, "learning_rate": 3.871532846715329e-05, "loss": 0.718, "step": 10383 }, { "epoch": 0.30317362996700825, "grad_norm": 0.4996448748696858, "learning_rate": 3.871370640713707e-05, "loss": 0.5876, "step": 10384 }, { "epoch": 0.3032028261948556, "grad_norm": 0.5962094308132508, "learning_rate": 3.8712084347120845e-05, "loss": 0.6684, "step": 10385 }, { "epoch": 0.303232022422703, "grad_norm": 0.5844232132219264, "learning_rate": 3.871046228710463e-05, "loss": 0.6964, "step": 10386 }, { "epoch": 0.30326121865055033, "grad_norm": 0.5225700227187535, "learning_rate": 3.87088402270884e-05, "loss": 0.5374, "step": 10387 }, { "epoch": 0.3032904148783977, "grad_norm": 0.5112283077418808, "learning_rate": 3.8707218167072184e-05, "loss": 0.5542, "step": 10388 }, { "epoch": 0.30331961110624506, "grad_norm": 0.5522750524880704, "learning_rate": 3.8705596107055966e-05, "loss": 0.6631, "step": 10389 }, { "epoch": 0.3033488073340924, "grad_norm": 0.581701823574866, "learning_rate": 3.870397404703974e-05, "loss": 0.7048, "step": 10390 }, { "epoch": 0.3033780035619398, "grad_norm": 0.5968146370367501, "learning_rate": 3.870235198702352e-05, "loss": 0.6791, "step": 10391 }, { "epoch": 0.30340719978978714, "grad_norm": 0.5465874191852369, "learning_rate": 3.8700729927007304e-05, "loss": 0.6276, "step": 10392 }, { "epoch": 0.3034363960176345, "grad_norm": 0.5417221166894358, "learning_rate": 3.869910786699108e-05, "loss": 0.6515, "step": 10393 }, { "epoch": 0.30346559224548186, "grad_norm": 0.5058853888179571, "learning_rate": 3.869748580697486e-05, "loss": 0.5794, "step": 10394 }, { "epoch": 0.3034947884733292, "grad_norm": 0.5870984665585295, "learning_rate": 3.8695863746958636e-05, "loss": 0.7026, "step": 10395 }, { "epoch": 0.3035239847011766, "grad_norm": 0.5821102577972522, "learning_rate": 3.869424168694242e-05, "loss": 0.7629, "step": 10396 }, { "epoch": 0.30355318092902395, "grad_norm": 0.5293465177605183, "learning_rate": 3.86926196269262e-05, "loss": 0.6185, "step": 10397 }, { "epoch": 0.3035823771568713, "grad_norm": 0.5296472957812833, "learning_rate": 3.8690997566909974e-05, "loss": 0.6095, "step": 10398 }, { "epoch": 0.30361157338471867, "grad_norm": 0.5542902524518407, "learning_rate": 3.8689375506893756e-05, "loss": 0.6444, "step": 10399 }, { "epoch": 0.30364076961256603, "grad_norm": 0.5089769260782983, "learning_rate": 3.868775344687753e-05, "loss": 0.5499, "step": 10400 }, { "epoch": 0.3036699658404134, "grad_norm": 0.540193618458386, "learning_rate": 3.868613138686132e-05, "loss": 0.6481, "step": 10401 }, { "epoch": 0.3036991620682608, "grad_norm": 0.5952576527673414, "learning_rate": 3.8684509326845095e-05, "loss": 0.722, "step": 10402 }, { "epoch": 0.30372835829610817, "grad_norm": 0.6186553671088341, "learning_rate": 3.8682887266828876e-05, "loss": 0.6733, "step": 10403 }, { "epoch": 0.30375755452395553, "grad_norm": 0.56856058626469, "learning_rate": 3.868126520681266e-05, "loss": 0.6789, "step": 10404 }, { "epoch": 0.3037867507518029, "grad_norm": 0.6127686535182352, "learning_rate": 3.867964314679643e-05, "loss": 0.6567, "step": 10405 }, { "epoch": 0.30381594697965025, "grad_norm": 0.5357162773319235, "learning_rate": 3.8678021086780215e-05, "loss": 0.6643, "step": 10406 }, { "epoch": 0.3038451432074976, "grad_norm": 0.5012227207047274, "learning_rate": 3.867639902676399e-05, "loss": 0.6181, "step": 10407 }, { "epoch": 0.303874339435345, "grad_norm": 0.6136414387423298, "learning_rate": 3.867477696674777e-05, "loss": 0.6527, "step": 10408 }, { "epoch": 0.30390353566319234, "grad_norm": 0.5982739611013025, "learning_rate": 3.8673154906731553e-05, "loss": 0.5984, "step": 10409 }, { "epoch": 0.3039327318910397, "grad_norm": 0.5703201789419741, "learning_rate": 3.867153284671533e-05, "loss": 0.6766, "step": 10410 }, { "epoch": 0.30396192811888706, "grad_norm": 0.5554840232610112, "learning_rate": 3.866991078669911e-05, "loss": 0.6863, "step": 10411 }, { "epoch": 0.3039911243467344, "grad_norm": 0.5905205731606745, "learning_rate": 3.866828872668289e-05, "loss": 0.7552, "step": 10412 }, { "epoch": 0.3040203205745818, "grad_norm": 0.581447436071374, "learning_rate": 3.866666666666667e-05, "loss": 0.7123, "step": 10413 }, { "epoch": 0.30404951680242914, "grad_norm": 0.6060850695946413, "learning_rate": 3.866504460665045e-05, "loss": 0.711, "step": 10414 }, { "epoch": 0.3040787130302765, "grad_norm": 0.4996124595785889, "learning_rate": 3.8663422546634224e-05, "loss": 0.5623, "step": 10415 }, { "epoch": 0.30410790925812387, "grad_norm": 0.5581828322659922, "learning_rate": 3.8661800486618005e-05, "loss": 0.6464, "step": 10416 }, { "epoch": 0.3041371054859712, "grad_norm": 0.5059304744465807, "learning_rate": 3.866017842660179e-05, "loss": 0.5735, "step": 10417 }, { "epoch": 0.3041663017138186, "grad_norm": 0.5677626676376242, "learning_rate": 3.865855636658556e-05, "loss": 0.5846, "step": 10418 }, { "epoch": 0.30419549794166595, "grad_norm": 0.5531052505594078, "learning_rate": 3.8656934306569344e-05, "loss": 0.6852, "step": 10419 }, { "epoch": 0.3042246941695133, "grad_norm": 0.5301055214788415, "learning_rate": 3.8655312246553126e-05, "loss": 0.5953, "step": 10420 }, { "epoch": 0.30425389039736067, "grad_norm": 0.5601758633364039, "learning_rate": 3.865369018653691e-05, "loss": 0.6647, "step": 10421 }, { "epoch": 0.30428308662520803, "grad_norm": 0.5052487065395848, "learning_rate": 3.865206812652068e-05, "loss": 0.587, "step": 10422 }, { "epoch": 0.3043122828530554, "grad_norm": 0.5303786017521688, "learning_rate": 3.8650446066504464e-05, "loss": 0.5862, "step": 10423 }, { "epoch": 0.30434147908090275, "grad_norm": 0.5519167228758927, "learning_rate": 3.8648824006488246e-05, "loss": 0.6817, "step": 10424 }, { "epoch": 0.3043706753087501, "grad_norm": 0.5972465828537864, "learning_rate": 3.864720194647202e-05, "loss": 0.6842, "step": 10425 }, { "epoch": 0.3043998715365975, "grad_norm": 0.5756174939141508, "learning_rate": 3.86455798864558e-05, "loss": 0.6604, "step": 10426 }, { "epoch": 0.30442906776444484, "grad_norm": 0.5253974444988573, "learning_rate": 3.864395782643958e-05, "loss": 0.5514, "step": 10427 }, { "epoch": 0.3044582639922922, "grad_norm": 0.6554073244229721, "learning_rate": 3.864233576642336e-05, "loss": 0.6939, "step": 10428 }, { "epoch": 0.30448746022013956, "grad_norm": 0.5480399065611798, "learning_rate": 3.864071370640714e-05, "loss": 0.666, "step": 10429 }, { "epoch": 0.3045166564479869, "grad_norm": 0.5522815509847541, "learning_rate": 3.8639091646390916e-05, "loss": 0.5901, "step": 10430 }, { "epoch": 0.3045458526758343, "grad_norm": 0.5533374776732474, "learning_rate": 3.86374695863747e-05, "loss": 0.6594, "step": 10431 }, { "epoch": 0.30457504890368164, "grad_norm": 0.5563430315756565, "learning_rate": 3.863584752635847e-05, "loss": 0.6254, "step": 10432 }, { "epoch": 0.304604245131529, "grad_norm": 0.5657248796155291, "learning_rate": 3.8634225466342255e-05, "loss": 0.6751, "step": 10433 }, { "epoch": 0.30463344135937637, "grad_norm": 0.610199596485793, "learning_rate": 3.8632603406326036e-05, "loss": 0.7128, "step": 10434 }, { "epoch": 0.30466263758722373, "grad_norm": 0.5480234036918772, "learning_rate": 3.863098134630981e-05, "loss": 0.6696, "step": 10435 }, { "epoch": 0.3046918338150711, "grad_norm": 0.6678543065488058, "learning_rate": 3.862935928629359e-05, "loss": 0.8159, "step": 10436 }, { "epoch": 0.30472103004291845, "grad_norm": 0.6010443154824928, "learning_rate": 3.8627737226277375e-05, "loss": 0.6321, "step": 10437 }, { "epoch": 0.3047502262707658, "grad_norm": 0.5993170328034656, "learning_rate": 3.862611516626115e-05, "loss": 0.7148, "step": 10438 }, { "epoch": 0.3047794224986132, "grad_norm": 0.5075093647084337, "learning_rate": 3.862449310624494e-05, "loss": 0.5134, "step": 10439 }, { "epoch": 0.30480861872646053, "grad_norm": 0.6095035969172042, "learning_rate": 3.8622871046228713e-05, "loss": 0.7256, "step": 10440 }, { "epoch": 0.3048378149543079, "grad_norm": 0.5383900092191506, "learning_rate": 3.8621248986212495e-05, "loss": 0.6126, "step": 10441 }, { "epoch": 0.30486701118215526, "grad_norm": 0.5832982068303676, "learning_rate": 3.861962692619627e-05, "loss": 0.6516, "step": 10442 }, { "epoch": 0.3048962074100026, "grad_norm": 0.5471069978457762, "learning_rate": 3.861800486618005e-05, "loss": 0.658, "step": 10443 }, { "epoch": 0.30492540363785, "grad_norm": 0.5558112886878274, "learning_rate": 3.8616382806163834e-05, "loss": 0.6255, "step": 10444 }, { "epoch": 0.30495459986569734, "grad_norm": 0.5386496236374406, "learning_rate": 3.861476074614761e-05, "loss": 0.5974, "step": 10445 }, { "epoch": 0.3049837960935447, "grad_norm": 0.5657431694462122, "learning_rate": 3.861313868613139e-05, "loss": 0.6335, "step": 10446 }, { "epoch": 0.30501299232139206, "grad_norm": 0.5702147705043579, "learning_rate": 3.8611516626115165e-05, "loss": 0.6665, "step": 10447 }, { "epoch": 0.3050421885492394, "grad_norm": 0.5722323472635824, "learning_rate": 3.860989456609895e-05, "loss": 0.6444, "step": 10448 }, { "epoch": 0.3050713847770868, "grad_norm": 0.5151867001692757, "learning_rate": 3.860827250608273e-05, "loss": 0.5899, "step": 10449 }, { "epoch": 0.30510058100493415, "grad_norm": 0.5192093933415403, "learning_rate": 3.8606650446066504e-05, "loss": 0.6199, "step": 10450 }, { "epoch": 0.3051297772327815, "grad_norm": 0.5627957615148447, "learning_rate": 3.8605028386050286e-05, "loss": 0.6087, "step": 10451 }, { "epoch": 0.30515897346062887, "grad_norm": 0.5423598180355433, "learning_rate": 3.860340632603406e-05, "loss": 0.6364, "step": 10452 }, { "epoch": 0.30518816968847623, "grad_norm": 0.5194106522670345, "learning_rate": 3.860178426601784e-05, "loss": 0.6276, "step": 10453 }, { "epoch": 0.3052173659163236, "grad_norm": 0.5269780359082452, "learning_rate": 3.8600162206001624e-05, "loss": 0.5991, "step": 10454 }, { "epoch": 0.30524656214417095, "grad_norm": 0.5480764475105604, "learning_rate": 3.85985401459854e-05, "loss": 0.6776, "step": 10455 }, { "epoch": 0.3052757583720183, "grad_norm": 0.5637306195624694, "learning_rate": 3.859691808596918e-05, "loss": 0.6759, "step": 10456 }, { "epoch": 0.3053049545998657, "grad_norm": 0.534077827597895, "learning_rate": 3.859529602595296e-05, "loss": 0.609, "step": 10457 }, { "epoch": 0.30533415082771304, "grad_norm": 0.5057178711436682, "learning_rate": 3.8593673965936745e-05, "loss": 0.5783, "step": 10458 }, { "epoch": 0.3053633470555604, "grad_norm": 0.5557972416262285, "learning_rate": 3.8592051905920526e-05, "loss": 0.6782, "step": 10459 }, { "epoch": 0.30539254328340776, "grad_norm": 0.5268008165600169, "learning_rate": 3.85904298459043e-05, "loss": 0.5969, "step": 10460 }, { "epoch": 0.3054217395112551, "grad_norm": 0.48903754971168356, "learning_rate": 3.858880778588808e-05, "loss": 0.5091, "step": 10461 }, { "epoch": 0.3054509357391025, "grad_norm": 0.541031492680922, "learning_rate": 3.858718572587186e-05, "loss": 0.6096, "step": 10462 }, { "epoch": 0.3054801319669499, "grad_norm": 0.5360279006128374, "learning_rate": 3.858556366585564e-05, "loss": 0.6151, "step": 10463 }, { "epoch": 0.30550932819479726, "grad_norm": 0.549876591433359, "learning_rate": 3.858394160583942e-05, "loss": 0.6819, "step": 10464 }, { "epoch": 0.3055385244226446, "grad_norm": 0.522842604424631, "learning_rate": 3.8582319545823197e-05, "loss": 0.6287, "step": 10465 }, { "epoch": 0.305567720650492, "grad_norm": 0.598964292577544, "learning_rate": 3.858069748580698e-05, "loss": 0.7165, "step": 10466 }, { "epoch": 0.30559691687833934, "grad_norm": 0.5744999774362631, "learning_rate": 3.857907542579075e-05, "loss": 0.7227, "step": 10467 }, { "epoch": 0.3056261131061867, "grad_norm": 0.5206617299032994, "learning_rate": 3.8577453365774535e-05, "loss": 0.6235, "step": 10468 }, { "epoch": 0.30565530933403406, "grad_norm": 0.5567573072731854, "learning_rate": 3.857583130575832e-05, "loss": 0.6308, "step": 10469 }, { "epoch": 0.3056845055618814, "grad_norm": 0.5578849947448088, "learning_rate": 3.857420924574209e-05, "loss": 0.6861, "step": 10470 }, { "epoch": 0.3057137017897288, "grad_norm": 0.571330318512167, "learning_rate": 3.8572587185725874e-05, "loss": 0.7344, "step": 10471 }, { "epoch": 0.30574289801757615, "grad_norm": 0.5213726219720987, "learning_rate": 3.857096512570965e-05, "loss": 0.5819, "step": 10472 }, { "epoch": 0.3057720942454235, "grad_norm": 0.5300132731090655, "learning_rate": 3.856934306569343e-05, "loss": 0.6647, "step": 10473 }, { "epoch": 0.30580129047327087, "grad_norm": 0.5303814442553267, "learning_rate": 3.856772100567721e-05, "loss": 0.6014, "step": 10474 }, { "epoch": 0.30583048670111823, "grad_norm": 0.5358265155428179, "learning_rate": 3.856609894566099e-05, "loss": 0.6542, "step": 10475 }, { "epoch": 0.3058596829289656, "grad_norm": 0.5678932508700063, "learning_rate": 3.856447688564477e-05, "loss": 0.7128, "step": 10476 }, { "epoch": 0.30588887915681295, "grad_norm": 0.5354640194845841, "learning_rate": 3.856285482562855e-05, "loss": 0.6357, "step": 10477 }, { "epoch": 0.3059180753846603, "grad_norm": 0.5703338845419177, "learning_rate": 3.856123276561233e-05, "loss": 0.6366, "step": 10478 }, { "epoch": 0.3059472716125077, "grad_norm": 0.545907252048415, "learning_rate": 3.8559610705596114e-05, "loss": 0.6436, "step": 10479 }, { "epoch": 0.30597646784035504, "grad_norm": 0.48429527103684455, "learning_rate": 3.855798864557989e-05, "loss": 0.543, "step": 10480 }, { "epoch": 0.3060056640682024, "grad_norm": 0.5014024376492876, "learning_rate": 3.855636658556367e-05, "loss": 0.5731, "step": 10481 }, { "epoch": 0.30603486029604976, "grad_norm": 0.5580261070131074, "learning_rate": 3.8554744525547446e-05, "loss": 0.6766, "step": 10482 }, { "epoch": 0.3060640565238971, "grad_norm": 0.533727514090136, "learning_rate": 3.855312246553123e-05, "loss": 0.6114, "step": 10483 }, { "epoch": 0.3060932527517445, "grad_norm": 0.527999957851556, "learning_rate": 3.855150040551501e-05, "loss": 0.6149, "step": 10484 }, { "epoch": 0.30612244897959184, "grad_norm": 0.5717659037710505, "learning_rate": 3.8549878345498784e-05, "loss": 0.7174, "step": 10485 }, { "epoch": 0.3061516452074392, "grad_norm": 0.5182959271354732, "learning_rate": 3.8548256285482566e-05, "loss": 0.5891, "step": 10486 }, { "epoch": 0.30618084143528657, "grad_norm": 0.5238615474235012, "learning_rate": 3.854663422546634e-05, "loss": 0.5855, "step": 10487 }, { "epoch": 0.3062100376631339, "grad_norm": 0.517882766288574, "learning_rate": 3.854501216545012e-05, "loss": 0.5772, "step": 10488 }, { "epoch": 0.3062392338909813, "grad_norm": 0.5570503695322458, "learning_rate": 3.8543390105433905e-05, "loss": 0.6009, "step": 10489 }, { "epoch": 0.30626843011882865, "grad_norm": 0.5242019265205964, "learning_rate": 3.854176804541768e-05, "loss": 0.6199, "step": 10490 }, { "epoch": 0.306297626346676, "grad_norm": 0.5704219506199316, "learning_rate": 3.854014598540146e-05, "loss": 0.657, "step": 10491 }, { "epoch": 0.30632682257452337, "grad_norm": 0.5688746982603095, "learning_rate": 3.8538523925385236e-05, "loss": 0.6961, "step": 10492 }, { "epoch": 0.30635601880237073, "grad_norm": 0.5641328981634263, "learning_rate": 3.853690186536902e-05, "loss": 0.6857, "step": 10493 }, { "epoch": 0.3063852150302181, "grad_norm": 0.5143430352051411, "learning_rate": 3.85352798053528e-05, "loss": 0.6021, "step": 10494 }, { "epoch": 0.30641441125806546, "grad_norm": 0.5326703816385826, "learning_rate": 3.8533657745336575e-05, "loss": 0.6439, "step": 10495 }, { "epoch": 0.3064436074859128, "grad_norm": 0.5545448201064821, "learning_rate": 3.8532035685320363e-05, "loss": 0.6861, "step": 10496 }, { "epoch": 0.3064728037137602, "grad_norm": 0.5839173547958988, "learning_rate": 3.853041362530414e-05, "loss": 0.6542, "step": 10497 }, { "epoch": 0.30650199994160754, "grad_norm": 0.5684010072704477, "learning_rate": 3.852879156528792e-05, "loss": 0.6853, "step": 10498 }, { "epoch": 0.3065311961694549, "grad_norm": 0.5422807930525382, "learning_rate": 3.85271695052717e-05, "loss": 0.638, "step": 10499 }, { "epoch": 0.30656039239730226, "grad_norm": 0.5412075616196914, "learning_rate": 3.852554744525548e-05, "loss": 0.6532, "step": 10500 }, { "epoch": 0.3065895886251496, "grad_norm": 0.5490447249372602, "learning_rate": 3.852392538523926e-05, "loss": 0.6181, "step": 10501 }, { "epoch": 0.306618784852997, "grad_norm": 0.510569461652452, "learning_rate": 3.8522303325223034e-05, "loss": 0.5882, "step": 10502 }, { "epoch": 0.30664798108084435, "grad_norm": 0.5415731512405257, "learning_rate": 3.8520681265206815e-05, "loss": 0.6252, "step": 10503 }, { "epoch": 0.3066771773086917, "grad_norm": 0.49613676826297926, "learning_rate": 3.85190592051906e-05, "loss": 0.5635, "step": 10504 }, { "epoch": 0.30670637353653907, "grad_norm": 0.5298158229527697, "learning_rate": 3.851743714517437e-05, "loss": 0.5773, "step": 10505 }, { "epoch": 0.30673556976438643, "grad_norm": 0.5234090698939948, "learning_rate": 3.8515815085158154e-05, "loss": 0.6029, "step": 10506 }, { "epoch": 0.3067647659922338, "grad_norm": 0.5189827179017354, "learning_rate": 3.851419302514193e-05, "loss": 0.5744, "step": 10507 }, { "epoch": 0.30679396222008115, "grad_norm": 0.5589834162054484, "learning_rate": 3.851257096512571e-05, "loss": 0.6791, "step": 10508 }, { "epoch": 0.3068231584479285, "grad_norm": 0.5343632851214246, "learning_rate": 3.851094890510949e-05, "loss": 0.6219, "step": 10509 }, { "epoch": 0.3068523546757759, "grad_norm": 0.5831273006724583, "learning_rate": 3.850932684509327e-05, "loss": 0.6824, "step": 10510 }, { "epoch": 0.30688155090362323, "grad_norm": 0.6186446532313892, "learning_rate": 3.850770478507705e-05, "loss": 0.6428, "step": 10511 }, { "epoch": 0.3069107471314706, "grad_norm": 0.5645841458433098, "learning_rate": 3.8506082725060824e-05, "loss": 0.6673, "step": 10512 }, { "epoch": 0.30693994335931796, "grad_norm": 0.5866712113925242, "learning_rate": 3.8504460665044606e-05, "loss": 0.6576, "step": 10513 }, { "epoch": 0.3069691395871653, "grad_norm": 0.5728497065764675, "learning_rate": 3.850283860502839e-05, "loss": 0.7119, "step": 10514 }, { "epoch": 0.3069983358150127, "grad_norm": 0.5683286204003815, "learning_rate": 3.850121654501217e-05, "loss": 0.6585, "step": 10515 }, { "epoch": 0.30702753204286004, "grad_norm": 0.5246612048341135, "learning_rate": 3.849959448499595e-05, "loss": 0.5884, "step": 10516 }, { "epoch": 0.3070567282707074, "grad_norm": 0.5969653358589218, "learning_rate": 3.8497972424979726e-05, "loss": 0.7353, "step": 10517 }, { "epoch": 0.30708592449855476, "grad_norm": 0.5181416802363406, "learning_rate": 3.849635036496351e-05, "loss": 0.5624, "step": 10518 }, { "epoch": 0.3071151207264021, "grad_norm": 0.532591437717907, "learning_rate": 3.849472830494728e-05, "loss": 0.6069, "step": 10519 }, { "epoch": 0.3071443169542495, "grad_norm": 0.502981924844455, "learning_rate": 3.8493106244931065e-05, "loss": 0.5439, "step": 10520 }, { "epoch": 0.30717351318209685, "grad_norm": 0.579527660787886, "learning_rate": 3.8491484184914846e-05, "loss": 0.6744, "step": 10521 }, { "epoch": 0.3072027094099442, "grad_norm": 0.5573885635114249, "learning_rate": 3.848986212489862e-05, "loss": 0.6884, "step": 10522 }, { "epoch": 0.3072319056377916, "grad_norm": 0.5413058904904272, "learning_rate": 3.84882400648824e-05, "loss": 0.649, "step": 10523 }, { "epoch": 0.307261101865639, "grad_norm": 0.5665046899432601, "learning_rate": 3.8486618004866185e-05, "loss": 0.6899, "step": 10524 }, { "epoch": 0.30729029809348635, "grad_norm": 0.5635754089383714, "learning_rate": 3.848499594484996e-05, "loss": 0.6926, "step": 10525 }, { "epoch": 0.3073194943213337, "grad_norm": 0.5839169879944007, "learning_rate": 3.848337388483374e-05, "loss": 0.7262, "step": 10526 }, { "epoch": 0.30734869054918107, "grad_norm": 0.5342406115781833, "learning_rate": 3.848175182481752e-05, "loss": 0.6191, "step": 10527 }, { "epoch": 0.30737788677702843, "grad_norm": 0.5849378796956578, "learning_rate": 3.84801297648013e-05, "loss": 0.6903, "step": 10528 }, { "epoch": 0.3074070830048758, "grad_norm": 0.5621903566009817, "learning_rate": 3.847850770478508e-05, "loss": 0.6424, "step": 10529 }, { "epoch": 0.30743627923272315, "grad_norm": 0.5264859427078664, "learning_rate": 3.8476885644768855e-05, "loss": 0.6011, "step": 10530 }, { "epoch": 0.3074654754605705, "grad_norm": 0.5602161462970849, "learning_rate": 3.847526358475264e-05, "loss": 0.6626, "step": 10531 }, { "epoch": 0.3074946716884179, "grad_norm": 0.5550098978955362, "learning_rate": 3.847364152473641e-05, "loss": 0.6159, "step": 10532 }, { "epoch": 0.30752386791626524, "grad_norm": 0.5462679679362306, "learning_rate": 3.84720194647202e-05, "loss": 0.6449, "step": 10533 }, { "epoch": 0.3075530641441126, "grad_norm": 0.5340580305707459, "learning_rate": 3.8470397404703975e-05, "loss": 0.6487, "step": 10534 }, { "epoch": 0.30758226037195996, "grad_norm": 0.5687559498685589, "learning_rate": 3.846877534468776e-05, "loss": 0.702, "step": 10535 }, { "epoch": 0.3076114565998073, "grad_norm": 0.5533961418129898, "learning_rate": 3.846715328467154e-05, "loss": 0.6692, "step": 10536 }, { "epoch": 0.3076406528276547, "grad_norm": 0.5083852007486328, "learning_rate": 3.8465531224655314e-05, "loss": 0.6141, "step": 10537 }, { "epoch": 0.30766984905550204, "grad_norm": 0.5228779401202998, "learning_rate": 3.8463909164639096e-05, "loss": 0.6359, "step": 10538 }, { "epoch": 0.3076990452833494, "grad_norm": 0.5524849195677541, "learning_rate": 3.846228710462287e-05, "loss": 0.6748, "step": 10539 }, { "epoch": 0.30772824151119677, "grad_norm": 0.5633363018220446, "learning_rate": 3.846066504460665e-05, "loss": 0.6539, "step": 10540 }, { "epoch": 0.3077574377390441, "grad_norm": 0.5809227102262224, "learning_rate": 3.8459042984590434e-05, "loss": 0.6595, "step": 10541 }, { "epoch": 0.3077866339668915, "grad_norm": 0.5454478192543102, "learning_rate": 3.845742092457421e-05, "loss": 0.6518, "step": 10542 }, { "epoch": 0.30781583019473885, "grad_norm": 0.548474838137788, "learning_rate": 3.845579886455799e-05, "loss": 0.6247, "step": 10543 }, { "epoch": 0.3078450264225862, "grad_norm": 0.5701264252515673, "learning_rate": 3.845417680454177e-05, "loss": 0.6821, "step": 10544 }, { "epoch": 0.30787422265043357, "grad_norm": 0.5789427077196309, "learning_rate": 3.845255474452555e-05, "loss": 0.6759, "step": 10545 }, { "epoch": 0.30790341887828093, "grad_norm": 0.5542145315043596, "learning_rate": 3.845093268450933e-05, "loss": 0.6325, "step": 10546 }, { "epoch": 0.3079326151061283, "grad_norm": 0.5250040595236626, "learning_rate": 3.8449310624493105e-05, "loss": 0.6105, "step": 10547 }, { "epoch": 0.30796181133397565, "grad_norm": 0.5626380898286193, "learning_rate": 3.8447688564476886e-05, "loss": 0.6723, "step": 10548 }, { "epoch": 0.307991007561823, "grad_norm": 0.5386960210645713, "learning_rate": 3.844606650446067e-05, "loss": 0.6304, "step": 10549 }, { "epoch": 0.3080202037896704, "grad_norm": 0.5399725368467748, "learning_rate": 3.844444444444444e-05, "loss": 0.602, "step": 10550 }, { "epoch": 0.30804940001751774, "grad_norm": 0.5384862893274205, "learning_rate": 3.8442822384428225e-05, "loss": 0.6467, "step": 10551 }, { "epoch": 0.3080785962453651, "grad_norm": 0.595186520275806, "learning_rate": 3.8441200324412007e-05, "loss": 0.7521, "step": 10552 }, { "epoch": 0.30810779247321246, "grad_norm": 0.4646881534022, "learning_rate": 3.843957826439579e-05, "loss": 0.5262, "step": 10553 }, { "epoch": 0.3081369887010598, "grad_norm": 0.5410332721200339, "learning_rate": 3.843795620437956e-05, "loss": 0.6588, "step": 10554 }, { "epoch": 0.3081661849289072, "grad_norm": 0.5458034949643339, "learning_rate": 3.8436334144363345e-05, "loss": 0.6555, "step": 10555 }, { "epoch": 0.30819538115675454, "grad_norm": 0.5260213512759087, "learning_rate": 3.843471208434713e-05, "loss": 0.6058, "step": 10556 }, { "epoch": 0.3082245773846019, "grad_norm": 0.5462489428950855, "learning_rate": 3.84330900243309e-05, "loss": 0.6052, "step": 10557 }, { "epoch": 0.30825377361244927, "grad_norm": 0.5959621756175516, "learning_rate": 3.8431467964314684e-05, "loss": 0.752, "step": 10558 }, { "epoch": 0.30828296984029663, "grad_norm": 0.5304593632952215, "learning_rate": 3.842984590429846e-05, "loss": 0.584, "step": 10559 }, { "epoch": 0.308312166068144, "grad_norm": 0.5633482814410014, "learning_rate": 3.842822384428224e-05, "loss": 0.6527, "step": 10560 }, { "epoch": 0.30834136229599135, "grad_norm": 0.5667905244906543, "learning_rate": 3.842660178426602e-05, "loss": 0.688, "step": 10561 }, { "epoch": 0.3083705585238387, "grad_norm": 0.5380145568401307, "learning_rate": 3.84249797242498e-05, "loss": 0.6137, "step": 10562 }, { "epoch": 0.3083997547516861, "grad_norm": 0.6020095023938908, "learning_rate": 3.842335766423358e-05, "loss": 0.7318, "step": 10563 }, { "epoch": 0.30842895097953343, "grad_norm": 0.5432824382948677, "learning_rate": 3.8421735604217354e-05, "loss": 0.6612, "step": 10564 }, { "epoch": 0.3084581472073808, "grad_norm": 0.5954699696752699, "learning_rate": 3.8420113544201136e-05, "loss": 0.7519, "step": 10565 }, { "epoch": 0.30848734343522816, "grad_norm": 0.5432151470369037, "learning_rate": 3.841849148418492e-05, "loss": 0.6987, "step": 10566 }, { "epoch": 0.3085165396630755, "grad_norm": 0.5960980987650093, "learning_rate": 3.841686942416869e-05, "loss": 0.7202, "step": 10567 }, { "epoch": 0.3085457358909229, "grad_norm": 0.5220890577643096, "learning_rate": 3.8415247364152474e-05, "loss": 0.5893, "step": 10568 }, { "epoch": 0.30857493211877024, "grad_norm": 0.6247820114008437, "learning_rate": 3.8413625304136256e-05, "loss": 0.7052, "step": 10569 }, { "epoch": 0.3086041283466176, "grad_norm": 0.7886701125182516, "learning_rate": 3.841200324412003e-05, "loss": 0.6515, "step": 10570 }, { "epoch": 0.30863332457446496, "grad_norm": 0.5216680594382507, "learning_rate": 3.841038118410382e-05, "loss": 0.6223, "step": 10571 }, { "epoch": 0.3086625208023123, "grad_norm": 0.5708174552766958, "learning_rate": 3.8408759124087594e-05, "loss": 0.7291, "step": 10572 }, { "epoch": 0.3086917170301597, "grad_norm": 0.598891356176891, "learning_rate": 3.8407137064071376e-05, "loss": 0.6922, "step": 10573 }, { "epoch": 0.30872091325800705, "grad_norm": 0.4945392625651726, "learning_rate": 3.840551500405515e-05, "loss": 0.5184, "step": 10574 }, { "epoch": 0.3087501094858544, "grad_norm": 0.5199392189964311, "learning_rate": 3.840389294403893e-05, "loss": 0.6133, "step": 10575 }, { "epoch": 0.30877930571370177, "grad_norm": 0.49675649523931364, "learning_rate": 3.8402270884022715e-05, "loss": 0.5352, "step": 10576 }, { "epoch": 0.30880850194154913, "grad_norm": 0.5360989255776307, "learning_rate": 3.840064882400649e-05, "loss": 0.5981, "step": 10577 }, { "epoch": 0.3088376981693965, "grad_norm": 0.5718822776854472, "learning_rate": 3.839902676399027e-05, "loss": 0.6911, "step": 10578 }, { "epoch": 0.30886689439724385, "grad_norm": 0.49283365661686557, "learning_rate": 3.8397404703974046e-05, "loss": 0.5321, "step": 10579 }, { "epoch": 0.3088960906250912, "grad_norm": 0.504060106222362, "learning_rate": 3.839578264395783e-05, "loss": 0.5902, "step": 10580 }, { "epoch": 0.3089252868529386, "grad_norm": 0.515245937425521, "learning_rate": 3.839416058394161e-05, "loss": 0.5555, "step": 10581 }, { "epoch": 0.30895448308078594, "grad_norm": 0.49470102288475615, "learning_rate": 3.8392538523925385e-05, "loss": 0.5471, "step": 10582 }, { "epoch": 0.30898367930863335, "grad_norm": 0.5926532408734201, "learning_rate": 3.839091646390917e-05, "loss": 0.6674, "step": 10583 }, { "epoch": 0.3090128755364807, "grad_norm": 0.5554864257399542, "learning_rate": 3.838929440389294e-05, "loss": 0.636, "step": 10584 }, { "epoch": 0.3090420717643281, "grad_norm": 0.5238498010790907, "learning_rate": 3.8387672343876723e-05, "loss": 0.5612, "step": 10585 }, { "epoch": 0.30907126799217544, "grad_norm": 0.5406501488830003, "learning_rate": 3.8386050283860505e-05, "loss": 0.6005, "step": 10586 }, { "epoch": 0.3091004642200228, "grad_norm": 0.5638087535799401, "learning_rate": 3.838442822384428e-05, "loss": 0.7392, "step": 10587 }, { "epoch": 0.30912966044787016, "grad_norm": 0.5242106251668629, "learning_rate": 3.838280616382806e-05, "loss": 0.5974, "step": 10588 }, { "epoch": 0.3091588566757175, "grad_norm": 0.5636704772683577, "learning_rate": 3.838118410381184e-05, "loss": 0.6618, "step": 10589 }, { "epoch": 0.3091880529035649, "grad_norm": 0.5425650556118428, "learning_rate": 3.8379562043795625e-05, "loss": 0.6399, "step": 10590 }, { "epoch": 0.30921724913141224, "grad_norm": 0.5689050989053015, "learning_rate": 3.837793998377941e-05, "loss": 0.7122, "step": 10591 }, { "epoch": 0.3092464453592596, "grad_norm": 0.5650906946506726, "learning_rate": 3.837631792376318e-05, "loss": 0.6727, "step": 10592 }, { "epoch": 0.30927564158710696, "grad_norm": 0.5460660464935873, "learning_rate": 3.8374695863746964e-05, "loss": 0.6923, "step": 10593 }, { "epoch": 0.3093048378149543, "grad_norm": 0.5439620521935401, "learning_rate": 3.837307380373074e-05, "loss": 0.6215, "step": 10594 }, { "epoch": 0.3093340340428017, "grad_norm": 0.5826993265547991, "learning_rate": 3.837145174371452e-05, "loss": 0.6804, "step": 10595 }, { "epoch": 0.30936323027064905, "grad_norm": 0.5129515140142413, "learning_rate": 3.83698296836983e-05, "loss": 0.564, "step": 10596 }, { "epoch": 0.3093924264984964, "grad_norm": 0.5621608745556854, "learning_rate": 3.836820762368208e-05, "loss": 0.7335, "step": 10597 }, { "epoch": 0.30942162272634377, "grad_norm": 0.565051932133711, "learning_rate": 3.836658556366586e-05, "loss": 0.6209, "step": 10598 }, { "epoch": 0.30945081895419113, "grad_norm": 0.528684240607101, "learning_rate": 3.8364963503649634e-05, "loss": 0.5966, "step": 10599 }, { "epoch": 0.3094800151820385, "grad_norm": 0.560188194873709, "learning_rate": 3.8363341443633416e-05, "loss": 0.6458, "step": 10600 }, { "epoch": 0.30950921140988585, "grad_norm": 0.5799081757922012, "learning_rate": 3.83617193836172e-05, "loss": 0.759, "step": 10601 }, { "epoch": 0.3095384076377332, "grad_norm": 0.5649525432439716, "learning_rate": 3.836009732360097e-05, "loss": 0.6453, "step": 10602 }, { "epoch": 0.3095676038655806, "grad_norm": 0.5587736099998588, "learning_rate": 3.8358475263584754e-05, "loss": 0.6567, "step": 10603 }, { "epoch": 0.30959680009342794, "grad_norm": 0.5702559865221623, "learning_rate": 3.835685320356853e-05, "loss": 0.6063, "step": 10604 }, { "epoch": 0.3096259963212753, "grad_norm": 0.5558388652479401, "learning_rate": 3.835523114355231e-05, "loss": 0.6705, "step": 10605 }, { "epoch": 0.30965519254912266, "grad_norm": 0.5549216791074739, "learning_rate": 3.835360908353609e-05, "loss": 0.6463, "step": 10606 }, { "epoch": 0.30968438877697, "grad_norm": 0.5765892665915473, "learning_rate": 3.835198702351987e-05, "loss": 0.6504, "step": 10607 }, { "epoch": 0.3097135850048174, "grad_norm": 0.5090428279360588, "learning_rate": 3.835036496350365e-05, "loss": 0.5832, "step": 10608 }, { "epoch": 0.30974278123266474, "grad_norm": 0.5674444352554233, "learning_rate": 3.834874290348743e-05, "loss": 0.6541, "step": 10609 }, { "epoch": 0.3097719774605121, "grad_norm": 1.357297078324892, "learning_rate": 3.834712084347121e-05, "loss": 0.7338, "step": 10610 }, { "epoch": 0.30980117368835947, "grad_norm": 0.5540918614984488, "learning_rate": 3.8345498783454995e-05, "loss": 0.6868, "step": 10611 }, { "epoch": 0.3098303699162068, "grad_norm": 0.5354235293963657, "learning_rate": 3.834387672343877e-05, "loss": 0.5896, "step": 10612 }, { "epoch": 0.3098595661440542, "grad_norm": 0.5698229757632443, "learning_rate": 3.834225466342255e-05, "loss": 0.63, "step": 10613 }, { "epoch": 0.30988876237190155, "grad_norm": 0.5559732741873156, "learning_rate": 3.834063260340633e-05, "loss": 0.7047, "step": 10614 }, { "epoch": 0.3099179585997489, "grad_norm": 0.5399381980045687, "learning_rate": 3.833901054339011e-05, "loss": 0.6703, "step": 10615 }, { "epoch": 0.30994715482759627, "grad_norm": 0.564105561999191, "learning_rate": 3.833738848337389e-05, "loss": 0.6743, "step": 10616 }, { "epoch": 0.30997635105544363, "grad_norm": 0.5744782284742276, "learning_rate": 3.8335766423357665e-05, "loss": 0.6545, "step": 10617 }, { "epoch": 0.310005547283291, "grad_norm": 0.5859501002143158, "learning_rate": 3.833414436334145e-05, "loss": 0.7194, "step": 10618 }, { "epoch": 0.31003474351113836, "grad_norm": 0.5422274364191471, "learning_rate": 3.833252230332522e-05, "loss": 0.6321, "step": 10619 }, { "epoch": 0.3100639397389857, "grad_norm": 0.5142423726849147, "learning_rate": 3.8330900243309004e-05, "loss": 0.5818, "step": 10620 }, { "epoch": 0.3100931359668331, "grad_norm": 0.5830584859101681, "learning_rate": 3.8329278183292786e-05, "loss": 0.7409, "step": 10621 }, { "epoch": 0.31012233219468044, "grad_norm": 0.5135568530963679, "learning_rate": 3.832765612327656e-05, "loss": 0.5936, "step": 10622 }, { "epoch": 0.3101515284225278, "grad_norm": 0.6124144587267724, "learning_rate": 3.832603406326034e-05, "loss": 0.7756, "step": 10623 }, { "epoch": 0.31018072465037516, "grad_norm": 0.5289430707526129, "learning_rate": 3.832441200324412e-05, "loss": 0.6045, "step": 10624 }, { "epoch": 0.3102099208782225, "grad_norm": 0.5294598983549432, "learning_rate": 3.83227899432279e-05, "loss": 0.6143, "step": 10625 }, { "epoch": 0.3102391171060699, "grad_norm": 0.5340486604353364, "learning_rate": 3.832116788321168e-05, "loss": 0.6404, "step": 10626 }, { "epoch": 0.31026831333391724, "grad_norm": 0.5673108196861447, "learning_rate": 3.8319545823195456e-05, "loss": 0.7125, "step": 10627 }, { "epoch": 0.3102975095617646, "grad_norm": 0.5395767043178386, "learning_rate": 3.8317923763179244e-05, "loss": 0.624, "step": 10628 }, { "epoch": 0.31032670578961197, "grad_norm": 0.5410111097228048, "learning_rate": 3.831630170316302e-05, "loss": 0.5981, "step": 10629 }, { "epoch": 0.31035590201745933, "grad_norm": 0.5518017366653681, "learning_rate": 3.83146796431468e-05, "loss": 0.6248, "step": 10630 }, { "epoch": 0.3103850982453067, "grad_norm": 0.5747263764637626, "learning_rate": 3.8313057583130576e-05, "loss": 0.6971, "step": 10631 }, { "epoch": 0.31041429447315405, "grad_norm": 0.5319998570825943, "learning_rate": 3.831143552311436e-05, "loss": 0.6288, "step": 10632 }, { "epoch": 0.3104434907010014, "grad_norm": 0.5696049807026178, "learning_rate": 3.830981346309814e-05, "loss": 0.6564, "step": 10633 }, { "epoch": 0.3104726869288488, "grad_norm": 0.5434998664082554, "learning_rate": 3.8308191403081915e-05, "loss": 0.6306, "step": 10634 }, { "epoch": 0.31050188315669613, "grad_norm": 0.5028567813912258, "learning_rate": 3.8306569343065696e-05, "loss": 0.57, "step": 10635 }, { "epoch": 0.3105310793845435, "grad_norm": 0.5219357409319467, "learning_rate": 3.830494728304948e-05, "loss": 0.62, "step": 10636 }, { "epoch": 0.31056027561239086, "grad_norm": 0.5230389026852544, "learning_rate": 3.830332522303325e-05, "loss": 0.6114, "step": 10637 }, { "epoch": 0.3105894718402382, "grad_norm": 0.5643101020920676, "learning_rate": 3.8301703163017035e-05, "loss": 0.6517, "step": 10638 }, { "epoch": 0.3106186680680856, "grad_norm": 0.5361797118074559, "learning_rate": 3.830008110300081e-05, "loss": 0.6379, "step": 10639 }, { "epoch": 0.31064786429593294, "grad_norm": 0.5833820056559571, "learning_rate": 3.829845904298459e-05, "loss": 0.6714, "step": 10640 }, { "epoch": 0.3106770605237803, "grad_norm": 0.5676517338055521, "learning_rate": 3.829683698296837e-05, "loss": 0.6565, "step": 10641 }, { "epoch": 0.31070625675162766, "grad_norm": 0.5739824570877099, "learning_rate": 3.829521492295215e-05, "loss": 0.7355, "step": 10642 }, { "epoch": 0.310735452979475, "grad_norm": 0.5171241398108077, "learning_rate": 3.829359286293593e-05, "loss": 0.6058, "step": 10643 }, { "epoch": 0.31076464920732244, "grad_norm": 0.5114196331869422, "learning_rate": 3.8291970802919705e-05, "loss": 0.5646, "step": 10644 }, { "epoch": 0.3107938454351698, "grad_norm": 0.5497145728634121, "learning_rate": 3.829034874290349e-05, "loss": 0.6375, "step": 10645 }, { "epoch": 0.31082304166301716, "grad_norm": 0.515569979786461, "learning_rate": 3.828872668288727e-05, "loss": 0.5943, "step": 10646 }, { "epoch": 0.3108522378908645, "grad_norm": 0.5500524511963502, "learning_rate": 3.828710462287105e-05, "loss": 0.6455, "step": 10647 }, { "epoch": 0.3108814341187119, "grad_norm": 0.5464004260352303, "learning_rate": 3.828548256285483e-05, "loss": 0.6112, "step": 10648 }, { "epoch": 0.31091063034655925, "grad_norm": 0.5415154162035195, "learning_rate": 3.828386050283861e-05, "loss": 0.5815, "step": 10649 }, { "epoch": 0.3109398265744066, "grad_norm": 0.5878904717266591, "learning_rate": 3.828223844282239e-05, "loss": 0.6877, "step": 10650 }, { "epoch": 0.31096902280225397, "grad_norm": 0.5842609090218757, "learning_rate": 3.8280616382806164e-05, "loss": 0.7527, "step": 10651 }, { "epoch": 0.31099821903010133, "grad_norm": 0.5353466994698839, "learning_rate": 3.8278994322789946e-05, "loss": 0.5861, "step": 10652 }, { "epoch": 0.3110274152579487, "grad_norm": 0.5942334180744603, "learning_rate": 3.827737226277373e-05, "loss": 0.6837, "step": 10653 }, { "epoch": 0.31105661148579605, "grad_norm": 0.4985456018174844, "learning_rate": 3.82757502027575e-05, "loss": 0.5647, "step": 10654 }, { "epoch": 0.3110858077136434, "grad_norm": 0.5767089398931796, "learning_rate": 3.8274128142741284e-05, "loss": 0.7138, "step": 10655 }, { "epoch": 0.3111150039414908, "grad_norm": 0.5245597078131292, "learning_rate": 3.8272506082725066e-05, "loss": 0.5584, "step": 10656 }, { "epoch": 0.31114420016933814, "grad_norm": 0.5549685790359102, "learning_rate": 3.827088402270884e-05, "loss": 0.6543, "step": 10657 }, { "epoch": 0.3111733963971855, "grad_norm": 0.5903546324411573, "learning_rate": 3.826926196269262e-05, "loss": 0.6584, "step": 10658 }, { "epoch": 0.31120259262503286, "grad_norm": 0.563595836646381, "learning_rate": 3.82676399026764e-05, "loss": 0.6306, "step": 10659 }, { "epoch": 0.3112317888528802, "grad_norm": 0.562407278706257, "learning_rate": 3.826601784266018e-05, "loss": 0.6199, "step": 10660 }, { "epoch": 0.3112609850807276, "grad_norm": 0.5316779897498406, "learning_rate": 3.826439578264396e-05, "loss": 0.634, "step": 10661 }, { "epoch": 0.31129018130857494, "grad_norm": 0.5354257745988739, "learning_rate": 3.8262773722627736e-05, "loss": 0.679, "step": 10662 }, { "epoch": 0.3113193775364223, "grad_norm": 0.5465348098848912, "learning_rate": 3.826115166261152e-05, "loss": 0.6673, "step": 10663 }, { "epoch": 0.31134857376426966, "grad_norm": 0.5299100348579701, "learning_rate": 3.825952960259529e-05, "loss": 0.5789, "step": 10664 }, { "epoch": 0.311377769992117, "grad_norm": 0.5495717708741307, "learning_rate": 3.8257907542579075e-05, "loss": 0.652, "step": 10665 }, { "epoch": 0.3114069662199644, "grad_norm": 0.6048555378075137, "learning_rate": 3.8256285482562856e-05, "loss": 0.5118, "step": 10666 }, { "epoch": 0.31143616244781175, "grad_norm": 0.5543111353087128, "learning_rate": 3.825466342254664e-05, "loss": 0.6788, "step": 10667 }, { "epoch": 0.3114653586756591, "grad_norm": 0.529554700746624, "learning_rate": 3.825304136253042e-05, "loss": 0.6455, "step": 10668 }, { "epoch": 0.31149455490350647, "grad_norm": 0.5734026112138111, "learning_rate": 3.8251419302514195e-05, "loss": 0.696, "step": 10669 }, { "epoch": 0.31152375113135383, "grad_norm": 0.5166503182090234, "learning_rate": 3.824979724249798e-05, "loss": 0.5828, "step": 10670 }, { "epoch": 0.3115529473592012, "grad_norm": 0.599789831939073, "learning_rate": 3.824817518248175e-05, "loss": 0.7203, "step": 10671 }, { "epoch": 0.31158214358704855, "grad_norm": 0.5337245971407367, "learning_rate": 3.8246553122465533e-05, "loss": 0.6753, "step": 10672 }, { "epoch": 0.3116113398148959, "grad_norm": 0.5202861724517837, "learning_rate": 3.8244931062449315e-05, "loss": 0.6223, "step": 10673 }, { "epoch": 0.3116405360427433, "grad_norm": 0.4956320568989944, "learning_rate": 3.824330900243309e-05, "loss": 0.5657, "step": 10674 }, { "epoch": 0.31166973227059064, "grad_norm": 0.5164791216687535, "learning_rate": 3.824168694241687e-05, "loss": 0.6015, "step": 10675 }, { "epoch": 0.311698928498438, "grad_norm": 0.5327397198786443, "learning_rate": 3.824006488240065e-05, "loss": 0.6216, "step": 10676 }, { "epoch": 0.31172812472628536, "grad_norm": 0.518058545320388, "learning_rate": 3.823844282238443e-05, "loss": 0.5704, "step": 10677 }, { "epoch": 0.3117573209541327, "grad_norm": 0.5733585243600338, "learning_rate": 3.823682076236821e-05, "loss": 0.6423, "step": 10678 }, { "epoch": 0.3117865171819801, "grad_norm": 0.5513572692137069, "learning_rate": 3.8235198702351985e-05, "loss": 0.6347, "step": 10679 }, { "epoch": 0.31181571340982744, "grad_norm": 0.5504585517896813, "learning_rate": 3.823357664233577e-05, "loss": 0.6193, "step": 10680 }, { "epoch": 0.3118449096376748, "grad_norm": 0.6650795958520009, "learning_rate": 3.823195458231955e-05, "loss": 0.6615, "step": 10681 }, { "epoch": 0.31187410586552217, "grad_norm": 0.5216752975184545, "learning_rate": 3.8230332522303324e-05, "loss": 0.5902, "step": 10682 }, { "epoch": 0.3119033020933695, "grad_norm": 0.5905566110547067, "learning_rate": 3.8228710462287106e-05, "loss": 0.727, "step": 10683 }, { "epoch": 0.3119324983212169, "grad_norm": 0.5357178929184215, "learning_rate": 3.822708840227089e-05, "loss": 0.6445, "step": 10684 }, { "epoch": 0.31196169454906425, "grad_norm": 0.5114431134071887, "learning_rate": 3.822546634225467e-05, "loss": 0.6223, "step": 10685 }, { "epoch": 0.3119908907769116, "grad_norm": 0.5471389529481854, "learning_rate": 3.8223844282238444e-05, "loss": 0.6715, "step": 10686 }, { "epoch": 0.312020087004759, "grad_norm": 0.5464684326623168, "learning_rate": 3.8222222222222226e-05, "loss": 0.6657, "step": 10687 }, { "epoch": 0.31204928323260633, "grad_norm": 0.500546501770602, "learning_rate": 3.822060016220601e-05, "loss": 0.5827, "step": 10688 }, { "epoch": 0.3120784794604537, "grad_norm": 0.543256777065897, "learning_rate": 3.821897810218978e-05, "loss": 0.6713, "step": 10689 }, { "epoch": 0.31210767568830106, "grad_norm": 0.5199614815896821, "learning_rate": 3.8217356042173564e-05, "loss": 0.6043, "step": 10690 }, { "epoch": 0.3121368719161484, "grad_norm": 0.61297824000896, "learning_rate": 3.821573398215734e-05, "loss": 0.64, "step": 10691 }, { "epoch": 0.3121660681439958, "grad_norm": 0.5024226391854816, "learning_rate": 3.821411192214112e-05, "loss": 0.5361, "step": 10692 }, { "epoch": 0.31219526437184314, "grad_norm": 0.5805660139700087, "learning_rate": 3.82124898621249e-05, "loss": 0.5936, "step": 10693 }, { "epoch": 0.3122244605996905, "grad_norm": 0.5422373201159104, "learning_rate": 3.821086780210868e-05, "loss": 0.623, "step": 10694 }, { "epoch": 0.31225365682753786, "grad_norm": 0.5211331553264921, "learning_rate": 3.820924574209246e-05, "loss": 0.6098, "step": 10695 }, { "epoch": 0.3122828530553852, "grad_norm": 0.5448778871393994, "learning_rate": 3.8207623682076235e-05, "loss": 0.6431, "step": 10696 }, { "epoch": 0.3123120492832326, "grad_norm": 0.5188689066965664, "learning_rate": 3.8206001622060016e-05, "loss": 0.614, "step": 10697 }, { "epoch": 0.31234124551107995, "grad_norm": 0.5282245483568369, "learning_rate": 3.82043795620438e-05, "loss": 0.6236, "step": 10698 }, { "epoch": 0.3123704417389273, "grad_norm": 0.5635954520357483, "learning_rate": 3.820275750202757e-05, "loss": 0.6847, "step": 10699 }, { "epoch": 0.31239963796677467, "grad_norm": 0.5908912016727582, "learning_rate": 3.8201135442011355e-05, "loss": 0.702, "step": 10700 }, { "epoch": 0.31242883419462203, "grad_norm": 0.5493289423655985, "learning_rate": 3.819951338199514e-05, "loss": 0.6323, "step": 10701 }, { "epoch": 0.3124580304224694, "grad_norm": 0.5384232630009582, "learning_rate": 3.819789132197891e-05, "loss": 0.6233, "step": 10702 }, { "epoch": 0.31248722665031675, "grad_norm": 0.5481881235367265, "learning_rate": 3.81962692619627e-05, "loss": 0.671, "step": 10703 }, { "epoch": 0.31251642287816417, "grad_norm": 0.5495413091950646, "learning_rate": 3.8194647201946475e-05, "loss": 0.6365, "step": 10704 }, { "epoch": 0.31254561910601153, "grad_norm": 0.5902742010961755, "learning_rate": 3.819302514193026e-05, "loss": 0.7273, "step": 10705 }, { "epoch": 0.3125748153338589, "grad_norm": 0.5027936825221433, "learning_rate": 3.819140308191403e-05, "loss": 0.557, "step": 10706 }, { "epoch": 0.31260401156170625, "grad_norm": 0.516382384625396, "learning_rate": 3.8189781021897814e-05, "loss": 0.5938, "step": 10707 }, { "epoch": 0.3126332077895536, "grad_norm": 0.5460828357554806, "learning_rate": 3.8188158961881596e-05, "loss": 0.5764, "step": 10708 }, { "epoch": 0.312662404017401, "grad_norm": 0.5595119954144755, "learning_rate": 3.818653690186537e-05, "loss": 0.7454, "step": 10709 }, { "epoch": 0.31269160024524834, "grad_norm": 0.5653414390938146, "learning_rate": 3.818491484184915e-05, "loss": 0.6842, "step": 10710 }, { "epoch": 0.3127207964730957, "grad_norm": 0.5215359215455325, "learning_rate": 3.818329278183293e-05, "loss": 0.5947, "step": 10711 }, { "epoch": 0.31274999270094306, "grad_norm": 0.5488779184938731, "learning_rate": 3.818167072181671e-05, "loss": 0.6753, "step": 10712 }, { "epoch": 0.3127791889287904, "grad_norm": 0.5402866846425727, "learning_rate": 3.818004866180049e-05, "loss": 0.6308, "step": 10713 }, { "epoch": 0.3128083851566378, "grad_norm": 0.5386239953801705, "learning_rate": 3.8178426601784266e-05, "loss": 0.6712, "step": 10714 }, { "epoch": 0.31283758138448514, "grad_norm": 0.5570220528292629, "learning_rate": 3.817680454176805e-05, "loss": 0.6957, "step": 10715 }, { "epoch": 0.3128667776123325, "grad_norm": 0.572442569790469, "learning_rate": 3.817518248175182e-05, "loss": 0.7161, "step": 10716 }, { "epoch": 0.31289597384017986, "grad_norm": 0.5810844526367727, "learning_rate": 3.8173560421735604e-05, "loss": 0.6411, "step": 10717 }, { "epoch": 0.3129251700680272, "grad_norm": 0.578911199394594, "learning_rate": 3.8171938361719386e-05, "loss": 0.7001, "step": 10718 }, { "epoch": 0.3129543662958746, "grad_norm": 0.5404564676260041, "learning_rate": 3.817031630170316e-05, "loss": 0.6228, "step": 10719 }, { "epoch": 0.31298356252372195, "grad_norm": 0.5440042211821374, "learning_rate": 3.816869424168694e-05, "loss": 0.6532, "step": 10720 }, { "epoch": 0.3130127587515693, "grad_norm": 0.5387089314258267, "learning_rate": 3.816707218167072e-05, "loss": 0.6627, "step": 10721 }, { "epoch": 0.31304195497941667, "grad_norm": 0.5274739064155979, "learning_rate": 3.8165450121654506e-05, "loss": 0.5984, "step": 10722 }, { "epoch": 0.31307115120726403, "grad_norm": 0.5976149417419753, "learning_rate": 3.816382806163829e-05, "loss": 0.6242, "step": 10723 }, { "epoch": 0.3131003474351114, "grad_norm": 0.5509693858354842, "learning_rate": 3.816220600162206e-05, "loss": 0.666, "step": 10724 }, { "epoch": 0.31312954366295875, "grad_norm": 0.5400879631723434, "learning_rate": 3.8160583941605845e-05, "loss": 0.618, "step": 10725 }, { "epoch": 0.3131587398908061, "grad_norm": 0.5334817329690215, "learning_rate": 3.815896188158962e-05, "loss": 0.5964, "step": 10726 }, { "epoch": 0.3131879361186535, "grad_norm": 0.5344977449344983, "learning_rate": 3.81573398215734e-05, "loss": 0.626, "step": 10727 }, { "epoch": 0.31321713234650084, "grad_norm": 0.5424852765063211, "learning_rate": 3.815571776155718e-05, "loss": 0.6926, "step": 10728 }, { "epoch": 0.3132463285743482, "grad_norm": 0.5497592066442812, "learning_rate": 3.815409570154096e-05, "loss": 0.6204, "step": 10729 }, { "epoch": 0.31327552480219556, "grad_norm": 0.506371393389278, "learning_rate": 3.815247364152474e-05, "loss": 0.5171, "step": 10730 }, { "epoch": 0.3133047210300429, "grad_norm": 0.5361804361781277, "learning_rate": 3.8150851581508515e-05, "loss": 0.5976, "step": 10731 }, { "epoch": 0.3133339172578903, "grad_norm": 0.5512440574646428, "learning_rate": 3.81492295214923e-05, "loss": 0.6292, "step": 10732 }, { "epoch": 0.31336311348573764, "grad_norm": 0.6498566323167325, "learning_rate": 3.814760746147608e-05, "loss": 0.7853, "step": 10733 }, { "epoch": 0.313392309713585, "grad_norm": 0.5246875061503039, "learning_rate": 3.8145985401459854e-05, "loss": 0.6423, "step": 10734 }, { "epoch": 0.31342150594143237, "grad_norm": 0.5435406530933152, "learning_rate": 3.8144363341443635e-05, "loss": 0.632, "step": 10735 }, { "epoch": 0.3134507021692797, "grad_norm": 0.5273791027746992, "learning_rate": 3.814274128142741e-05, "loss": 0.5965, "step": 10736 }, { "epoch": 0.3134798983971271, "grad_norm": 0.552506630373778, "learning_rate": 3.814111922141119e-05, "loss": 0.6379, "step": 10737 }, { "epoch": 0.31350909462497445, "grad_norm": 0.562502299694161, "learning_rate": 3.8139497161394974e-05, "loss": 0.6523, "step": 10738 }, { "epoch": 0.3135382908528218, "grad_norm": 0.5126993356886413, "learning_rate": 3.813787510137875e-05, "loss": 0.6001, "step": 10739 }, { "epoch": 0.31356748708066917, "grad_norm": 0.5118008914789205, "learning_rate": 3.813625304136253e-05, "loss": 0.593, "step": 10740 }, { "epoch": 0.31359668330851653, "grad_norm": 0.5012695671741586, "learning_rate": 3.813463098134631e-05, "loss": 0.5638, "step": 10741 }, { "epoch": 0.3136258795363639, "grad_norm": 0.5393573515407826, "learning_rate": 3.8133008921330094e-05, "loss": 0.6702, "step": 10742 }, { "epoch": 0.31365507576421126, "grad_norm": 0.556598412660155, "learning_rate": 3.8131386861313876e-05, "loss": 0.6691, "step": 10743 }, { "epoch": 0.3136842719920586, "grad_norm": 0.5582004877597366, "learning_rate": 3.812976480129765e-05, "loss": 0.7028, "step": 10744 }, { "epoch": 0.313713468219906, "grad_norm": 0.5355256096510946, "learning_rate": 3.812814274128143e-05, "loss": 0.6321, "step": 10745 }, { "epoch": 0.31374266444775334, "grad_norm": 0.5589906664826338, "learning_rate": 3.812652068126521e-05, "loss": 0.6715, "step": 10746 }, { "epoch": 0.3137718606756007, "grad_norm": 0.48763383402347693, "learning_rate": 3.812489862124899e-05, "loss": 0.5398, "step": 10747 }, { "epoch": 0.31380105690344806, "grad_norm": 0.5875576246534459, "learning_rate": 3.812327656123277e-05, "loss": 0.6705, "step": 10748 }, { "epoch": 0.3138302531312954, "grad_norm": 0.6008400628184128, "learning_rate": 3.8121654501216546e-05, "loss": 0.7229, "step": 10749 }, { "epoch": 0.3138594493591428, "grad_norm": 0.5420756072279902, "learning_rate": 3.812003244120033e-05, "loss": 0.6386, "step": 10750 }, { "epoch": 0.31388864558699014, "grad_norm": 0.5586786036701332, "learning_rate": 3.81184103811841e-05, "loss": 0.6499, "step": 10751 }, { "epoch": 0.3139178418148375, "grad_norm": 0.5345810686997907, "learning_rate": 3.8116788321167885e-05, "loss": 0.6141, "step": 10752 }, { "epoch": 0.31394703804268487, "grad_norm": 0.5712762923411746, "learning_rate": 3.8115166261151666e-05, "loss": 0.6821, "step": 10753 }, { "epoch": 0.31397623427053223, "grad_norm": 0.5815671140399166, "learning_rate": 3.811354420113544e-05, "loss": 0.6398, "step": 10754 }, { "epoch": 0.3140054304983796, "grad_norm": 0.5583815672343827, "learning_rate": 3.811192214111922e-05, "loss": 0.7065, "step": 10755 }, { "epoch": 0.31403462672622695, "grad_norm": 0.5234311886155857, "learning_rate": 3.8110300081103e-05, "loss": 0.6322, "step": 10756 }, { "epoch": 0.3140638229540743, "grad_norm": 0.4867526513606698, "learning_rate": 3.810867802108678e-05, "loss": 0.5556, "step": 10757 }, { "epoch": 0.3140930191819217, "grad_norm": 0.5109525085121143, "learning_rate": 3.810705596107056e-05, "loss": 0.5624, "step": 10758 }, { "epoch": 0.31412221540976903, "grad_norm": 0.6097286955532315, "learning_rate": 3.810543390105434e-05, "loss": 0.6521, "step": 10759 }, { "epoch": 0.3141514116376164, "grad_norm": 0.5485887347187822, "learning_rate": 3.8103811841038125e-05, "loss": 0.6299, "step": 10760 }, { "epoch": 0.31418060786546376, "grad_norm": 0.5399353276875571, "learning_rate": 3.81021897810219e-05, "loss": 0.6425, "step": 10761 }, { "epoch": 0.3142098040933111, "grad_norm": 0.5578855852768051, "learning_rate": 3.810056772100568e-05, "loss": 0.6563, "step": 10762 }, { "epoch": 0.3142390003211585, "grad_norm": 0.46585824416844646, "learning_rate": 3.809894566098946e-05, "loss": 0.5147, "step": 10763 }, { "epoch": 0.3142681965490059, "grad_norm": 0.5086832150690354, "learning_rate": 3.809732360097324e-05, "loss": 0.5645, "step": 10764 }, { "epoch": 0.31429739277685326, "grad_norm": 0.562042080104149, "learning_rate": 3.809570154095702e-05, "loss": 0.7243, "step": 10765 }, { "epoch": 0.3143265890047006, "grad_norm": 0.5664852106826757, "learning_rate": 3.8094079480940795e-05, "loss": 0.6043, "step": 10766 }, { "epoch": 0.314355785232548, "grad_norm": 0.5126262974012397, "learning_rate": 3.809245742092458e-05, "loss": 0.5738, "step": 10767 }, { "epoch": 0.31438498146039534, "grad_norm": 0.5872936206187351, "learning_rate": 3.809083536090836e-05, "loss": 0.7426, "step": 10768 }, { "epoch": 0.3144141776882427, "grad_norm": 0.5746550980619457, "learning_rate": 3.8089213300892134e-05, "loss": 0.6669, "step": 10769 }, { "epoch": 0.31444337391609006, "grad_norm": 0.5603702604385683, "learning_rate": 3.8087591240875916e-05, "loss": 0.6696, "step": 10770 }, { "epoch": 0.3144725701439374, "grad_norm": 0.5813188966076711, "learning_rate": 3.808596918085969e-05, "loss": 0.5846, "step": 10771 }, { "epoch": 0.3145017663717848, "grad_norm": 0.5373548354724911, "learning_rate": 3.808434712084347e-05, "loss": 0.6182, "step": 10772 }, { "epoch": 0.31453096259963215, "grad_norm": 0.5128798847351028, "learning_rate": 3.8082725060827254e-05, "loss": 0.5688, "step": 10773 }, { "epoch": 0.3145601588274795, "grad_norm": 0.5186992436454402, "learning_rate": 3.808110300081103e-05, "loss": 0.5891, "step": 10774 }, { "epoch": 0.31458935505532687, "grad_norm": 0.5248597775002448, "learning_rate": 3.807948094079481e-05, "loss": 0.6182, "step": 10775 }, { "epoch": 0.31461855128317423, "grad_norm": 0.6142177717716795, "learning_rate": 3.8077858880778586e-05, "loss": 0.7004, "step": 10776 }, { "epoch": 0.3146477475110216, "grad_norm": 0.5414146721421, "learning_rate": 3.807623682076237e-05, "loss": 0.6639, "step": 10777 }, { "epoch": 0.31467694373886895, "grad_norm": 0.5334059659303756, "learning_rate": 3.807461476074615e-05, "loss": 0.6618, "step": 10778 }, { "epoch": 0.3147061399667163, "grad_norm": 0.5162330807436916, "learning_rate": 3.807299270072993e-05, "loss": 0.5775, "step": 10779 }, { "epoch": 0.3147353361945637, "grad_norm": 0.5290418803688623, "learning_rate": 3.807137064071371e-05, "loss": 0.623, "step": 10780 }, { "epoch": 0.31476453242241104, "grad_norm": 0.5406338168213314, "learning_rate": 3.806974858069749e-05, "loss": 0.6507, "step": 10781 }, { "epoch": 0.3147937286502584, "grad_norm": 0.5712315302333529, "learning_rate": 3.806812652068127e-05, "loss": 0.6541, "step": 10782 }, { "epoch": 0.31482292487810576, "grad_norm": 0.5314197929946314, "learning_rate": 3.8066504460665045e-05, "loss": 0.6341, "step": 10783 }, { "epoch": 0.3148521211059531, "grad_norm": 0.601927740238462, "learning_rate": 3.8064882400648826e-05, "loss": 0.8087, "step": 10784 }, { "epoch": 0.3148813173338005, "grad_norm": 0.6159448311066289, "learning_rate": 3.806326034063261e-05, "loss": 0.7452, "step": 10785 }, { "epoch": 0.31491051356164784, "grad_norm": 0.5523706635044743, "learning_rate": 3.806163828061638e-05, "loss": 0.6995, "step": 10786 }, { "epoch": 0.3149397097894952, "grad_norm": 0.5636563367981301, "learning_rate": 3.8060016220600165e-05, "loss": 0.6675, "step": 10787 }, { "epoch": 0.31496890601734256, "grad_norm": 0.5638138092740351, "learning_rate": 3.805839416058394e-05, "loss": 0.6966, "step": 10788 }, { "epoch": 0.3149981022451899, "grad_norm": 0.5491853584544978, "learning_rate": 3.805677210056772e-05, "loss": 0.6603, "step": 10789 }, { "epoch": 0.3150272984730373, "grad_norm": 0.5562786715045099, "learning_rate": 3.8055150040551503e-05, "loss": 0.611, "step": 10790 }, { "epoch": 0.31505649470088465, "grad_norm": 0.5379272233430317, "learning_rate": 3.805352798053528e-05, "loss": 0.6388, "step": 10791 }, { "epoch": 0.315085690928732, "grad_norm": 0.5275692871559284, "learning_rate": 3.805190592051906e-05, "loss": 0.5954, "step": 10792 }, { "epoch": 0.31511488715657937, "grad_norm": 0.5843939135854357, "learning_rate": 3.805028386050284e-05, "loss": 0.709, "step": 10793 }, { "epoch": 0.31514408338442673, "grad_norm": 0.5436646869958435, "learning_rate": 3.804866180048662e-05, "loss": 0.6406, "step": 10794 }, { "epoch": 0.3151732796122741, "grad_norm": 0.5770841457598914, "learning_rate": 3.80470397404704e-05, "loss": 0.6915, "step": 10795 }, { "epoch": 0.31520247584012145, "grad_norm": 0.5605528577089163, "learning_rate": 3.8045417680454174e-05, "loss": 0.6365, "step": 10796 }, { "epoch": 0.3152316720679688, "grad_norm": 0.5631217069111291, "learning_rate": 3.8043795620437956e-05, "loss": 0.6701, "step": 10797 }, { "epoch": 0.3152608682958162, "grad_norm": 0.5095650881626773, "learning_rate": 3.804217356042174e-05, "loss": 0.6119, "step": 10798 }, { "epoch": 0.31529006452366354, "grad_norm": 0.55674198216339, "learning_rate": 3.804055150040552e-05, "loss": 0.6687, "step": 10799 }, { "epoch": 0.3153192607515109, "grad_norm": 0.5968464704150763, "learning_rate": 3.80389294403893e-05, "loss": 0.6444, "step": 10800 }, { "epoch": 0.31534845697935826, "grad_norm": 0.5731559207751539, "learning_rate": 3.8037307380373076e-05, "loss": 0.7243, "step": 10801 }, { "epoch": 0.3153776532072056, "grad_norm": 0.5201585041894763, "learning_rate": 3.803568532035686e-05, "loss": 0.6019, "step": 10802 }, { "epoch": 0.315406849435053, "grad_norm": 0.5711829406587352, "learning_rate": 3.803406326034063e-05, "loss": 0.6987, "step": 10803 }, { "epoch": 0.31543604566290034, "grad_norm": 0.5148551228999912, "learning_rate": 3.8032441200324414e-05, "loss": 0.588, "step": 10804 }, { "epoch": 0.3154652418907477, "grad_norm": 0.560322464533042, "learning_rate": 3.8030819140308196e-05, "loss": 0.6974, "step": 10805 }, { "epoch": 0.31549443811859507, "grad_norm": 0.5358234905927355, "learning_rate": 3.802919708029197e-05, "loss": 0.646, "step": 10806 }, { "epoch": 0.3155236343464424, "grad_norm": 0.5001765515474554, "learning_rate": 3.802757502027575e-05, "loss": 0.5709, "step": 10807 }, { "epoch": 0.3155528305742898, "grad_norm": 0.550170786601968, "learning_rate": 3.802595296025953e-05, "loss": 0.6255, "step": 10808 }, { "epoch": 0.31558202680213715, "grad_norm": 0.52218653977799, "learning_rate": 3.802433090024331e-05, "loss": 0.6332, "step": 10809 }, { "epoch": 0.3156112230299845, "grad_norm": 0.521057428195657, "learning_rate": 3.802270884022709e-05, "loss": 0.5842, "step": 10810 }, { "epoch": 0.3156404192578319, "grad_norm": 0.5610047934082415, "learning_rate": 3.8021086780210866e-05, "loss": 0.6734, "step": 10811 }, { "epoch": 0.31566961548567923, "grad_norm": 0.5808323022753892, "learning_rate": 3.801946472019465e-05, "loss": 0.6955, "step": 10812 }, { "epoch": 0.3156988117135266, "grad_norm": 0.5340679398072582, "learning_rate": 3.801784266017843e-05, "loss": 0.6173, "step": 10813 }, { "epoch": 0.31572800794137396, "grad_norm": 0.5605890861414081, "learning_rate": 3.8016220600162205e-05, "loss": 0.6214, "step": 10814 }, { "epoch": 0.3157572041692213, "grad_norm": 0.5390188943283931, "learning_rate": 3.8014598540145987e-05, "loss": 0.5868, "step": 10815 }, { "epoch": 0.3157864003970687, "grad_norm": 0.5700628703349628, "learning_rate": 3.801297648012976e-05, "loss": 0.6857, "step": 10816 }, { "epoch": 0.31581559662491604, "grad_norm": 0.6303608887015577, "learning_rate": 3.801135442011355e-05, "loss": 0.6901, "step": 10817 }, { "epoch": 0.3158447928527634, "grad_norm": 0.5260183812160089, "learning_rate": 3.8009732360097325e-05, "loss": 0.6253, "step": 10818 }, { "epoch": 0.31587398908061076, "grad_norm": 0.5024246890012336, "learning_rate": 3.800811030008111e-05, "loss": 0.552, "step": 10819 }, { "epoch": 0.3159031853084581, "grad_norm": 0.5701702681128838, "learning_rate": 3.800648824006489e-05, "loss": 0.7336, "step": 10820 }, { "epoch": 0.3159323815363055, "grad_norm": 0.5180658874062211, "learning_rate": 3.8004866180048664e-05, "loss": 0.5557, "step": 10821 }, { "epoch": 0.31596157776415285, "grad_norm": 0.5558398789832636, "learning_rate": 3.8003244120032445e-05, "loss": 0.6301, "step": 10822 }, { "epoch": 0.3159907739920002, "grad_norm": 0.554821648968338, "learning_rate": 3.800162206001622e-05, "loss": 0.6568, "step": 10823 }, { "epoch": 0.31601997021984757, "grad_norm": 0.581238093847475, "learning_rate": 3.8e-05, "loss": 0.7069, "step": 10824 }, { "epoch": 0.316049166447695, "grad_norm": 0.5353259596949582, "learning_rate": 3.7998377939983784e-05, "loss": 0.6053, "step": 10825 }, { "epoch": 0.31607836267554235, "grad_norm": 0.5768365216959582, "learning_rate": 3.799675587996756e-05, "loss": 0.7013, "step": 10826 }, { "epoch": 0.3161075589033897, "grad_norm": 0.5406975495042388, "learning_rate": 3.799513381995134e-05, "loss": 0.6651, "step": 10827 }, { "epoch": 0.31613675513123707, "grad_norm": 0.5353713651425855, "learning_rate": 3.7993511759935116e-05, "loss": 0.6296, "step": 10828 }, { "epoch": 0.31616595135908443, "grad_norm": 0.5558167799224262, "learning_rate": 3.79918896999189e-05, "loss": 0.6573, "step": 10829 }, { "epoch": 0.3161951475869318, "grad_norm": 0.6116008360628298, "learning_rate": 3.799026763990268e-05, "loss": 0.7563, "step": 10830 }, { "epoch": 0.31622434381477915, "grad_norm": 0.5642169045285677, "learning_rate": 3.7988645579886454e-05, "loss": 0.6523, "step": 10831 }, { "epoch": 0.3162535400426265, "grad_norm": 0.5232156918981534, "learning_rate": 3.7987023519870236e-05, "loss": 0.5866, "step": 10832 }, { "epoch": 0.3162827362704739, "grad_norm": 0.5550958652772233, "learning_rate": 3.798540145985401e-05, "loss": 0.6433, "step": 10833 }, { "epoch": 0.31631193249832124, "grad_norm": 0.5701226549543031, "learning_rate": 3.798377939983779e-05, "loss": 0.6728, "step": 10834 }, { "epoch": 0.3163411287261686, "grad_norm": 0.5232095776722459, "learning_rate": 3.798215733982158e-05, "loss": 0.5716, "step": 10835 }, { "epoch": 0.31637032495401596, "grad_norm": 0.532922925611943, "learning_rate": 3.7980535279805356e-05, "loss": 0.6247, "step": 10836 }, { "epoch": 0.3163995211818633, "grad_norm": 0.5284230677315312, "learning_rate": 3.797891321978914e-05, "loss": 0.579, "step": 10837 }, { "epoch": 0.3164287174097107, "grad_norm": 0.5619142706421321, "learning_rate": 3.797729115977291e-05, "loss": 0.6777, "step": 10838 }, { "epoch": 0.31645791363755804, "grad_norm": 0.5745469295336546, "learning_rate": 3.7975669099756695e-05, "loss": 0.6291, "step": 10839 }, { "epoch": 0.3164871098654054, "grad_norm": 0.4963395176558032, "learning_rate": 3.7974047039740476e-05, "loss": 0.569, "step": 10840 }, { "epoch": 0.31651630609325276, "grad_norm": 0.5385967600296302, "learning_rate": 3.797242497972425e-05, "loss": 0.6827, "step": 10841 }, { "epoch": 0.3165455023211001, "grad_norm": 0.5633414173430922, "learning_rate": 3.797080291970803e-05, "loss": 0.6466, "step": 10842 }, { "epoch": 0.3165746985489475, "grad_norm": 0.5860343663960694, "learning_rate": 3.796918085969181e-05, "loss": 0.678, "step": 10843 }, { "epoch": 0.31660389477679485, "grad_norm": 0.5289495201699039, "learning_rate": 3.796755879967559e-05, "loss": 0.6272, "step": 10844 }, { "epoch": 0.3166330910046422, "grad_norm": 0.5359414049186434, "learning_rate": 3.796593673965937e-05, "loss": 0.6398, "step": 10845 }, { "epoch": 0.31666228723248957, "grad_norm": 0.5793069269057635, "learning_rate": 3.796431467964315e-05, "loss": 0.6721, "step": 10846 }, { "epoch": 0.31669148346033693, "grad_norm": 0.5040470334986581, "learning_rate": 3.796269261962693e-05, "loss": 0.606, "step": 10847 }, { "epoch": 0.3167206796881843, "grad_norm": 0.6008408707881007, "learning_rate": 3.7961070559610703e-05, "loss": 0.601, "step": 10848 }, { "epoch": 0.31674987591603165, "grad_norm": 0.55816855001266, "learning_rate": 3.7959448499594485e-05, "loss": 0.6595, "step": 10849 }, { "epoch": 0.316779072143879, "grad_norm": 0.5538218759419606, "learning_rate": 3.795782643957827e-05, "loss": 0.6881, "step": 10850 }, { "epoch": 0.3168082683717264, "grad_norm": 0.5089668413791166, "learning_rate": 3.795620437956204e-05, "loss": 0.5802, "step": 10851 }, { "epoch": 0.31683746459957374, "grad_norm": 0.5779838051874258, "learning_rate": 3.7954582319545824e-05, "loss": 0.6758, "step": 10852 }, { "epoch": 0.3168666608274211, "grad_norm": 0.560163639511969, "learning_rate": 3.79529602595296e-05, "loss": 0.6951, "step": 10853 }, { "epoch": 0.31689585705526846, "grad_norm": 0.5570469680786077, "learning_rate": 3.795133819951339e-05, "loss": 0.6494, "step": 10854 }, { "epoch": 0.3169250532831158, "grad_norm": 0.5386195497011745, "learning_rate": 3.794971613949717e-05, "loss": 0.628, "step": 10855 }, { "epoch": 0.3169542495109632, "grad_norm": 0.5363052866209338, "learning_rate": 3.7948094079480944e-05, "loss": 0.6316, "step": 10856 }, { "epoch": 0.31698344573881054, "grad_norm": 0.6116925481917128, "learning_rate": 3.7946472019464726e-05, "loss": 0.7697, "step": 10857 }, { "epoch": 0.3170126419666579, "grad_norm": 0.5937212304960109, "learning_rate": 3.79448499594485e-05, "loss": 0.715, "step": 10858 }, { "epoch": 0.31704183819450527, "grad_norm": 0.5167861768132874, "learning_rate": 3.794322789943228e-05, "loss": 0.6073, "step": 10859 }, { "epoch": 0.3170710344223526, "grad_norm": 0.5609694967172469, "learning_rate": 3.7941605839416064e-05, "loss": 0.6764, "step": 10860 }, { "epoch": 0.3171002306502, "grad_norm": 0.5561888088040895, "learning_rate": 3.793998377939984e-05, "loss": 0.6422, "step": 10861 }, { "epoch": 0.31712942687804735, "grad_norm": 0.5012443733703389, "learning_rate": 3.793836171938362e-05, "loss": 0.5521, "step": 10862 }, { "epoch": 0.3171586231058947, "grad_norm": 0.5519470624569225, "learning_rate": 3.7936739659367396e-05, "loss": 0.6157, "step": 10863 }, { "epoch": 0.31718781933374207, "grad_norm": 0.5443594461665566, "learning_rate": 3.793511759935118e-05, "loss": 0.6369, "step": 10864 }, { "epoch": 0.31721701556158943, "grad_norm": 0.5732354908969299, "learning_rate": 3.793349553933496e-05, "loss": 0.7033, "step": 10865 }, { "epoch": 0.3172462117894368, "grad_norm": 0.5470446010153243, "learning_rate": 3.7931873479318734e-05, "loss": 0.6456, "step": 10866 }, { "epoch": 0.31727540801728416, "grad_norm": 0.4893179734044926, "learning_rate": 3.7930251419302516e-05, "loss": 0.5617, "step": 10867 }, { "epoch": 0.3173046042451315, "grad_norm": 0.5430631098826468, "learning_rate": 3.792862935928629e-05, "loss": 0.5914, "step": 10868 }, { "epoch": 0.3173338004729789, "grad_norm": 0.5341308253113306, "learning_rate": 3.792700729927007e-05, "loss": 0.5914, "step": 10869 }, { "epoch": 0.31736299670082624, "grad_norm": 0.56064829124745, "learning_rate": 3.7925385239253855e-05, "loss": 0.6722, "step": 10870 }, { "epoch": 0.3173921929286736, "grad_norm": 0.5405900235014707, "learning_rate": 3.792376317923763e-05, "loss": 0.6312, "step": 10871 }, { "epoch": 0.31742138915652096, "grad_norm": 0.5668271938290623, "learning_rate": 3.792214111922141e-05, "loss": 0.6535, "step": 10872 }, { "epoch": 0.3174505853843683, "grad_norm": 0.5885778426765274, "learning_rate": 3.792051905920519e-05, "loss": 0.7531, "step": 10873 }, { "epoch": 0.3174797816122157, "grad_norm": 0.5560388539710549, "learning_rate": 3.7918896999188975e-05, "loss": 0.6819, "step": 10874 }, { "epoch": 0.31750897784006304, "grad_norm": 0.5551103342702972, "learning_rate": 3.791727493917275e-05, "loss": 0.6712, "step": 10875 }, { "epoch": 0.3175381740679104, "grad_norm": 0.5425408988482496, "learning_rate": 3.791565287915653e-05, "loss": 0.6681, "step": 10876 }, { "epoch": 0.31756737029575777, "grad_norm": 0.5593878549582758, "learning_rate": 3.7914030819140314e-05, "loss": 0.6753, "step": 10877 }, { "epoch": 0.31759656652360513, "grad_norm": 0.5292075198598952, "learning_rate": 3.791240875912409e-05, "loss": 0.5684, "step": 10878 }, { "epoch": 0.3176257627514525, "grad_norm": 0.5296120307926611, "learning_rate": 3.791078669910787e-05, "loss": 0.6169, "step": 10879 }, { "epoch": 0.31765495897929985, "grad_norm": 0.5059370091857198, "learning_rate": 3.790916463909165e-05, "loss": 0.577, "step": 10880 }, { "epoch": 0.3176841552071472, "grad_norm": 0.5327330352665154, "learning_rate": 3.790754257907543e-05, "loss": 0.5924, "step": 10881 }, { "epoch": 0.3177133514349946, "grad_norm": 0.4857699288897677, "learning_rate": 3.790592051905921e-05, "loss": 0.5368, "step": 10882 }, { "epoch": 0.31774254766284193, "grad_norm": 0.5373861720994982, "learning_rate": 3.7904298459042984e-05, "loss": 0.6582, "step": 10883 }, { "epoch": 0.3177717438906893, "grad_norm": 0.5350878521227991, "learning_rate": 3.7902676399026766e-05, "loss": 0.6397, "step": 10884 }, { "epoch": 0.3178009401185367, "grad_norm": 0.5257888054666893, "learning_rate": 3.790105433901055e-05, "loss": 0.6223, "step": 10885 }, { "epoch": 0.3178301363463841, "grad_norm": 0.5625540983891746, "learning_rate": 3.789943227899432e-05, "loss": 0.6897, "step": 10886 }, { "epoch": 0.31785933257423143, "grad_norm": 0.510563661225661, "learning_rate": 3.7897810218978104e-05, "loss": 0.5695, "step": 10887 }, { "epoch": 0.3178885288020788, "grad_norm": 0.5571946498218858, "learning_rate": 3.789618815896188e-05, "loss": 0.7013, "step": 10888 }, { "epoch": 0.31791772502992616, "grad_norm": 0.5600515498732886, "learning_rate": 3.789456609894566e-05, "loss": 0.6533, "step": 10889 }, { "epoch": 0.3179469212577735, "grad_norm": 0.5192959016144127, "learning_rate": 3.789294403892944e-05, "loss": 0.6178, "step": 10890 }, { "epoch": 0.3179761174856209, "grad_norm": 0.5315503740327978, "learning_rate": 3.789132197891322e-05, "loss": 0.6108, "step": 10891 }, { "epoch": 0.31800531371346824, "grad_norm": 0.5905863421917952, "learning_rate": 3.7889699918897006e-05, "loss": 0.7344, "step": 10892 }, { "epoch": 0.3180345099413156, "grad_norm": 0.5219559963516807, "learning_rate": 3.788807785888078e-05, "loss": 0.5952, "step": 10893 }, { "epoch": 0.31806370616916296, "grad_norm": 0.5174431299286191, "learning_rate": 3.788645579886456e-05, "loss": 0.5843, "step": 10894 }, { "epoch": 0.3180929023970103, "grad_norm": 0.6199605860825417, "learning_rate": 3.788483373884834e-05, "loss": 0.721, "step": 10895 }, { "epoch": 0.3181220986248577, "grad_norm": 0.5137754655314689, "learning_rate": 3.788321167883212e-05, "loss": 0.5721, "step": 10896 }, { "epoch": 0.31815129485270505, "grad_norm": 0.5876127032768227, "learning_rate": 3.78815896188159e-05, "loss": 0.6834, "step": 10897 }, { "epoch": 0.3181804910805524, "grad_norm": 0.527496033030192, "learning_rate": 3.7879967558799676e-05, "loss": 0.6397, "step": 10898 }, { "epoch": 0.31820968730839977, "grad_norm": 0.5812801288518457, "learning_rate": 3.787834549878346e-05, "loss": 0.6492, "step": 10899 }, { "epoch": 0.31823888353624713, "grad_norm": 0.5766305063202255, "learning_rate": 3.787672343876724e-05, "loss": 0.6529, "step": 10900 }, { "epoch": 0.3182680797640945, "grad_norm": 0.5596000962096048, "learning_rate": 3.7875101378751015e-05, "loss": 0.6434, "step": 10901 }, { "epoch": 0.31829727599194185, "grad_norm": 0.6244508897188676, "learning_rate": 3.7873479318734797e-05, "loss": 0.7355, "step": 10902 }, { "epoch": 0.3183264722197892, "grad_norm": 0.50531563161731, "learning_rate": 3.787185725871857e-05, "loss": 0.5493, "step": 10903 }, { "epoch": 0.3183556684476366, "grad_norm": 0.5710832224516194, "learning_rate": 3.787023519870235e-05, "loss": 0.6989, "step": 10904 }, { "epoch": 0.31838486467548394, "grad_norm": 0.5703780407670314, "learning_rate": 3.7868613138686135e-05, "loss": 0.6664, "step": 10905 }, { "epoch": 0.3184140609033313, "grad_norm": 0.5382150514165559, "learning_rate": 3.786699107866991e-05, "loss": 0.6315, "step": 10906 }, { "epoch": 0.31844325713117866, "grad_norm": 0.5885611615573845, "learning_rate": 3.786536901865369e-05, "loss": 0.7161, "step": 10907 }, { "epoch": 0.318472453359026, "grad_norm": 0.5304712930445444, "learning_rate": 3.786374695863747e-05, "loss": 0.5833, "step": 10908 }, { "epoch": 0.3185016495868734, "grad_norm": 0.5830517119035318, "learning_rate": 3.786212489862125e-05, "loss": 0.7374, "step": 10909 }, { "epoch": 0.31853084581472074, "grad_norm": 0.610980318536635, "learning_rate": 3.786050283860503e-05, "loss": 0.7518, "step": 10910 }, { "epoch": 0.3185600420425681, "grad_norm": 0.5752883280808582, "learning_rate": 3.785888077858881e-05, "loss": 0.6346, "step": 10911 }, { "epoch": 0.31858923827041546, "grad_norm": 0.5254452902181561, "learning_rate": 3.7857258718572594e-05, "loss": 0.6363, "step": 10912 }, { "epoch": 0.3186184344982628, "grad_norm": 0.5364827986809668, "learning_rate": 3.785563665855637e-05, "loss": 0.6457, "step": 10913 }, { "epoch": 0.3186476307261102, "grad_norm": 0.5236152255012473, "learning_rate": 3.785401459854015e-05, "loss": 0.6075, "step": 10914 }, { "epoch": 0.31867682695395755, "grad_norm": 0.5018820537884375, "learning_rate": 3.7852392538523926e-05, "loss": 0.5557, "step": 10915 }, { "epoch": 0.3187060231818049, "grad_norm": 0.5404656851359201, "learning_rate": 3.785077047850771e-05, "loss": 0.6596, "step": 10916 }, { "epoch": 0.31873521940965227, "grad_norm": 0.6026633508802701, "learning_rate": 3.784914841849149e-05, "loss": 0.6767, "step": 10917 }, { "epoch": 0.31876441563749963, "grad_norm": 0.5879547808234095, "learning_rate": 3.7847526358475264e-05, "loss": 0.7404, "step": 10918 }, { "epoch": 0.318793611865347, "grad_norm": 0.56659574817325, "learning_rate": 3.7845904298459046e-05, "loss": 0.6762, "step": 10919 }, { "epoch": 0.31882280809319435, "grad_norm": 0.5477652500904547, "learning_rate": 3.784428223844282e-05, "loss": 0.6669, "step": 10920 }, { "epoch": 0.3188520043210417, "grad_norm": 0.5290185109746475, "learning_rate": 3.78426601784266e-05, "loss": 0.6403, "step": 10921 }, { "epoch": 0.3188812005488891, "grad_norm": 0.5231010124653974, "learning_rate": 3.7841038118410384e-05, "loss": 0.61, "step": 10922 }, { "epoch": 0.31891039677673644, "grad_norm": 0.5834072846300746, "learning_rate": 3.783941605839416e-05, "loss": 0.7003, "step": 10923 }, { "epoch": 0.3189395930045838, "grad_norm": 0.5734348497964196, "learning_rate": 3.783779399837794e-05, "loss": 0.6199, "step": 10924 }, { "epoch": 0.31896878923243116, "grad_norm": 0.5317215473623979, "learning_rate": 3.783617193836172e-05, "loss": 0.5594, "step": 10925 }, { "epoch": 0.3189979854602785, "grad_norm": 0.5737015352774081, "learning_rate": 3.78345498783455e-05, "loss": 0.6934, "step": 10926 }, { "epoch": 0.3190271816881259, "grad_norm": 0.5125852326764271, "learning_rate": 3.783292781832928e-05, "loss": 0.5987, "step": 10927 }, { "epoch": 0.31905637791597324, "grad_norm": 0.583691626360458, "learning_rate": 3.7831305758313055e-05, "loss": 0.6345, "step": 10928 }, { "epoch": 0.3190855741438206, "grad_norm": 0.5388115634546421, "learning_rate": 3.7829683698296836e-05, "loss": 0.6016, "step": 10929 }, { "epoch": 0.31911477037166797, "grad_norm": 0.533173083481132, "learning_rate": 3.782806163828062e-05, "loss": 0.5858, "step": 10930 }, { "epoch": 0.3191439665995153, "grad_norm": 0.5452801534332266, "learning_rate": 3.78264395782644e-05, "loss": 0.6944, "step": 10931 }, { "epoch": 0.3191731628273627, "grad_norm": 0.5381989086347602, "learning_rate": 3.782481751824818e-05, "loss": 0.627, "step": 10932 }, { "epoch": 0.31920235905521005, "grad_norm": 0.5200385704121931, "learning_rate": 3.782319545823196e-05, "loss": 0.5653, "step": 10933 }, { "epoch": 0.3192315552830574, "grad_norm": 0.5647038412198483, "learning_rate": 3.782157339821574e-05, "loss": 0.6553, "step": 10934 }, { "epoch": 0.31926075151090477, "grad_norm": 0.5494668373845154, "learning_rate": 3.7819951338199513e-05, "loss": 0.6449, "step": 10935 }, { "epoch": 0.31928994773875213, "grad_norm": 0.5723617943427374, "learning_rate": 3.7818329278183295e-05, "loss": 0.6519, "step": 10936 }, { "epoch": 0.3193191439665995, "grad_norm": 0.574715515597626, "learning_rate": 3.781670721816708e-05, "loss": 0.7296, "step": 10937 }, { "epoch": 0.31934834019444686, "grad_norm": 0.5082326986607234, "learning_rate": 3.781508515815085e-05, "loss": 0.5653, "step": 10938 }, { "epoch": 0.3193775364222942, "grad_norm": 0.5017131989535829, "learning_rate": 3.7813463098134634e-05, "loss": 0.5703, "step": 10939 }, { "epoch": 0.3194067326501416, "grad_norm": 0.4847180224436744, "learning_rate": 3.781184103811841e-05, "loss": 0.5341, "step": 10940 }, { "epoch": 0.31943592887798894, "grad_norm": 0.5116211194655993, "learning_rate": 3.781021897810219e-05, "loss": 0.528, "step": 10941 }, { "epoch": 0.3194651251058363, "grad_norm": 0.5464188461616746, "learning_rate": 3.780859691808597e-05, "loss": 0.6272, "step": 10942 }, { "epoch": 0.31949432133368366, "grad_norm": 0.5884805953266542, "learning_rate": 3.780697485806975e-05, "loss": 0.6928, "step": 10943 }, { "epoch": 0.319523517561531, "grad_norm": 0.5368657149729796, "learning_rate": 3.780535279805353e-05, "loss": 0.6125, "step": 10944 }, { "epoch": 0.31955271378937844, "grad_norm": 0.5785851020465248, "learning_rate": 3.780373073803731e-05, "loss": 0.6885, "step": 10945 }, { "epoch": 0.3195819100172258, "grad_norm": 0.5894263183659741, "learning_rate": 3.7802108678021086e-05, "loss": 0.6578, "step": 10946 }, { "epoch": 0.31961110624507316, "grad_norm": 0.5528067359262292, "learning_rate": 3.780048661800487e-05, "loss": 0.6463, "step": 10947 }, { "epoch": 0.3196403024729205, "grad_norm": 0.5641069322435055, "learning_rate": 3.779886455798864e-05, "loss": 0.71, "step": 10948 }, { "epoch": 0.3196694987007679, "grad_norm": 0.5486205601301917, "learning_rate": 3.779724249797243e-05, "loss": 0.6346, "step": 10949 }, { "epoch": 0.31969869492861525, "grad_norm": 0.5747223239083484, "learning_rate": 3.7795620437956206e-05, "loss": 0.6711, "step": 10950 }, { "epoch": 0.3197278911564626, "grad_norm": 0.5088560211775586, "learning_rate": 3.779399837793999e-05, "loss": 0.5852, "step": 10951 }, { "epoch": 0.31975708738430997, "grad_norm": 0.5015355541781623, "learning_rate": 3.779237631792377e-05, "loss": 0.5694, "step": 10952 }, { "epoch": 0.31978628361215733, "grad_norm": 0.553767829589312, "learning_rate": 3.7790754257907544e-05, "loss": 0.6906, "step": 10953 }, { "epoch": 0.3198154798400047, "grad_norm": 0.5593137990733422, "learning_rate": 3.7789132197891326e-05, "loss": 0.6512, "step": 10954 }, { "epoch": 0.31984467606785205, "grad_norm": 0.49103723412145217, "learning_rate": 3.77875101378751e-05, "loss": 0.5611, "step": 10955 }, { "epoch": 0.3198738722956994, "grad_norm": 0.554570317070133, "learning_rate": 3.778588807785888e-05, "loss": 0.7061, "step": 10956 }, { "epoch": 0.3199030685235468, "grad_norm": 0.5230564042375586, "learning_rate": 3.7784266017842665e-05, "loss": 0.6074, "step": 10957 }, { "epoch": 0.31993226475139414, "grad_norm": 0.5632369733548789, "learning_rate": 3.778264395782644e-05, "loss": 0.6694, "step": 10958 }, { "epoch": 0.3199614609792415, "grad_norm": 0.5243265998996479, "learning_rate": 3.778102189781022e-05, "loss": 0.5883, "step": 10959 }, { "epoch": 0.31999065720708886, "grad_norm": 0.6597472600162303, "learning_rate": 3.7779399837793996e-05, "loss": 0.6969, "step": 10960 }, { "epoch": 0.3200198534349362, "grad_norm": 0.5966054395920208, "learning_rate": 3.777777777777778e-05, "loss": 0.6889, "step": 10961 }, { "epoch": 0.3200490496627836, "grad_norm": 0.5241852622337421, "learning_rate": 3.777615571776156e-05, "loss": 0.6703, "step": 10962 }, { "epoch": 0.32007824589063094, "grad_norm": 0.590634546089954, "learning_rate": 3.7774533657745335e-05, "loss": 0.7437, "step": 10963 }, { "epoch": 0.3201074421184783, "grad_norm": 0.5869134026978435, "learning_rate": 3.777291159772912e-05, "loss": 0.7461, "step": 10964 }, { "epoch": 0.32013663834632566, "grad_norm": 0.5435608175093116, "learning_rate": 3.777128953771289e-05, "loss": 0.6164, "step": 10965 }, { "epoch": 0.320165834574173, "grad_norm": 0.5234640147539191, "learning_rate": 3.7769667477696673e-05, "loss": 0.6038, "step": 10966 }, { "epoch": 0.3201950308020204, "grad_norm": 0.5604200322406914, "learning_rate": 3.7768045417680455e-05, "loss": 0.6379, "step": 10967 }, { "epoch": 0.32022422702986775, "grad_norm": 0.5348084592602902, "learning_rate": 3.776642335766424e-05, "loss": 0.6349, "step": 10968 }, { "epoch": 0.3202534232577151, "grad_norm": 0.580903426729745, "learning_rate": 3.776480129764802e-05, "loss": 0.7299, "step": 10969 }, { "epoch": 0.32028261948556247, "grad_norm": 0.5121251673235231, "learning_rate": 3.7763179237631794e-05, "loss": 0.6289, "step": 10970 }, { "epoch": 0.32031181571340983, "grad_norm": 0.5213970534184428, "learning_rate": 3.7761557177615576e-05, "loss": 0.569, "step": 10971 }, { "epoch": 0.3203410119412572, "grad_norm": 0.5370347698507811, "learning_rate": 3.775993511759936e-05, "loss": 0.5913, "step": 10972 }, { "epoch": 0.32037020816910455, "grad_norm": 0.5272503167508588, "learning_rate": 3.775831305758313e-05, "loss": 0.605, "step": 10973 }, { "epoch": 0.3203994043969519, "grad_norm": 0.5181472796580728, "learning_rate": 3.7756690997566914e-05, "loss": 0.5921, "step": 10974 }, { "epoch": 0.3204286006247993, "grad_norm": 0.6281594497232957, "learning_rate": 3.775506893755069e-05, "loss": 0.6764, "step": 10975 }, { "epoch": 0.32045779685264664, "grad_norm": 0.5123417135209712, "learning_rate": 3.775344687753447e-05, "loss": 0.5803, "step": 10976 }, { "epoch": 0.320486993080494, "grad_norm": 0.533033425409041, "learning_rate": 3.775182481751825e-05, "loss": 0.6435, "step": 10977 }, { "epoch": 0.32051618930834136, "grad_norm": 0.521386418560668, "learning_rate": 3.775020275750203e-05, "loss": 0.6395, "step": 10978 }, { "epoch": 0.3205453855361887, "grad_norm": 0.5711684537931419, "learning_rate": 3.774858069748581e-05, "loss": 0.5972, "step": 10979 }, { "epoch": 0.3205745817640361, "grad_norm": 0.5680388585606349, "learning_rate": 3.7746958637469584e-05, "loss": 0.7048, "step": 10980 }, { "epoch": 0.32060377799188344, "grad_norm": 0.5688801526655013, "learning_rate": 3.7745336577453366e-05, "loss": 0.6396, "step": 10981 }, { "epoch": 0.3206329742197308, "grad_norm": 0.5713141191759795, "learning_rate": 3.774371451743715e-05, "loss": 0.6879, "step": 10982 }, { "epoch": 0.32066217044757817, "grad_norm": 0.5447811384273754, "learning_rate": 3.774209245742092e-05, "loss": 0.6356, "step": 10983 }, { "epoch": 0.3206913666754255, "grad_norm": 0.5990226325052656, "learning_rate": 3.7740470397404705e-05, "loss": 0.7027, "step": 10984 }, { "epoch": 0.3207205629032729, "grad_norm": 0.5428639622024974, "learning_rate": 3.773884833738848e-05, "loss": 0.6302, "step": 10985 }, { "epoch": 0.32074975913112025, "grad_norm": 0.5573301466526587, "learning_rate": 3.773722627737227e-05, "loss": 0.6957, "step": 10986 }, { "epoch": 0.3207789553589676, "grad_norm": 0.5637364911356822, "learning_rate": 3.773560421735605e-05, "loss": 0.6756, "step": 10987 }, { "epoch": 0.32080815158681497, "grad_norm": 0.5299351344519719, "learning_rate": 3.7733982157339825e-05, "loss": 0.604, "step": 10988 }, { "epoch": 0.32083734781466233, "grad_norm": 0.5348494420301899, "learning_rate": 3.7732360097323607e-05, "loss": 0.6195, "step": 10989 }, { "epoch": 0.3208665440425097, "grad_norm": 0.5366472996291944, "learning_rate": 3.773073803730738e-05, "loss": 0.6232, "step": 10990 }, { "epoch": 0.32089574027035705, "grad_norm": 0.49372370161149093, "learning_rate": 3.772911597729116e-05, "loss": 0.5562, "step": 10991 }, { "epoch": 0.3209249364982044, "grad_norm": 0.5458667512378108, "learning_rate": 3.7727493917274945e-05, "loss": 0.6164, "step": 10992 }, { "epoch": 0.3209541327260518, "grad_norm": 0.5160952056385034, "learning_rate": 3.772587185725872e-05, "loss": 0.62, "step": 10993 }, { "epoch": 0.32098332895389914, "grad_norm": 0.5257759670553336, "learning_rate": 3.77242497972425e-05, "loss": 0.617, "step": 10994 }, { "epoch": 0.3210125251817465, "grad_norm": 0.5228246038481117, "learning_rate": 3.772262773722628e-05, "loss": 0.6319, "step": 10995 }, { "epoch": 0.32104172140959386, "grad_norm": 0.5237563067514405, "learning_rate": 3.772100567721006e-05, "loss": 0.6208, "step": 10996 }, { "epoch": 0.3210709176374412, "grad_norm": 0.5541002937244291, "learning_rate": 3.771938361719384e-05, "loss": 0.6249, "step": 10997 }, { "epoch": 0.3211001138652886, "grad_norm": 0.5660051405039045, "learning_rate": 3.7717761557177615e-05, "loss": 0.6705, "step": 10998 }, { "epoch": 0.32112931009313594, "grad_norm": 0.5232106920612032, "learning_rate": 3.77161394971614e-05, "loss": 0.5738, "step": 10999 }, { "epoch": 0.3211585063209833, "grad_norm": 0.5350392540430641, "learning_rate": 3.771451743714517e-05, "loss": 0.6244, "step": 11000 }, { "epoch": 0.32118770254883067, "grad_norm": 0.5581358793461372, "learning_rate": 3.7712895377128954e-05, "loss": 0.6426, "step": 11001 }, { "epoch": 0.32121689877667803, "grad_norm": 0.542235569097102, "learning_rate": 3.7711273317112736e-05, "loss": 0.7101, "step": 11002 }, { "epoch": 0.3212460950045254, "grad_norm": 0.5202321772473145, "learning_rate": 3.770965125709651e-05, "loss": 0.5993, "step": 11003 }, { "epoch": 0.32127529123237275, "grad_norm": 0.5200743598488385, "learning_rate": 3.770802919708029e-05, "loss": 0.6428, "step": 11004 }, { "epoch": 0.32130448746022017, "grad_norm": 0.541710258431504, "learning_rate": 3.7706407137064074e-05, "loss": 0.6338, "step": 11005 }, { "epoch": 0.32133368368806753, "grad_norm": 0.5264809781915286, "learning_rate": 3.7704785077047856e-05, "loss": 0.5954, "step": 11006 }, { "epoch": 0.3213628799159149, "grad_norm": 0.5387123882894341, "learning_rate": 3.770316301703163e-05, "loss": 0.6621, "step": 11007 }, { "epoch": 0.32139207614376225, "grad_norm": 0.5161260674118434, "learning_rate": 3.770154095701541e-05, "loss": 0.6075, "step": 11008 }, { "epoch": 0.3214212723716096, "grad_norm": 0.5030230305505086, "learning_rate": 3.7699918896999194e-05, "loss": 0.6079, "step": 11009 }, { "epoch": 0.321450468599457, "grad_norm": 0.5282034029130446, "learning_rate": 3.769829683698297e-05, "loss": 0.6208, "step": 11010 }, { "epoch": 0.32147966482730433, "grad_norm": 0.5230231543741037, "learning_rate": 3.769667477696675e-05, "loss": 0.6375, "step": 11011 }, { "epoch": 0.3215088610551517, "grad_norm": 0.5947699224975364, "learning_rate": 3.769505271695053e-05, "loss": 0.7441, "step": 11012 }, { "epoch": 0.32153805728299906, "grad_norm": 0.5319153119860456, "learning_rate": 3.769343065693431e-05, "loss": 0.6363, "step": 11013 }, { "epoch": 0.3215672535108464, "grad_norm": 0.5355862460411721, "learning_rate": 3.769180859691809e-05, "loss": 0.6075, "step": 11014 }, { "epoch": 0.3215964497386938, "grad_norm": 0.5126499165182211, "learning_rate": 3.7690186536901865e-05, "loss": 0.5883, "step": 11015 }, { "epoch": 0.32162564596654114, "grad_norm": 0.6713679921558301, "learning_rate": 3.7688564476885646e-05, "loss": 0.6694, "step": 11016 }, { "epoch": 0.3216548421943885, "grad_norm": 0.5565701550913879, "learning_rate": 3.768694241686943e-05, "loss": 0.6983, "step": 11017 }, { "epoch": 0.32168403842223586, "grad_norm": 0.5640900684447251, "learning_rate": 3.76853203568532e-05, "loss": 0.7214, "step": 11018 }, { "epoch": 0.3217132346500832, "grad_norm": 0.5124517820962509, "learning_rate": 3.7683698296836985e-05, "loss": 0.6206, "step": 11019 }, { "epoch": 0.3217424308779306, "grad_norm": 0.567485849475142, "learning_rate": 3.768207623682076e-05, "loss": 0.6904, "step": 11020 }, { "epoch": 0.32177162710577795, "grad_norm": 0.5275839909556982, "learning_rate": 3.768045417680454e-05, "loss": 0.6352, "step": 11021 }, { "epoch": 0.3218008233336253, "grad_norm": 0.5411251513958106, "learning_rate": 3.7678832116788323e-05, "loss": 0.5862, "step": 11022 }, { "epoch": 0.32183001956147267, "grad_norm": 0.5104852646888853, "learning_rate": 3.76772100567721e-05, "loss": 0.5718, "step": 11023 }, { "epoch": 0.32185921578932003, "grad_norm": 0.5940939039670832, "learning_rate": 3.767558799675589e-05, "loss": 0.6932, "step": 11024 }, { "epoch": 0.3218884120171674, "grad_norm": 0.5350882048222783, "learning_rate": 3.767396593673966e-05, "loss": 0.6029, "step": 11025 }, { "epoch": 0.32191760824501475, "grad_norm": 0.5341938192662595, "learning_rate": 3.7672343876723444e-05, "loss": 0.6194, "step": 11026 }, { "epoch": 0.3219468044728621, "grad_norm": 0.5619364393274956, "learning_rate": 3.767072181670722e-05, "loss": 0.6376, "step": 11027 }, { "epoch": 0.3219760007007095, "grad_norm": 0.5421008113842601, "learning_rate": 3.7669099756691e-05, "loss": 0.6524, "step": 11028 }, { "epoch": 0.32200519692855684, "grad_norm": 0.587848291404014, "learning_rate": 3.766747769667478e-05, "loss": 0.7203, "step": 11029 }, { "epoch": 0.3220343931564042, "grad_norm": 0.555189424327483, "learning_rate": 3.766585563665856e-05, "loss": 0.6881, "step": 11030 }, { "epoch": 0.32206358938425156, "grad_norm": 0.5562608560275772, "learning_rate": 3.766423357664234e-05, "loss": 0.6911, "step": 11031 }, { "epoch": 0.3220927856120989, "grad_norm": 0.5350292407662814, "learning_rate": 3.7662611516626114e-05, "loss": 0.6123, "step": 11032 }, { "epoch": 0.3221219818399463, "grad_norm": 0.5449467245888238, "learning_rate": 3.7660989456609896e-05, "loss": 0.6788, "step": 11033 }, { "epoch": 0.32215117806779364, "grad_norm": 0.5620685663861056, "learning_rate": 3.765936739659368e-05, "loss": 0.6075, "step": 11034 }, { "epoch": 0.322180374295641, "grad_norm": 0.5204830983145736, "learning_rate": 3.765774533657745e-05, "loss": 0.5786, "step": 11035 }, { "epoch": 0.32220957052348836, "grad_norm": 0.5632606293224719, "learning_rate": 3.7656123276561234e-05, "loss": 0.6375, "step": 11036 }, { "epoch": 0.3222387667513357, "grad_norm": 0.5123479632104153, "learning_rate": 3.7654501216545016e-05, "loss": 0.5945, "step": 11037 }, { "epoch": 0.3222679629791831, "grad_norm": 0.5332130867235148, "learning_rate": 3.765287915652879e-05, "loss": 0.593, "step": 11038 }, { "epoch": 0.32229715920703045, "grad_norm": 0.5549769460394891, "learning_rate": 3.765125709651257e-05, "loss": 0.5925, "step": 11039 }, { "epoch": 0.3223263554348778, "grad_norm": 0.5702474195794479, "learning_rate": 3.764963503649635e-05, "loss": 0.6933, "step": 11040 }, { "epoch": 0.32235555166272517, "grad_norm": 0.5665847232659768, "learning_rate": 3.764801297648013e-05, "loss": 0.666, "step": 11041 }, { "epoch": 0.32238474789057253, "grad_norm": 0.5355200285573651, "learning_rate": 3.764639091646391e-05, "loss": 0.625, "step": 11042 }, { "epoch": 0.3224139441184199, "grad_norm": 0.5991387275918469, "learning_rate": 3.764476885644769e-05, "loss": 0.77, "step": 11043 }, { "epoch": 0.32244314034626725, "grad_norm": 0.5772211920772552, "learning_rate": 3.7643146796431475e-05, "loss": 0.7016, "step": 11044 }, { "epoch": 0.3224723365741146, "grad_norm": 0.542032600886303, "learning_rate": 3.764152473641525e-05, "loss": 0.6751, "step": 11045 }, { "epoch": 0.322501532801962, "grad_norm": 0.5469785386605731, "learning_rate": 3.763990267639903e-05, "loss": 0.6434, "step": 11046 }, { "epoch": 0.32253072902980934, "grad_norm": 0.5431280119470866, "learning_rate": 3.7638280616382806e-05, "loss": 0.5911, "step": 11047 }, { "epoch": 0.3225599252576567, "grad_norm": 0.5539296531785256, "learning_rate": 3.763665855636659e-05, "loss": 0.679, "step": 11048 }, { "epoch": 0.32258912148550406, "grad_norm": 0.5550743844436145, "learning_rate": 3.763503649635037e-05, "loss": 0.6725, "step": 11049 }, { "epoch": 0.3226183177133514, "grad_norm": 0.5831431775202911, "learning_rate": 3.7633414436334145e-05, "loss": 0.6555, "step": 11050 }, { "epoch": 0.3226475139411988, "grad_norm": 0.5112875706303481, "learning_rate": 3.763179237631793e-05, "loss": 0.5775, "step": 11051 }, { "epoch": 0.32267671016904614, "grad_norm": 0.6006266439266692, "learning_rate": 3.76301703163017e-05, "loss": 0.6819, "step": 11052 }, { "epoch": 0.3227059063968935, "grad_norm": 0.5537056475989136, "learning_rate": 3.7628548256285484e-05, "loss": 0.6596, "step": 11053 }, { "epoch": 0.32273510262474087, "grad_norm": 0.5151046686823774, "learning_rate": 3.7626926196269265e-05, "loss": 0.574, "step": 11054 }, { "epoch": 0.3227642988525882, "grad_norm": 0.5501954694701816, "learning_rate": 3.762530413625304e-05, "loss": 0.6435, "step": 11055 }, { "epoch": 0.3227934950804356, "grad_norm": 0.5726689553649132, "learning_rate": 3.762368207623682e-05, "loss": 0.6487, "step": 11056 }, { "epoch": 0.32282269130828295, "grad_norm": 0.5516751724662159, "learning_rate": 3.7622060016220604e-05, "loss": 0.6788, "step": 11057 }, { "epoch": 0.3228518875361303, "grad_norm": 0.5087700166522503, "learning_rate": 3.762043795620438e-05, "loss": 0.5471, "step": 11058 }, { "epoch": 0.32288108376397767, "grad_norm": 0.5320655205064654, "learning_rate": 3.761881589618816e-05, "loss": 0.6215, "step": 11059 }, { "epoch": 0.32291027999182503, "grad_norm": 0.48183060735970024, "learning_rate": 3.7617193836171936e-05, "loss": 0.52, "step": 11060 }, { "epoch": 0.3229394762196724, "grad_norm": 0.5198725418188319, "learning_rate": 3.761557177615572e-05, "loss": 0.6439, "step": 11061 }, { "epoch": 0.32296867244751976, "grad_norm": 0.5486963892250728, "learning_rate": 3.76139497161395e-05, "loss": 0.6971, "step": 11062 }, { "epoch": 0.3229978686753671, "grad_norm": 0.5657932447718242, "learning_rate": 3.761232765612328e-05, "loss": 0.7057, "step": 11063 }, { "epoch": 0.3230270649032145, "grad_norm": 0.6780218654404937, "learning_rate": 3.761070559610706e-05, "loss": 0.7558, "step": 11064 }, { "epoch": 0.32305626113106184, "grad_norm": 0.5678720466163872, "learning_rate": 3.760908353609084e-05, "loss": 0.7068, "step": 11065 }, { "epoch": 0.32308545735890926, "grad_norm": 0.5756850640844232, "learning_rate": 3.760746147607462e-05, "loss": 0.6544, "step": 11066 }, { "epoch": 0.3231146535867566, "grad_norm": 0.5580509430906887, "learning_rate": 3.7605839416058394e-05, "loss": 0.6764, "step": 11067 }, { "epoch": 0.323143849814604, "grad_norm": 0.5465488684196105, "learning_rate": 3.7604217356042176e-05, "loss": 0.6356, "step": 11068 }, { "epoch": 0.32317304604245134, "grad_norm": 0.576765889239161, "learning_rate": 3.760259529602596e-05, "loss": 0.746, "step": 11069 }, { "epoch": 0.3232022422702987, "grad_norm": 0.564718202596239, "learning_rate": 3.760097323600973e-05, "loss": 0.6967, "step": 11070 }, { "epoch": 0.32323143849814606, "grad_norm": 0.5486139127770696, "learning_rate": 3.7599351175993515e-05, "loss": 0.6515, "step": 11071 }, { "epoch": 0.3232606347259934, "grad_norm": 0.49965680739098356, "learning_rate": 3.759772911597729e-05, "loss": 0.5539, "step": 11072 }, { "epoch": 0.3232898309538408, "grad_norm": 0.5259783248894585, "learning_rate": 3.759610705596107e-05, "loss": 0.6498, "step": 11073 }, { "epoch": 0.32331902718168815, "grad_norm": 0.5611702393418733, "learning_rate": 3.759448499594485e-05, "loss": 0.6851, "step": 11074 }, { "epoch": 0.3233482234095355, "grad_norm": 0.6220797507005007, "learning_rate": 3.759286293592863e-05, "loss": 0.7727, "step": 11075 }, { "epoch": 0.32337741963738287, "grad_norm": 0.5520390878344217, "learning_rate": 3.759124087591241e-05, "loss": 0.6503, "step": 11076 }, { "epoch": 0.32340661586523023, "grad_norm": 0.5575940759255258, "learning_rate": 3.7589618815896185e-05, "loss": 0.7074, "step": 11077 }, { "epoch": 0.3234358120930776, "grad_norm": 0.5334006085769821, "learning_rate": 3.7587996755879967e-05, "loss": 0.6441, "step": 11078 }, { "epoch": 0.32346500832092495, "grad_norm": 0.5360275160514357, "learning_rate": 3.758637469586375e-05, "loss": 0.6332, "step": 11079 }, { "epoch": 0.3234942045487723, "grad_norm": 0.5377937691682202, "learning_rate": 3.758475263584752e-05, "loss": 0.6562, "step": 11080 }, { "epoch": 0.3235234007766197, "grad_norm": 0.5202748325553246, "learning_rate": 3.758313057583131e-05, "loss": 0.6187, "step": 11081 }, { "epoch": 0.32355259700446704, "grad_norm": 0.5378477638507962, "learning_rate": 3.758150851581509e-05, "loss": 0.6784, "step": 11082 }, { "epoch": 0.3235817932323144, "grad_norm": 0.5424792501091391, "learning_rate": 3.757988645579887e-05, "loss": 0.609, "step": 11083 }, { "epoch": 0.32361098946016176, "grad_norm": 0.5243397567297436, "learning_rate": 3.757826439578265e-05, "loss": 0.6077, "step": 11084 }, { "epoch": 0.3236401856880091, "grad_norm": 0.5560728564057692, "learning_rate": 3.7576642335766425e-05, "loss": 0.6178, "step": 11085 }, { "epoch": 0.3236693819158565, "grad_norm": 0.5187247492209703, "learning_rate": 3.757502027575021e-05, "loss": 0.5774, "step": 11086 }, { "epoch": 0.32369857814370384, "grad_norm": 0.5291592189879708, "learning_rate": 3.757339821573398e-05, "loss": 0.6462, "step": 11087 }, { "epoch": 0.3237277743715512, "grad_norm": 0.5664009234147466, "learning_rate": 3.7571776155717764e-05, "loss": 0.6774, "step": 11088 }, { "epoch": 0.32375697059939856, "grad_norm": 0.5595353705151526, "learning_rate": 3.7570154095701546e-05, "loss": 0.6512, "step": 11089 }, { "epoch": 0.3237861668272459, "grad_norm": 0.5304869755968963, "learning_rate": 3.756853203568532e-05, "loss": 0.623, "step": 11090 }, { "epoch": 0.3238153630550933, "grad_norm": 0.543936478466762, "learning_rate": 3.75669099756691e-05, "loss": 0.6232, "step": 11091 }, { "epoch": 0.32384455928294065, "grad_norm": 0.5285367384217265, "learning_rate": 3.756528791565288e-05, "loss": 0.6005, "step": 11092 }, { "epoch": 0.323873755510788, "grad_norm": 0.5260399902127343, "learning_rate": 3.756366585563666e-05, "loss": 0.5962, "step": 11093 }, { "epoch": 0.32390295173863537, "grad_norm": 0.46101463337051163, "learning_rate": 3.756204379562044e-05, "loss": 0.4913, "step": 11094 }, { "epoch": 0.32393214796648273, "grad_norm": 0.516802779318003, "learning_rate": 3.7560421735604216e-05, "loss": 0.6055, "step": 11095 }, { "epoch": 0.3239613441943301, "grad_norm": 0.555702694130182, "learning_rate": 3.7558799675588e-05, "loss": 0.6619, "step": 11096 }, { "epoch": 0.32399054042217745, "grad_norm": 0.5717844801847249, "learning_rate": 3.755717761557177e-05, "loss": 0.7017, "step": 11097 }, { "epoch": 0.3240197366500248, "grad_norm": 0.5572714488186101, "learning_rate": 3.7555555555555554e-05, "loss": 0.6792, "step": 11098 }, { "epoch": 0.3240489328778722, "grad_norm": 0.5221657392163056, "learning_rate": 3.7553933495539336e-05, "loss": 0.6459, "step": 11099 }, { "epoch": 0.32407812910571954, "grad_norm": 0.549627885427576, "learning_rate": 3.755231143552312e-05, "loss": 0.6015, "step": 11100 }, { "epoch": 0.3241073253335669, "grad_norm": 0.5664189383091859, "learning_rate": 3.75506893755069e-05, "loss": 0.6592, "step": 11101 }, { "epoch": 0.32413652156141426, "grad_norm": 0.4888250254112904, "learning_rate": 3.7549067315490675e-05, "loss": 0.5371, "step": 11102 }, { "epoch": 0.3241657177892616, "grad_norm": 0.6133179819519771, "learning_rate": 3.7547445255474456e-05, "loss": 0.7308, "step": 11103 }, { "epoch": 0.324194914017109, "grad_norm": 0.6013706785586755, "learning_rate": 3.754582319545824e-05, "loss": 0.7672, "step": 11104 }, { "epoch": 0.32422411024495634, "grad_norm": 0.5073829300978573, "learning_rate": 3.754420113544201e-05, "loss": 0.5851, "step": 11105 }, { "epoch": 0.3242533064728037, "grad_norm": 0.5533995836918522, "learning_rate": 3.7542579075425795e-05, "loss": 0.6677, "step": 11106 }, { "epoch": 0.32428250270065107, "grad_norm": 0.4911848358818367, "learning_rate": 3.754095701540957e-05, "loss": 0.5129, "step": 11107 }, { "epoch": 0.3243116989284984, "grad_norm": 0.5486166894642935, "learning_rate": 3.753933495539335e-05, "loss": 0.6157, "step": 11108 }, { "epoch": 0.3243408951563458, "grad_norm": 0.5877293668077525, "learning_rate": 3.7537712895377133e-05, "loss": 0.6812, "step": 11109 }, { "epoch": 0.32437009138419315, "grad_norm": 0.5299941364036961, "learning_rate": 3.753609083536091e-05, "loss": 0.6221, "step": 11110 }, { "epoch": 0.3243992876120405, "grad_norm": 0.49579043348114765, "learning_rate": 3.753446877534469e-05, "loss": 0.5359, "step": 11111 }, { "epoch": 0.32442848383988787, "grad_norm": 0.6197613441283041, "learning_rate": 3.7532846715328465e-05, "loss": 0.7091, "step": 11112 }, { "epoch": 0.32445768006773523, "grad_norm": 0.5955280864866457, "learning_rate": 3.753122465531225e-05, "loss": 0.6892, "step": 11113 }, { "epoch": 0.3244868762955826, "grad_norm": 0.5441885562321539, "learning_rate": 3.752960259529603e-05, "loss": 0.6516, "step": 11114 }, { "epoch": 0.32451607252342995, "grad_norm": 0.533713230523191, "learning_rate": 3.7527980535279804e-05, "loss": 0.5911, "step": 11115 }, { "epoch": 0.3245452687512773, "grad_norm": 0.5735604455581734, "learning_rate": 3.7526358475263585e-05, "loss": 0.6769, "step": 11116 }, { "epoch": 0.3245744649791247, "grad_norm": 0.5330194413909006, "learning_rate": 3.752473641524736e-05, "loss": 0.6228, "step": 11117 }, { "epoch": 0.32460366120697204, "grad_norm": 0.5319319423998539, "learning_rate": 3.752311435523114e-05, "loss": 0.5861, "step": 11118 }, { "epoch": 0.3246328574348194, "grad_norm": 0.5276964964670176, "learning_rate": 3.7521492295214924e-05, "loss": 0.6062, "step": 11119 }, { "epoch": 0.32466205366266676, "grad_norm": 0.5503180160006669, "learning_rate": 3.7519870235198706e-05, "loss": 0.5964, "step": 11120 }, { "epoch": 0.3246912498905141, "grad_norm": 0.5195603393974346, "learning_rate": 3.751824817518249e-05, "loss": 0.6, "step": 11121 }, { "epoch": 0.3247204461183615, "grad_norm": 0.5491057222411587, "learning_rate": 3.751662611516626e-05, "loss": 0.6934, "step": 11122 }, { "epoch": 0.32474964234620884, "grad_norm": 0.5643942023435377, "learning_rate": 3.7515004055150044e-05, "loss": 0.7199, "step": 11123 }, { "epoch": 0.3247788385740562, "grad_norm": 0.5694886726050166, "learning_rate": 3.7513381995133826e-05, "loss": 0.6699, "step": 11124 }, { "epoch": 0.32480803480190357, "grad_norm": 0.5879617875209707, "learning_rate": 3.75117599351176e-05, "loss": 0.6565, "step": 11125 }, { "epoch": 0.324837231029751, "grad_norm": 0.5067258417185466, "learning_rate": 3.751013787510138e-05, "loss": 0.5725, "step": 11126 }, { "epoch": 0.32486642725759834, "grad_norm": 0.5519438114294601, "learning_rate": 3.750851581508516e-05, "loss": 0.6335, "step": 11127 }, { "epoch": 0.3248956234854457, "grad_norm": 0.5437635351830324, "learning_rate": 3.750689375506894e-05, "loss": 0.6545, "step": 11128 }, { "epoch": 0.32492481971329307, "grad_norm": 0.5653827798943031, "learning_rate": 3.750527169505272e-05, "loss": 0.6952, "step": 11129 }, { "epoch": 0.32495401594114043, "grad_norm": 0.522659288961664, "learning_rate": 3.7503649635036496e-05, "loss": 0.6042, "step": 11130 }, { "epoch": 0.3249832121689878, "grad_norm": 0.5884203141223067, "learning_rate": 3.750202757502028e-05, "loss": 0.6953, "step": 11131 }, { "epoch": 0.32501240839683515, "grad_norm": 0.5741968802198709, "learning_rate": 3.750040551500405e-05, "loss": 0.6912, "step": 11132 }, { "epoch": 0.3250416046246825, "grad_norm": 0.5153788197059775, "learning_rate": 3.7498783454987835e-05, "loss": 0.5753, "step": 11133 }, { "epoch": 0.3250708008525299, "grad_norm": 0.5158474051779343, "learning_rate": 3.7497161394971616e-05, "loss": 0.5868, "step": 11134 }, { "epoch": 0.32509999708037723, "grad_norm": 0.5836078906242893, "learning_rate": 3.749553933495539e-05, "loss": 0.7124, "step": 11135 }, { "epoch": 0.3251291933082246, "grad_norm": 0.5065075143138799, "learning_rate": 3.749391727493917e-05, "loss": 0.5705, "step": 11136 }, { "epoch": 0.32515838953607196, "grad_norm": 0.5812869309572474, "learning_rate": 3.7492295214922955e-05, "loss": 0.6713, "step": 11137 }, { "epoch": 0.3251875857639193, "grad_norm": 0.5607861110811965, "learning_rate": 3.749067315490674e-05, "loss": 0.598, "step": 11138 }, { "epoch": 0.3252167819917667, "grad_norm": 0.5204411449531637, "learning_rate": 3.748905109489051e-05, "loss": 0.5837, "step": 11139 }, { "epoch": 0.32524597821961404, "grad_norm": 0.5300803070749462, "learning_rate": 3.7487429034874294e-05, "loss": 0.6204, "step": 11140 }, { "epoch": 0.3252751744474614, "grad_norm": 0.552807391211775, "learning_rate": 3.7485806974858075e-05, "loss": 0.6519, "step": 11141 }, { "epoch": 0.32530437067530876, "grad_norm": 0.5352542546687714, "learning_rate": 3.748418491484185e-05, "loss": 0.6225, "step": 11142 }, { "epoch": 0.3253335669031561, "grad_norm": 0.49573254620279306, "learning_rate": 3.748256285482563e-05, "loss": 0.5468, "step": 11143 }, { "epoch": 0.3253627631310035, "grad_norm": 0.5454695319408, "learning_rate": 3.7480940794809414e-05, "loss": 0.5998, "step": 11144 }, { "epoch": 0.32539195935885085, "grad_norm": 0.5039053159801548, "learning_rate": 3.747931873479319e-05, "loss": 0.5793, "step": 11145 }, { "epoch": 0.3254211555866982, "grad_norm": 0.5495355626326698, "learning_rate": 3.747769667477697e-05, "loss": 0.6218, "step": 11146 }, { "epoch": 0.32545035181454557, "grad_norm": 0.5455522994074411, "learning_rate": 3.7476074614760746e-05, "loss": 0.654, "step": 11147 }, { "epoch": 0.32547954804239293, "grad_norm": 0.5451230660913193, "learning_rate": 3.747445255474453e-05, "loss": 0.6492, "step": 11148 }, { "epoch": 0.3255087442702403, "grad_norm": 0.5641164514872774, "learning_rate": 3.747283049472831e-05, "loss": 0.617, "step": 11149 }, { "epoch": 0.32553794049808765, "grad_norm": 0.5840621808432865, "learning_rate": 3.7471208434712084e-05, "loss": 0.7417, "step": 11150 }, { "epoch": 0.325567136725935, "grad_norm": 0.5499642603895972, "learning_rate": 3.7469586374695866e-05, "loss": 0.6028, "step": 11151 }, { "epoch": 0.3255963329537824, "grad_norm": 0.5227774563146022, "learning_rate": 3.746796431467964e-05, "loss": 0.595, "step": 11152 }, { "epoch": 0.32562552918162974, "grad_norm": 0.5517326849255699, "learning_rate": 3.746634225466342e-05, "loss": 0.6769, "step": 11153 }, { "epoch": 0.3256547254094771, "grad_norm": 0.5243150600404618, "learning_rate": 3.7464720194647204e-05, "loss": 0.5954, "step": 11154 }, { "epoch": 0.32568392163732446, "grad_norm": 0.5332582257719786, "learning_rate": 3.746309813463098e-05, "loss": 0.651, "step": 11155 }, { "epoch": 0.3257131178651718, "grad_norm": 0.5151722171787768, "learning_rate": 3.746147607461477e-05, "loss": 0.611, "step": 11156 }, { "epoch": 0.3257423140930192, "grad_norm": 0.5862122809090485, "learning_rate": 3.745985401459854e-05, "loss": 0.6682, "step": 11157 }, { "epoch": 0.32577151032086654, "grad_norm": 0.5698613089732374, "learning_rate": 3.7458231954582325e-05, "loss": 0.7135, "step": 11158 }, { "epoch": 0.3258007065487139, "grad_norm": 0.5354884870923924, "learning_rate": 3.74566098945661e-05, "loss": 0.6339, "step": 11159 }, { "epoch": 0.32582990277656126, "grad_norm": 0.5644326559257234, "learning_rate": 3.745498783454988e-05, "loss": 0.682, "step": 11160 }, { "epoch": 0.3258590990044086, "grad_norm": 0.5380162660736135, "learning_rate": 3.745336577453366e-05, "loss": 0.6133, "step": 11161 }, { "epoch": 0.325888295232256, "grad_norm": 0.5442251035262706, "learning_rate": 3.745174371451744e-05, "loss": 0.6014, "step": 11162 }, { "epoch": 0.32591749146010335, "grad_norm": 0.5773745569203322, "learning_rate": 3.745012165450122e-05, "loss": 0.6809, "step": 11163 }, { "epoch": 0.3259466876879507, "grad_norm": 0.5196550427434963, "learning_rate": 3.7448499594484995e-05, "loss": 0.5913, "step": 11164 }, { "epoch": 0.32597588391579807, "grad_norm": 0.5446499390243411, "learning_rate": 3.7446877534468777e-05, "loss": 0.5927, "step": 11165 }, { "epoch": 0.32600508014364543, "grad_norm": 0.509979786403043, "learning_rate": 3.744525547445256e-05, "loss": 0.5775, "step": 11166 }, { "epoch": 0.3260342763714928, "grad_norm": 0.5068913428193714, "learning_rate": 3.744363341443633e-05, "loss": 0.6367, "step": 11167 }, { "epoch": 0.32606347259934015, "grad_norm": 0.531917534473932, "learning_rate": 3.7442011354420115e-05, "loss": 0.5843, "step": 11168 }, { "epoch": 0.3260926688271875, "grad_norm": 0.5859245681393639, "learning_rate": 3.74403892944039e-05, "loss": 0.7386, "step": 11169 }, { "epoch": 0.3261218650550349, "grad_norm": 0.5633379650702566, "learning_rate": 3.743876723438767e-05, "loss": 0.654, "step": 11170 }, { "epoch": 0.32615106128288224, "grad_norm": 0.6107305938730277, "learning_rate": 3.7437145174371454e-05, "loss": 0.7469, "step": 11171 }, { "epoch": 0.3261802575107296, "grad_norm": 0.5117812666344048, "learning_rate": 3.743552311435523e-05, "loss": 0.5931, "step": 11172 }, { "epoch": 0.32620945373857696, "grad_norm": 0.5634265145478791, "learning_rate": 3.743390105433901e-05, "loss": 0.5787, "step": 11173 }, { "epoch": 0.3262386499664243, "grad_norm": 0.48390642973810033, "learning_rate": 3.743227899432279e-05, "loss": 0.5543, "step": 11174 }, { "epoch": 0.3262678461942717, "grad_norm": 0.5313873191911062, "learning_rate": 3.7430656934306574e-05, "loss": 0.5896, "step": 11175 }, { "epoch": 0.32629704242211904, "grad_norm": 0.580079616421171, "learning_rate": 3.7429034874290356e-05, "loss": 0.655, "step": 11176 }, { "epoch": 0.3263262386499664, "grad_norm": 0.5222223392402168, "learning_rate": 3.742741281427413e-05, "loss": 0.6294, "step": 11177 }, { "epoch": 0.32635543487781377, "grad_norm": 0.5438363807405775, "learning_rate": 3.742579075425791e-05, "loss": 0.618, "step": 11178 }, { "epoch": 0.3263846311056611, "grad_norm": 0.5529882628744539, "learning_rate": 3.742416869424169e-05, "loss": 0.6196, "step": 11179 }, { "epoch": 0.3264138273335085, "grad_norm": 0.536514775950027, "learning_rate": 3.742254663422547e-05, "loss": 0.6359, "step": 11180 }, { "epoch": 0.32644302356135585, "grad_norm": 0.561373259167329, "learning_rate": 3.742092457420925e-05, "loss": 0.6432, "step": 11181 }, { "epoch": 0.3264722197892032, "grad_norm": 0.5656190977598958, "learning_rate": 3.7419302514193026e-05, "loss": 0.71, "step": 11182 }, { "epoch": 0.32650141601705057, "grad_norm": 0.5628092228670611, "learning_rate": 3.741768045417681e-05, "loss": 0.6661, "step": 11183 }, { "epoch": 0.32653061224489793, "grad_norm": 0.5326856884922079, "learning_rate": 3.741605839416058e-05, "loss": 0.6289, "step": 11184 }, { "epoch": 0.3265598084727453, "grad_norm": 0.5394332567937424, "learning_rate": 3.7414436334144364e-05, "loss": 0.6323, "step": 11185 }, { "epoch": 0.3265890047005927, "grad_norm": 0.5658622428604276, "learning_rate": 3.7412814274128146e-05, "loss": 0.694, "step": 11186 }, { "epoch": 0.32661820092844007, "grad_norm": 0.5126755923268197, "learning_rate": 3.741119221411192e-05, "loss": 0.5784, "step": 11187 }, { "epoch": 0.32664739715628743, "grad_norm": 0.5460445352716166, "learning_rate": 3.74095701540957e-05, "loss": 0.6382, "step": 11188 }, { "epoch": 0.3266765933841348, "grad_norm": 0.5631453199712732, "learning_rate": 3.7407948094079485e-05, "loss": 0.6422, "step": 11189 }, { "epoch": 0.32670578961198216, "grad_norm": 0.5480235218400175, "learning_rate": 3.740632603406326e-05, "loss": 0.5974, "step": 11190 }, { "epoch": 0.3267349858398295, "grad_norm": 0.5629843151049391, "learning_rate": 3.740470397404704e-05, "loss": 0.6347, "step": 11191 }, { "epoch": 0.3267641820676769, "grad_norm": 0.5167275770069836, "learning_rate": 3.7403081914030816e-05, "loss": 0.6019, "step": 11192 }, { "epoch": 0.32679337829552424, "grad_norm": 0.5447221124038607, "learning_rate": 3.74014598540146e-05, "loss": 0.649, "step": 11193 }, { "epoch": 0.3268225745233716, "grad_norm": 0.5577522278793858, "learning_rate": 3.739983779399838e-05, "loss": 0.6785, "step": 11194 }, { "epoch": 0.32685177075121896, "grad_norm": 0.5571165221962368, "learning_rate": 3.739821573398216e-05, "loss": 0.6363, "step": 11195 }, { "epoch": 0.3268809669790663, "grad_norm": 0.5357022655350884, "learning_rate": 3.7396593673965943e-05, "loss": 0.6403, "step": 11196 }, { "epoch": 0.3269101632069137, "grad_norm": 0.5354470330517597, "learning_rate": 3.739497161394972e-05, "loss": 0.6127, "step": 11197 }, { "epoch": 0.32693935943476105, "grad_norm": 0.5300861298622754, "learning_rate": 3.73933495539335e-05, "loss": 0.5861, "step": 11198 }, { "epoch": 0.3269685556626084, "grad_norm": 0.5077019842965823, "learning_rate": 3.7391727493917275e-05, "loss": 0.5779, "step": 11199 }, { "epoch": 0.32699775189045577, "grad_norm": 0.7045285207851951, "learning_rate": 3.739010543390106e-05, "loss": 0.5943, "step": 11200 }, { "epoch": 0.32702694811830313, "grad_norm": 0.5683315885451512, "learning_rate": 3.738848337388484e-05, "loss": 0.6164, "step": 11201 }, { "epoch": 0.3270561443461505, "grad_norm": 0.5127490119070384, "learning_rate": 3.7386861313868614e-05, "loss": 0.6248, "step": 11202 }, { "epoch": 0.32708534057399785, "grad_norm": 0.564310597611858, "learning_rate": 3.7385239253852395e-05, "loss": 0.6727, "step": 11203 }, { "epoch": 0.3271145368018452, "grad_norm": 0.5743010683128645, "learning_rate": 3.738361719383617e-05, "loss": 0.6026, "step": 11204 }, { "epoch": 0.3271437330296926, "grad_norm": 0.5548857258239135, "learning_rate": 3.738199513381995e-05, "loss": 0.6731, "step": 11205 }, { "epoch": 0.32717292925753994, "grad_norm": 0.5570221573139379, "learning_rate": 3.7380373073803734e-05, "loss": 0.6957, "step": 11206 }, { "epoch": 0.3272021254853873, "grad_norm": 0.5096602213458589, "learning_rate": 3.737875101378751e-05, "loss": 0.6026, "step": 11207 }, { "epoch": 0.32723132171323466, "grad_norm": 0.5287150250637165, "learning_rate": 3.737712895377129e-05, "loss": 0.5531, "step": 11208 }, { "epoch": 0.327260517941082, "grad_norm": 0.5592227573453774, "learning_rate": 3.7375506893755066e-05, "loss": 0.6481, "step": 11209 }, { "epoch": 0.3272897141689294, "grad_norm": 0.533096654965334, "learning_rate": 3.737388483373885e-05, "loss": 0.6245, "step": 11210 }, { "epoch": 0.32731891039677674, "grad_norm": 0.5647382014886673, "learning_rate": 3.737226277372263e-05, "loss": 0.6862, "step": 11211 }, { "epoch": 0.3273481066246241, "grad_norm": 0.529382076894956, "learning_rate": 3.7370640713706404e-05, "loss": 0.6132, "step": 11212 }, { "epoch": 0.32737730285247146, "grad_norm": 0.5802148926401485, "learning_rate": 3.736901865369019e-05, "loss": 0.6997, "step": 11213 }, { "epoch": 0.3274064990803188, "grad_norm": 0.5449534253239302, "learning_rate": 3.736739659367397e-05, "loss": 0.6051, "step": 11214 }, { "epoch": 0.3274356953081662, "grad_norm": 0.512313275747488, "learning_rate": 3.736577453365775e-05, "loss": 0.5874, "step": 11215 }, { "epoch": 0.32746489153601355, "grad_norm": 0.6000439852070474, "learning_rate": 3.736415247364153e-05, "loss": 0.6772, "step": 11216 }, { "epoch": 0.3274940877638609, "grad_norm": 0.5425141156688196, "learning_rate": 3.7362530413625306e-05, "loss": 0.6731, "step": 11217 }, { "epoch": 0.32752328399170827, "grad_norm": 0.502688918131295, "learning_rate": 3.736090835360909e-05, "loss": 0.5895, "step": 11218 }, { "epoch": 0.32755248021955563, "grad_norm": 0.5467881564871431, "learning_rate": 3.735928629359286e-05, "loss": 0.6721, "step": 11219 }, { "epoch": 0.327581676447403, "grad_norm": 0.5508441250368163, "learning_rate": 3.7357664233576645e-05, "loss": 0.6412, "step": 11220 }, { "epoch": 0.32761087267525035, "grad_norm": 0.5433151441859896, "learning_rate": 3.7356042173560427e-05, "loss": 0.6603, "step": 11221 }, { "epoch": 0.3276400689030977, "grad_norm": 0.5940206637670743, "learning_rate": 3.73544201135442e-05, "loss": 0.7381, "step": 11222 }, { "epoch": 0.3276692651309451, "grad_norm": 0.5346498344156633, "learning_rate": 3.735279805352798e-05, "loss": 0.6452, "step": 11223 }, { "epoch": 0.32769846135879244, "grad_norm": 0.5783150735425158, "learning_rate": 3.735117599351176e-05, "loss": 0.7296, "step": 11224 }, { "epoch": 0.3277276575866398, "grad_norm": 0.5131362427816882, "learning_rate": 3.734955393349554e-05, "loss": 0.5644, "step": 11225 }, { "epoch": 0.32775685381448716, "grad_norm": 0.5647212658332488, "learning_rate": 3.734793187347932e-05, "loss": 0.6624, "step": 11226 }, { "epoch": 0.3277860500423345, "grad_norm": 0.5235366696196119, "learning_rate": 3.73463098134631e-05, "loss": 0.6488, "step": 11227 }, { "epoch": 0.3278152462701819, "grad_norm": 0.5710743559554078, "learning_rate": 3.734468775344688e-05, "loss": 0.6701, "step": 11228 }, { "epoch": 0.32784444249802924, "grad_norm": 0.5538756382541695, "learning_rate": 3.7343065693430653e-05, "loss": 0.6601, "step": 11229 }, { "epoch": 0.3278736387258766, "grad_norm": 0.49061186507242693, "learning_rate": 3.7341443633414435e-05, "loss": 0.528, "step": 11230 }, { "epoch": 0.32790283495372397, "grad_norm": 14.071310516123527, "learning_rate": 3.733982157339822e-05, "loss": 1.156, "step": 11231 }, { "epoch": 0.3279320311815713, "grad_norm": 0.5412862495713507, "learning_rate": 3.7338199513382e-05, "loss": 0.5949, "step": 11232 }, { "epoch": 0.3279612274094187, "grad_norm": 0.5587269860690226, "learning_rate": 3.733657745336578e-05, "loss": 0.6563, "step": 11233 }, { "epoch": 0.32799042363726605, "grad_norm": 0.5420598732329681, "learning_rate": 3.7334955393349556e-05, "loss": 0.5601, "step": 11234 }, { "epoch": 0.3280196198651134, "grad_norm": 0.5575489476239425, "learning_rate": 3.733333333333334e-05, "loss": 0.6218, "step": 11235 }, { "epoch": 0.32804881609296077, "grad_norm": 0.5617350232245197, "learning_rate": 3.733171127331712e-05, "loss": 0.6463, "step": 11236 }, { "epoch": 0.32807801232080813, "grad_norm": 0.5354565896483962, "learning_rate": 3.7330089213300894e-05, "loss": 0.6148, "step": 11237 }, { "epoch": 0.3281072085486555, "grad_norm": 0.5285213465290576, "learning_rate": 3.7328467153284676e-05, "loss": 0.6357, "step": 11238 }, { "epoch": 0.32813640477650285, "grad_norm": 0.5500502498120764, "learning_rate": 3.732684509326845e-05, "loss": 0.6395, "step": 11239 }, { "epoch": 0.3281656010043502, "grad_norm": 0.6327721627229668, "learning_rate": 3.732522303325223e-05, "loss": 0.7978, "step": 11240 }, { "epoch": 0.3281947972321976, "grad_norm": 0.5608443918524975, "learning_rate": 3.7323600973236014e-05, "loss": 0.6487, "step": 11241 }, { "epoch": 0.32822399346004494, "grad_norm": 0.5192947340193139, "learning_rate": 3.732197891321979e-05, "loss": 0.5637, "step": 11242 }, { "epoch": 0.3282531896878923, "grad_norm": 0.5733602484863293, "learning_rate": 3.732035685320357e-05, "loss": 0.7171, "step": 11243 }, { "epoch": 0.32828238591573966, "grad_norm": 0.541601126046811, "learning_rate": 3.7318734793187346e-05, "loss": 0.6381, "step": 11244 }, { "epoch": 0.328311582143587, "grad_norm": 0.5644478407925908, "learning_rate": 3.731711273317113e-05, "loss": 0.6796, "step": 11245 }, { "epoch": 0.3283407783714344, "grad_norm": 0.5629226829079602, "learning_rate": 3.731549067315491e-05, "loss": 0.682, "step": 11246 }, { "epoch": 0.3283699745992818, "grad_norm": 0.5250821431812817, "learning_rate": 3.7313868613138685e-05, "loss": 0.58, "step": 11247 }, { "epoch": 0.32839917082712916, "grad_norm": 0.5055919799283113, "learning_rate": 3.7312246553122466e-05, "loss": 0.5622, "step": 11248 }, { "epoch": 0.3284283670549765, "grad_norm": 0.4993785403743351, "learning_rate": 3.731062449310624e-05, "loss": 0.5576, "step": 11249 }, { "epoch": 0.3284575632828239, "grad_norm": 0.5180138306380898, "learning_rate": 3.730900243309002e-05, "loss": 0.5726, "step": 11250 }, { "epoch": 0.32848675951067124, "grad_norm": 0.5293399310303746, "learning_rate": 3.7307380373073805e-05, "loss": 0.6339, "step": 11251 }, { "epoch": 0.3285159557385186, "grad_norm": 0.5368889396654837, "learning_rate": 3.7305758313057587e-05, "loss": 0.6627, "step": 11252 }, { "epoch": 0.32854515196636597, "grad_norm": 0.4916831095073852, "learning_rate": 3.730413625304137e-05, "loss": 0.6023, "step": 11253 }, { "epoch": 0.32857434819421333, "grad_norm": 0.5684237628361323, "learning_rate": 3.730251419302514e-05, "loss": 0.6873, "step": 11254 }, { "epoch": 0.3286035444220607, "grad_norm": 0.5671147142000413, "learning_rate": 3.7300892133008925e-05, "loss": 0.6546, "step": 11255 }, { "epoch": 0.32863274064990805, "grad_norm": 0.5655836788282655, "learning_rate": 3.729927007299271e-05, "loss": 0.6596, "step": 11256 }, { "epoch": 0.3286619368777554, "grad_norm": 0.5105527812433791, "learning_rate": 3.729764801297648e-05, "loss": 0.6177, "step": 11257 }, { "epoch": 0.3286911331056028, "grad_norm": 0.5817157360878956, "learning_rate": 3.7296025952960264e-05, "loss": 0.7187, "step": 11258 }, { "epoch": 0.32872032933345013, "grad_norm": 0.5230907994035354, "learning_rate": 3.729440389294404e-05, "loss": 0.6109, "step": 11259 }, { "epoch": 0.3287495255612975, "grad_norm": 0.4896759690233677, "learning_rate": 3.729278183292782e-05, "loss": 0.5301, "step": 11260 }, { "epoch": 0.32877872178914486, "grad_norm": 0.524893118209506, "learning_rate": 3.72911597729116e-05, "loss": 0.6393, "step": 11261 }, { "epoch": 0.3288079180169922, "grad_norm": 0.5516452365219107, "learning_rate": 3.728953771289538e-05, "loss": 0.6192, "step": 11262 }, { "epoch": 0.3288371142448396, "grad_norm": 0.5021390883622152, "learning_rate": 3.728791565287916e-05, "loss": 0.5705, "step": 11263 }, { "epoch": 0.32886631047268694, "grad_norm": 0.5743688707306583, "learning_rate": 3.7286293592862934e-05, "loss": 0.6663, "step": 11264 }, { "epoch": 0.3288955067005343, "grad_norm": 0.5588659088717151, "learning_rate": 3.7284671532846716e-05, "loss": 0.7002, "step": 11265 }, { "epoch": 0.32892470292838166, "grad_norm": 0.5480465620434987, "learning_rate": 3.72830494728305e-05, "loss": 0.5964, "step": 11266 }, { "epoch": 0.328953899156229, "grad_norm": 0.5522822749802565, "learning_rate": 3.728142741281427e-05, "loss": 0.5862, "step": 11267 }, { "epoch": 0.3289830953840764, "grad_norm": 0.5840555132391613, "learning_rate": 3.7279805352798054e-05, "loss": 0.6356, "step": 11268 }, { "epoch": 0.32901229161192375, "grad_norm": 0.5705421975917653, "learning_rate": 3.727818329278183e-05, "loss": 0.7535, "step": 11269 }, { "epoch": 0.3290414878397711, "grad_norm": 0.5577965145380736, "learning_rate": 3.727656123276562e-05, "loss": 0.6438, "step": 11270 }, { "epoch": 0.32907068406761847, "grad_norm": 0.5654814985430008, "learning_rate": 3.727493917274939e-05, "loss": 0.6495, "step": 11271 }, { "epoch": 0.32909988029546583, "grad_norm": 0.6285759041019735, "learning_rate": 3.7273317112733174e-05, "loss": 0.7795, "step": 11272 }, { "epoch": 0.3291290765233132, "grad_norm": 0.5604496120430594, "learning_rate": 3.7271695052716956e-05, "loss": 0.6315, "step": 11273 }, { "epoch": 0.32915827275116055, "grad_norm": 0.5307858918984577, "learning_rate": 3.727007299270073e-05, "loss": 0.6226, "step": 11274 }, { "epoch": 0.3291874689790079, "grad_norm": 0.5141705977665383, "learning_rate": 3.726845093268451e-05, "loss": 0.629, "step": 11275 }, { "epoch": 0.3292166652068553, "grad_norm": 0.5687834036111875, "learning_rate": 3.726682887266829e-05, "loss": 0.7278, "step": 11276 }, { "epoch": 0.32924586143470264, "grad_norm": 0.5163903047829342, "learning_rate": 3.726520681265207e-05, "loss": 0.6052, "step": 11277 }, { "epoch": 0.32927505766255, "grad_norm": 0.5456282838515193, "learning_rate": 3.726358475263585e-05, "loss": 0.5616, "step": 11278 }, { "epoch": 0.32930425389039736, "grad_norm": 0.5753837468431955, "learning_rate": 3.7261962692619626e-05, "loss": 0.6882, "step": 11279 }, { "epoch": 0.3293334501182447, "grad_norm": 0.5615301815723106, "learning_rate": 3.726034063260341e-05, "loss": 0.6404, "step": 11280 }, { "epoch": 0.3293626463460921, "grad_norm": 0.5410565050288223, "learning_rate": 3.725871857258719e-05, "loss": 0.5959, "step": 11281 }, { "epoch": 0.32939184257393944, "grad_norm": 0.6365966534791436, "learning_rate": 3.7257096512570965e-05, "loss": 0.7191, "step": 11282 }, { "epoch": 0.3294210388017868, "grad_norm": 0.505312733748046, "learning_rate": 3.725547445255475e-05, "loss": 0.5923, "step": 11283 }, { "epoch": 0.32945023502963416, "grad_norm": 0.6472396870933247, "learning_rate": 3.725385239253852e-05, "loss": 0.7657, "step": 11284 }, { "epoch": 0.3294794312574815, "grad_norm": 0.5467448780461123, "learning_rate": 3.7252230332522303e-05, "loss": 0.6217, "step": 11285 }, { "epoch": 0.3295086274853289, "grad_norm": 0.5397500674825999, "learning_rate": 3.7250608272506085e-05, "loss": 0.6435, "step": 11286 }, { "epoch": 0.32953782371317625, "grad_norm": 0.5405722083566129, "learning_rate": 3.724898621248986e-05, "loss": 0.6024, "step": 11287 }, { "epoch": 0.3295670199410236, "grad_norm": 0.5342415763785769, "learning_rate": 3.724736415247365e-05, "loss": 0.6328, "step": 11288 }, { "epoch": 0.32959621616887097, "grad_norm": 0.5665932106197048, "learning_rate": 3.7245742092457424e-05, "loss": 0.6573, "step": 11289 }, { "epoch": 0.32962541239671833, "grad_norm": 0.5639629456450161, "learning_rate": 3.7244120032441205e-05, "loss": 0.6702, "step": 11290 }, { "epoch": 0.3296546086245657, "grad_norm": 0.5254744987569085, "learning_rate": 3.724249797242498e-05, "loss": 0.5953, "step": 11291 }, { "epoch": 0.32968380485241305, "grad_norm": 0.5515620678929068, "learning_rate": 3.724087591240876e-05, "loss": 0.6526, "step": 11292 }, { "epoch": 0.3297130010802604, "grad_norm": 0.5806588472524469, "learning_rate": 3.7239253852392544e-05, "loss": 0.7456, "step": 11293 }, { "epoch": 0.3297421973081078, "grad_norm": 0.5726478812291547, "learning_rate": 3.723763179237632e-05, "loss": 0.6751, "step": 11294 }, { "epoch": 0.32977139353595514, "grad_norm": 0.5543440088050844, "learning_rate": 3.72360097323601e-05, "loss": 0.6305, "step": 11295 }, { "epoch": 0.3298005897638025, "grad_norm": 0.5451002743791629, "learning_rate": 3.7234387672343876e-05, "loss": 0.6867, "step": 11296 }, { "epoch": 0.32982978599164986, "grad_norm": 0.5292562306346911, "learning_rate": 3.723276561232766e-05, "loss": 0.6253, "step": 11297 }, { "epoch": 0.3298589822194972, "grad_norm": 0.573045636391955, "learning_rate": 3.723114355231144e-05, "loss": 0.6753, "step": 11298 }, { "epoch": 0.3298881784473446, "grad_norm": 0.5846171400797553, "learning_rate": 3.7229521492295214e-05, "loss": 0.71, "step": 11299 }, { "epoch": 0.32991737467519194, "grad_norm": 0.5252483959880264, "learning_rate": 3.7227899432278996e-05, "loss": 0.6061, "step": 11300 }, { "epoch": 0.3299465709030393, "grad_norm": 0.5559660991916984, "learning_rate": 3.722627737226278e-05, "loss": 0.642, "step": 11301 }, { "epoch": 0.32997576713088667, "grad_norm": 0.5785187721978412, "learning_rate": 3.722465531224655e-05, "loss": 0.7145, "step": 11302 }, { "epoch": 0.330004963358734, "grad_norm": 0.5380686469193235, "learning_rate": 3.7223033252230334e-05, "loss": 0.586, "step": 11303 }, { "epoch": 0.3300341595865814, "grad_norm": 0.5093180457850901, "learning_rate": 3.722141119221411e-05, "loss": 0.5633, "step": 11304 }, { "epoch": 0.33006335581442875, "grad_norm": 0.5486915063395422, "learning_rate": 3.721978913219789e-05, "loss": 0.6234, "step": 11305 }, { "epoch": 0.3300925520422761, "grad_norm": 0.5056143022167354, "learning_rate": 3.721816707218167e-05, "loss": 0.5721, "step": 11306 }, { "epoch": 0.3301217482701235, "grad_norm": 0.5765675189446424, "learning_rate": 3.7216545012165455e-05, "loss": 0.7452, "step": 11307 }, { "epoch": 0.3301509444979709, "grad_norm": 0.5495681916078188, "learning_rate": 3.7214922952149237e-05, "loss": 0.6703, "step": 11308 }, { "epoch": 0.33018014072581825, "grad_norm": 0.5412818235664513, "learning_rate": 3.721330089213301e-05, "loss": 0.6503, "step": 11309 }, { "epoch": 0.3302093369536656, "grad_norm": 0.5645289137986589, "learning_rate": 3.721167883211679e-05, "loss": 0.7134, "step": 11310 }, { "epoch": 0.33023853318151297, "grad_norm": 0.6303624815207097, "learning_rate": 3.721005677210057e-05, "loss": 0.7568, "step": 11311 }, { "epoch": 0.33026772940936033, "grad_norm": 0.5196400540740947, "learning_rate": 3.720843471208435e-05, "loss": 0.5986, "step": 11312 }, { "epoch": 0.3302969256372077, "grad_norm": 0.5394826758037956, "learning_rate": 3.720681265206813e-05, "loss": 0.6258, "step": 11313 }, { "epoch": 0.33032612186505506, "grad_norm": 0.5349636943875901, "learning_rate": 3.720519059205191e-05, "loss": 0.6588, "step": 11314 }, { "epoch": 0.3303553180929024, "grad_norm": 0.5683595208999027, "learning_rate": 3.720356853203569e-05, "loss": 0.6821, "step": 11315 }, { "epoch": 0.3303845143207498, "grad_norm": 0.5644198587021891, "learning_rate": 3.7201946472019464e-05, "loss": 0.6113, "step": 11316 }, { "epoch": 0.33041371054859714, "grad_norm": 0.5400162782938909, "learning_rate": 3.7200324412003245e-05, "loss": 0.591, "step": 11317 }, { "epoch": 0.3304429067764445, "grad_norm": 0.5289728317578832, "learning_rate": 3.719870235198703e-05, "loss": 0.5725, "step": 11318 }, { "epoch": 0.33047210300429186, "grad_norm": 0.5790683114659188, "learning_rate": 3.71970802919708e-05, "loss": 0.6981, "step": 11319 }, { "epoch": 0.3305012992321392, "grad_norm": 0.5652584278471271, "learning_rate": 3.7195458231954584e-05, "loss": 0.6021, "step": 11320 }, { "epoch": 0.3305304954599866, "grad_norm": 0.5617672059696207, "learning_rate": 3.719383617193836e-05, "loss": 0.6817, "step": 11321 }, { "epoch": 0.33055969168783395, "grad_norm": 0.5298762662474746, "learning_rate": 3.719221411192214e-05, "loss": 0.6278, "step": 11322 }, { "epoch": 0.3305888879156813, "grad_norm": 0.5555565523063459, "learning_rate": 3.719059205190592e-05, "loss": 0.6925, "step": 11323 }, { "epoch": 0.33061808414352867, "grad_norm": 0.5384198912445587, "learning_rate": 3.71889699918897e-05, "loss": 0.5776, "step": 11324 }, { "epoch": 0.33064728037137603, "grad_norm": 0.5235510488099817, "learning_rate": 3.718734793187348e-05, "loss": 0.612, "step": 11325 }, { "epoch": 0.3306764765992234, "grad_norm": 0.552952018657366, "learning_rate": 3.718572587185726e-05, "loss": 0.6772, "step": 11326 }, { "epoch": 0.33070567282707075, "grad_norm": 0.5248214567328238, "learning_rate": 3.718410381184104e-05, "loss": 0.6056, "step": 11327 }, { "epoch": 0.3307348690549181, "grad_norm": 0.5134075070996815, "learning_rate": 3.7182481751824824e-05, "loss": 0.5724, "step": 11328 }, { "epoch": 0.3307640652827655, "grad_norm": 0.5310034564111431, "learning_rate": 3.71808596918086e-05, "loss": 0.6184, "step": 11329 }, { "epoch": 0.33079326151061283, "grad_norm": 0.5630525300089197, "learning_rate": 3.717923763179238e-05, "loss": 0.6556, "step": 11330 }, { "epoch": 0.3308224577384602, "grad_norm": 0.5641397407967691, "learning_rate": 3.7177615571776156e-05, "loss": 0.6596, "step": 11331 }, { "epoch": 0.33085165396630756, "grad_norm": 0.5338324037759486, "learning_rate": 3.717599351175994e-05, "loss": 0.6026, "step": 11332 }, { "epoch": 0.3308808501941549, "grad_norm": 0.5904207509932995, "learning_rate": 3.717437145174372e-05, "loss": 0.6588, "step": 11333 }, { "epoch": 0.3309100464220023, "grad_norm": 0.6219927367471577, "learning_rate": 3.7172749391727495e-05, "loss": 0.6931, "step": 11334 }, { "epoch": 0.33093924264984964, "grad_norm": 0.540871300713818, "learning_rate": 3.7171127331711276e-05, "loss": 0.6153, "step": 11335 }, { "epoch": 0.330968438877697, "grad_norm": 0.5697991975837212, "learning_rate": 3.716950527169505e-05, "loss": 0.677, "step": 11336 }, { "epoch": 0.33099763510554436, "grad_norm": 0.5452556687552402, "learning_rate": 3.716788321167883e-05, "loss": 0.6624, "step": 11337 }, { "epoch": 0.3310268313333917, "grad_norm": 0.5358066217229913, "learning_rate": 3.7166261151662615e-05, "loss": 0.6671, "step": 11338 }, { "epoch": 0.3310560275612391, "grad_norm": 0.5413908956523745, "learning_rate": 3.716463909164639e-05, "loss": 0.6456, "step": 11339 }, { "epoch": 0.33108522378908645, "grad_norm": 0.5129679690833918, "learning_rate": 3.716301703163017e-05, "loss": 0.5343, "step": 11340 }, { "epoch": 0.3311144200169338, "grad_norm": 0.5784246842623629, "learning_rate": 3.7161394971613947e-05, "loss": 0.6558, "step": 11341 }, { "epoch": 0.33114361624478117, "grad_norm": 0.5726641506844524, "learning_rate": 3.715977291159773e-05, "loss": 0.6484, "step": 11342 }, { "epoch": 0.33117281247262853, "grad_norm": 0.5187820288077076, "learning_rate": 3.715815085158151e-05, "loss": 0.6255, "step": 11343 }, { "epoch": 0.3312020087004759, "grad_norm": 0.4900824326099491, "learning_rate": 3.7156528791565285e-05, "loss": 0.6128, "step": 11344 }, { "epoch": 0.33123120492832325, "grad_norm": 0.5359844827587409, "learning_rate": 3.7154906731549074e-05, "loss": 0.6597, "step": 11345 }, { "epoch": 0.3312604011561706, "grad_norm": 0.6598063839124334, "learning_rate": 3.715328467153285e-05, "loss": 0.6609, "step": 11346 }, { "epoch": 0.331289597384018, "grad_norm": 0.5352622071217975, "learning_rate": 3.715166261151663e-05, "loss": 0.6397, "step": 11347 }, { "epoch": 0.33131879361186534, "grad_norm": 0.5177620833894423, "learning_rate": 3.715004055150041e-05, "loss": 0.6266, "step": 11348 }, { "epoch": 0.3313479898397127, "grad_norm": 0.5553820485932723, "learning_rate": 3.714841849148419e-05, "loss": 0.6371, "step": 11349 }, { "epoch": 0.33137718606756006, "grad_norm": 0.556526312937387, "learning_rate": 3.714679643146797e-05, "loss": 0.6376, "step": 11350 }, { "epoch": 0.3314063822954074, "grad_norm": 0.5803501914370016, "learning_rate": 3.7145174371451744e-05, "loss": 0.6976, "step": 11351 }, { "epoch": 0.3314355785232548, "grad_norm": 0.5200176792602982, "learning_rate": 3.7143552311435526e-05, "loss": 0.6295, "step": 11352 }, { "epoch": 0.33146477475110214, "grad_norm": 0.5546597576198127, "learning_rate": 3.714193025141931e-05, "loss": 0.6752, "step": 11353 }, { "epoch": 0.3314939709789495, "grad_norm": 0.526079098339217, "learning_rate": 3.714030819140308e-05, "loss": 0.5647, "step": 11354 }, { "epoch": 0.33152316720679686, "grad_norm": 0.49969082846641927, "learning_rate": 3.7138686131386864e-05, "loss": 0.5662, "step": 11355 }, { "epoch": 0.3315523634346442, "grad_norm": 0.5531990654094667, "learning_rate": 3.713706407137064e-05, "loss": 0.6988, "step": 11356 }, { "epoch": 0.3315815596624916, "grad_norm": 0.575027636093669, "learning_rate": 3.713544201135442e-05, "loss": 0.7033, "step": 11357 }, { "epoch": 0.33161075589033895, "grad_norm": 0.5975199025968586, "learning_rate": 3.71338199513382e-05, "loss": 0.7232, "step": 11358 }, { "epoch": 0.3316399521181863, "grad_norm": 0.578538930611589, "learning_rate": 3.713219789132198e-05, "loss": 0.7411, "step": 11359 }, { "epoch": 0.33166914834603367, "grad_norm": 0.5476913264186675, "learning_rate": 3.713057583130576e-05, "loss": 0.6043, "step": 11360 }, { "epoch": 0.33169834457388103, "grad_norm": 0.5456708697196304, "learning_rate": 3.7128953771289534e-05, "loss": 0.6488, "step": 11361 }, { "epoch": 0.3317275408017284, "grad_norm": 0.5492322723687557, "learning_rate": 3.7127331711273316e-05, "loss": 0.6673, "step": 11362 }, { "epoch": 0.33175673702957575, "grad_norm": 0.5345087324945945, "learning_rate": 3.71257096512571e-05, "loss": 0.5785, "step": 11363 }, { "epoch": 0.3317859332574231, "grad_norm": 0.5620328570189468, "learning_rate": 3.712408759124088e-05, "loss": 0.6403, "step": 11364 }, { "epoch": 0.3318151294852705, "grad_norm": 0.5420071546136375, "learning_rate": 3.712246553122466e-05, "loss": 0.673, "step": 11365 }, { "epoch": 0.33184432571311784, "grad_norm": 0.5616167816157734, "learning_rate": 3.7120843471208436e-05, "loss": 0.6846, "step": 11366 }, { "epoch": 0.33187352194096525, "grad_norm": 0.5569308702556974, "learning_rate": 3.711922141119222e-05, "loss": 0.6899, "step": 11367 }, { "epoch": 0.3319027181688126, "grad_norm": 0.5813788841682809, "learning_rate": 3.7117599351176e-05, "loss": 0.7374, "step": 11368 }, { "epoch": 0.33193191439666, "grad_norm": 0.5615409299568195, "learning_rate": 3.7115977291159775e-05, "loss": 0.7215, "step": 11369 }, { "epoch": 0.33196111062450734, "grad_norm": 0.516269518850834, "learning_rate": 3.711435523114356e-05, "loss": 0.5747, "step": 11370 }, { "epoch": 0.3319903068523547, "grad_norm": 0.6348928962536587, "learning_rate": 3.711273317112733e-05, "loss": 0.6697, "step": 11371 }, { "epoch": 0.33201950308020206, "grad_norm": 0.5638507898093073, "learning_rate": 3.7111111111111113e-05, "loss": 0.6275, "step": 11372 }, { "epoch": 0.3320486993080494, "grad_norm": 0.5326205917717731, "learning_rate": 3.7109489051094895e-05, "loss": 0.5825, "step": 11373 }, { "epoch": 0.3320778955358968, "grad_norm": 0.5881782811299153, "learning_rate": 3.710786699107867e-05, "loss": 0.6668, "step": 11374 }, { "epoch": 0.33210709176374414, "grad_norm": 0.6130995957527031, "learning_rate": 3.710624493106245e-05, "loss": 0.7127, "step": 11375 }, { "epoch": 0.3321362879915915, "grad_norm": 0.5697002499563505, "learning_rate": 3.710462287104623e-05, "loss": 0.6819, "step": 11376 }, { "epoch": 0.33216548421943887, "grad_norm": 0.5257835951584379, "learning_rate": 3.710300081103001e-05, "loss": 0.5885, "step": 11377 }, { "epoch": 0.33219468044728623, "grad_norm": 0.5947005828715908, "learning_rate": 3.710137875101379e-05, "loss": 0.738, "step": 11378 }, { "epoch": 0.3322238766751336, "grad_norm": 0.5134582319210569, "learning_rate": 3.7099756690997565e-05, "loss": 0.5927, "step": 11379 }, { "epoch": 0.33225307290298095, "grad_norm": 0.5361351053196788, "learning_rate": 3.709813463098135e-05, "loss": 0.5931, "step": 11380 }, { "epoch": 0.3322822691308283, "grad_norm": 0.5233729422967751, "learning_rate": 3.709651257096512e-05, "loss": 0.6215, "step": 11381 }, { "epoch": 0.3323114653586757, "grad_norm": 0.5779078317134984, "learning_rate": 3.7094890510948904e-05, "loss": 0.6514, "step": 11382 }, { "epoch": 0.33234066158652303, "grad_norm": 0.6057387419780765, "learning_rate": 3.7093268450932686e-05, "loss": 0.713, "step": 11383 }, { "epoch": 0.3323698578143704, "grad_norm": 0.6357072538804092, "learning_rate": 3.709164639091647e-05, "loss": 0.7414, "step": 11384 }, { "epoch": 0.33239905404221776, "grad_norm": 0.5216684328489758, "learning_rate": 3.709002433090025e-05, "loss": 0.5882, "step": 11385 }, { "epoch": 0.3324282502700651, "grad_norm": 0.5622813028204918, "learning_rate": 3.7088402270884024e-05, "loss": 0.6625, "step": 11386 }, { "epoch": 0.3324574464979125, "grad_norm": 0.5338615623267342, "learning_rate": 3.7086780210867806e-05, "loss": 0.578, "step": 11387 }, { "epoch": 0.33248664272575984, "grad_norm": 0.5556666603331563, "learning_rate": 3.708515815085159e-05, "loss": 0.6665, "step": 11388 }, { "epoch": 0.3325158389536072, "grad_norm": 0.5656452854367198, "learning_rate": 3.708353609083536e-05, "loss": 0.6671, "step": 11389 }, { "epoch": 0.33254503518145456, "grad_norm": 0.5556217720887909, "learning_rate": 3.7081914030819144e-05, "loss": 0.6167, "step": 11390 }, { "epoch": 0.3325742314093019, "grad_norm": 0.5881440240044797, "learning_rate": 3.708029197080292e-05, "loss": 0.6391, "step": 11391 }, { "epoch": 0.3326034276371493, "grad_norm": 0.5168086263193516, "learning_rate": 3.70786699107867e-05, "loss": 0.5594, "step": 11392 }, { "epoch": 0.33263262386499665, "grad_norm": 0.5502690592601147, "learning_rate": 3.707704785077048e-05, "loss": 0.6171, "step": 11393 }, { "epoch": 0.332661820092844, "grad_norm": 0.53131981400063, "learning_rate": 3.707542579075426e-05, "loss": 0.6011, "step": 11394 }, { "epoch": 0.33269101632069137, "grad_norm": 0.5212262593586641, "learning_rate": 3.707380373073804e-05, "loss": 0.5669, "step": 11395 }, { "epoch": 0.33272021254853873, "grad_norm": 0.53667968391937, "learning_rate": 3.7072181670721815e-05, "loss": 0.6375, "step": 11396 }, { "epoch": 0.3327494087763861, "grad_norm": 0.5698618106254797, "learning_rate": 3.7070559610705596e-05, "loss": 0.6517, "step": 11397 }, { "epoch": 0.33277860500423345, "grad_norm": 0.5610782311118393, "learning_rate": 3.706893755068938e-05, "loss": 0.6889, "step": 11398 }, { "epoch": 0.3328078012320808, "grad_norm": 0.5707649992554723, "learning_rate": 3.706731549067315e-05, "loss": 0.6683, "step": 11399 }, { "epoch": 0.3328369974599282, "grad_norm": 0.5347089995899897, "learning_rate": 3.7065693430656935e-05, "loss": 0.6176, "step": 11400 }, { "epoch": 0.33286619368777554, "grad_norm": 0.5763951252504644, "learning_rate": 3.706407137064071e-05, "loss": 0.7442, "step": 11401 }, { "epoch": 0.3328953899156229, "grad_norm": 0.5173502741778349, "learning_rate": 3.70624493106245e-05, "loss": 0.5915, "step": 11402 }, { "epoch": 0.33292458614347026, "grad_norm": 0.5432780357313516, "learning_rate": 3.7060827250608274e-05, "loss": 0.6457, "step": 11403 }, { "epoch": 0.3329537823713176, "grad_norm": 0.5143916128863771, "learning_rate": 3.7059205190592055e-05, "loss": 0.5713, "step": 11404 }, { "epoch": 0.332982978599165, "grad_norm": 0.5261922445437422, "learning_rate": 3.705758313057584e-05, "loss": 0.604, "step": 11405 }, { "epoch": 0.33301217482701234, "grad_norm": 1.3555659896119223, "learning_rate": 3.705596107055961e-05, "loss": 0.6508, "step": 11406 }, { "epoch": 0.3330413710548597, "grad_norm": 0.5292352000947022, "learning_rate": 3.7054339010543394e-05, "loss": 0.6361, "step": 11407 }, { "epoch": 0.33307056728270706, "grad_norm": 0.5761204163608273, "learning_rate": 3.705271695052717e-05, "loss": 0.7333, "step": 11408 }, { "epoch": 0.3330997635105544, "grad_norm": 0.5761917303267091, "learning_rate": 3.705109489051095e-05, "loss": 0.689, "step": 11409 }, { "epoch": 0.3331289597384018, "grad_norm": 0.5860442107829886, "learning_rate": 3.704947283049473e-05, "loss": 0.6729, "step": 11410 }, { "epoch": 0.33315815596624915, "grad_norm": 0.5115968766904699, "learning_rate": 3.704785077047851e-05, "loss": 0.5823, "step": 11411 }, { "epoch": 0.3331873521940965, "grad_norm": 0.5552323641421453, "learning_rate": 3.704622871046229e-05, "loss": 0.6825, "step": 11412 }, { "epoch": 0.33321654842194387, "grad_norm": 0.5467665810882991, "learning_rate": 3.704460665044607e-05, "loss": 0.6811, "step": 11413 }, { "epoch": 0.33324574464979123, "grad_norm": 0.5593338835352657, "learning_rate": 3.7042984590429846e-05, "loss": 0.6326, "step": 11414 }, { "epoch": 0.3332749408776386, "grad_norm": 0.6210798093545702, "learning_rate": 3.704136253041363e-05, "loss": 0.6203, "step": 11415 }, { "epoch": 0.33330413710548595, "grad_norm": 0.5499364273745097, "learning_rate": 3.70397404703974e-05, "loss": 0.6452, "step": 11416 }, { "epoch": 0.3333333333333333, "grad_norm": 0.5363768088262677, "learning_rate": 3.7038118410381184e-05, "loss": 0.6337, "step": 11417 }, { "epoch": 0.3333625295611807, "grad_norm": 0.603752605824575, "learning_rate": 3.7036496350364966e-05, "loss": 0.6979, "step": 11418 }, { "epoch": 0.33339172578902804, "grad_norm": 0.5652415981008437, "learning_rate": 3.703487429034874e-05, "loss": 0.5816, "step": 11419 }, { "epoch": 0.3334209220168754, "grad_norm": 0.559628330195464, "learning_rate": 3.703325223033252e-05, "loss": 0.6676, "step": 11420 }, { "epoch": 0.33345011824472276, "grad_norm": 0.5881959550280372, "learning_rate": 3.7031630170316305e-05, "loss": 0.6693, "step": 11421 }, { "epoch": 0.3334793144725701, "grad_norm": 0.4830342867543091, "learning_rate": 3.7030008110300086e-05, "loss": 0.5425, "step": 11422 }, { "epoch": 0.3335085107004175, "grad_norm": 0.5513292071458248, "learning_rate": 3.702838605028386e-05, "loss": 0.6386, "step": 11423 }, { "epoch": 0.33353770692826484, "grad_norm": 0.5604347231041543, "learning_rate": 3.702676399026764e-05, "loss": 0.6003, "step": 11424 }, { "epoch": 0.3335669031561122, "grad_norm": 0.562516619755946, "learning_rate": 3.7025141930251425e-05, "loss": 0.6554, "step": 11425 }, { "epoch": 0.33359609938395957, "grad_norm": 0.5498430188623789, "learning_rate": 3.70235198702352e-05, "loss": 0.6836, "step": 11426 }, { "epoch": 0.3336252956118069, "grad_norm": 0.6202648020378471, "learning_rate": 3.702189781021898e-05, "loss": 0.6817, "step": 11427 }, { "epoch": 0.33365449183965434, "grad_norm": 0.5603146314502517, "learning_rate": 3.7020275750202757e-05, "loss": 0.6785, "step": 11428 }, { "epoch": 0.3336836880675017, "grad_norm": 0.6090671719790868, "learning_rate": 3.701865369018654e-05, "loss": 0.731, "step": 11429 }, { "epoch": 0.33371288429534907, "grad_norm": 0.5080146489430293, "learning_rate": 3.701703163017032e-05, "loss": 0.5915, "step": 11430 }, { "epoch": 0.3337420805231964, "grad_norm": 0.6355878869773651, "learning_rate": 3.7015409570154095e-05, "loss": 0.7488, "step": 11431 }, { "epoch": 0.3337712767510438, "grad_norm": 0.5105878150167096, "learning_rate": 3.701378751013788e-05, "loss": 0.596, "step": 11432 }, { "epoch": 0.33380047297889115, "grad_norm": 0.5464734231947794, "learning_rate": 3.701216545012166e-05, "loss": 0.7159, "step": 11433 }, { "epoch": 0.3338296692067385, "grad_norm": 0.5241534347621264, "learning_rate": 3.7010543390105434e-05, "loss": 0.5989, "step": 11434 }, { "epoch": 0.33385886543458587, "grad_norm": 0.4957165689056104, "learning_rate": 3.7008921330089215e-05, "loss": 0.5585, "step": 11435 }, { "epoch": 0.33388806166243323, "grad_norm": 0.5194079038907231, "learning_rate": 3.700729927007299e-05, "loss": 0.6119, "step": 11436 }, { "epoch": 0.3339172578902806, "grad_norm": 0.563829371919239, "learning_rate": 3.700567721005677e-05, "loss": 0.6673, "step": 11437 }, { "epoch": 0.33394645411812796, "grad_norm": 0.5381971624892296, "learning_rate": 3.7004055150040554e-05, "loss": 0.6282, "step": 11438 }, { "epoch": 0.3339756503459753, "grad_norm": 0.6074869771852681, "learning_rate": 3.7002433090024336e-05, "loss": 0.6835, "step": 11439 }, { "epoch": 0.3340048465738227, "grad_norm": 0.5729851669179743, "learning_rate": 3.700081103000812e-05, "loss": 0.7185, "step": 11440 }, { "epoch": 0.33403404280167004, "grad_norm": 0.5458363795464116, "learning_rate": 3.699918896999189e-05, "loss": 0.6678, "step": 11441 }, { "epoch": 0.3340632390295174, "grad_norm": 0.4804786137207897, "learning_rate": 3.6997566909975674e-05, "loss": 0.4988, "step": 11442 }, { "epoch": 0.33409243525736476, "grad_norm": 0.5512954768198125, "learning_rate": 3.699594484995945e-05, "loss": 0.6661, "step": 11443 }, { "epoch": 0.3341216314852121, "grad_norm": 0.5373192017864907, "learning_rate": 3.699432278994323e-05, "loss": 0.6305, "step": 11444 }, { "epoch": 0.3341508277130595, "grad_norm": 0.6016674945150036, "learning_rate": 3.699270072992701e-05, "loss": 0.6603, "step": 11445 }, { "epoch": 0.33418002394090685, "grad_norm": 0.5323478523292161, "learning_rate": 3.699107866991079e-05, "loss": 0.6373, "step": 11446 }, { "epoch": 0.3342092201687542, "grad_norm": 0.5233912935547183, "learning_rate": 3.698945660989457e-05, "loss": 0.562, "step": 11447 }, { "epoch": 0.33423841639660157, "grad_norm": 0.551821962404251, "learning_rate": 3.6987834549878344e-05, "loss": 0.6362, "step": 11448 }, { "epoch": 0.33426761262444893, "grad_norm": 0.8291961548121747, "learning_rate": 3.6986212489862126e-05, "loss": 0.6705, "step": 11449 }, { "epoch": 0.3342968088522963, "grad_norm": 0.5902209027024, "learning_rate": 3.698459042984591e-05, "loss": 0.7087, "step": 11450 }, { "epoch": 0.33432600508014365, "grad_norm": 0.5749364468237633, "learning_rate": 3.698296836982968e-05, "loss": 0.7462, "step": 11451 }, { "epoch": 0.334355201307991, "grad_norm": 0.5348109358307908, "learning_rate": 3.6981346309813465e-05, "loss": 0.6206, "step": 11452 }, { "epoch": 0.3343843975358384, "grad_norm": 0.5651896125617176, "learning_rate": 3.697972424979724e-05, "loss": 0.643, "step": 11453 }, { "epoch": 0.33441359376368573, "grad_norm": 0.5123470404658145, "learning_rate": 3.697810218978102e-05, "loss": 0.6135, "step": 11454 }, { "epoch": 0.3344427899915331, "grad_norm": 0.5749683134347772, "learning_rate": 3.69764801297648e-05, "loss": 0.6431, "step": 11455 }, { "epoch": 0.33447198621938046, "grad_norm": 0.5249121698985278, "learning_rate": 3.697485806974858e-05, "loss": 0.5858, "step": 11456 }, { "epoch": 0.3345011824472278, "grad_norm": 0.5362190571855678, "learning_rate": 3.697323600973236e-05, "loss": 0.657, "step": 11457 }, { "epoch": 0.3345303786750752, "grad_norm": 0.5597421927169539, "learning_rate": 3.697161394971614e-05, "loss": 0.6489, "step": 11458 }, { "epoch": 0.33455957490292254, "grad_norm": 0.5175420311870581, "learning_rate": 3.6969991889699923e-05, "loss": 0.5608, "step": 11459 }, { "epoch": 0.3345887711307699, "grad_norm": 0.48814231037359046, "learning_rate": 3.6968369829683705e-05, "loss": 0.5466, "step": 11460 }, { "epoch": 0.33461796735861726, "grad_norm": 0.5458872317104322, "learning_rate": 3.696674776966748e-05, "loss": 0.6313, "step": 11461 }, { "epoch": 0.3346471635864646, "grad_norm": 0.5789077778995986, "learning_rate": 3.696512570965126e-05, "loss": 0.729, "step": 11462 }, { "epoch": 0.334676359814312, "grad_norm": 0.5548755109133399, "learning_rate": 3.696350364963504e-05, "loss": 0.6328, "step": 11463 }, { "epoch": 0.33470555604215935, "grad_norm": 0.5716664477873965, "learning_rate": 3.696188158961882e-05, "loss": 0.6099, "step": 11464 }, { "epoch": 0.3347347522700067, "grad_norm": 0.5824742054080632, "learning_rate": 3.69602595296026e-05, "loss": 0.648, "step": 11465 }, { "epoch": 0.33476394849785407, "grad_norm": 0.5205249322233271, "learning_rate": 3.6958637469586375e-05, "loss": 0.5698, "step": 11466 }, { "epoch": 0.33479314472570143, "grad_norm": 0.5376805239870714, "learning_rate": 3.695701540957016e-05, "loss": 0.6465, "step": 11467 }, { "epoch": 0.3348223409535488, "grad_norm": 0.5195825055803338, "learning_rate": 3.695539334955393e-05, "loss": 0.5886, "step": 11468 }, { "epoch": 0.33485153718139615, "grad_norm": 0.5198146966192371, "learning_rate": 3.6953771289537714e-05, "loss": 0.6148, "step": 11469 }, { "epoch": 0.3348807334092435, "grad_norm": 0.49864986632821656, "learning_rate": 3.6952149229521496e-05, "loss": 0.5064, "step": 11470 }, { "epoch": 0.3349099296370909, "grad_norm": 0.6079029918031112, "learning_rate": 3.695052716950527e-05, "loss": 0.6463, "step": 11471 }, { "epoch": 0.33493912586493824, "grad_norm": 0.5283007572513173, "learning_rate": 3.694890510948905e-05, "loss": 0.6254, "step": 11472 }, { "epoch": 0.3349683220927856, "grad_norm": 0.5645080672067432, "learning_rate": 3.694728304947283e-05, "loss": 0.6083, "step": 11473 }, { "epoch": 0.33499751832063296, "grad_norm": 0.542609038811829, "learning_rate": 3.694566098945661e-05, "loss": 0.6356, "step": 11474 }, { "epoch": 0.3350267145484803, "grad_norm": 0.6355884659686217, "learning_rate": 3.694403892944039e-05, "loss": 0.7739, "step": 11475 }, { "epoch": 0.3350559107763277, "grad_norm": 0.5908946754067292, "learning_rate": 3.6942416869424166e-05, "loss": 0.7022, "step": 11476 }, { "epoch": 0.33508510700417504, "grad_norm": 0.5335542975933302, "learning_rate": 3.6940794809407955e-05, "loss": 0.6021, "step": 11477 }, { "epoch": 0.3351143032320224, "grad_norm": 0.5322171415116871, "learning_rate": 3.693917274939173e-05, "loss": 0.6394, "step": 11478 }, { "epoch": 0.33514349945986976, "grad_norm": 0.553234439301648, "learning_rate": 3.693755068937551e-05, "loss": 0.683, "step": 11479 }, { "epoch": 0.3351726956877171, "grad_norm": 0.5434705481304885, "learning_rate": 3.693592862935929e-05, "loss": 0.6496, "step": 11480 }, { "epoch": 0.3352018919155645, "grad_norm": 0.5510571200635812, "learning_rate": 3.693430656934307e-05, "loss": 0.6315, "step": 11481 }, { "epoch": 0.33523108814341185, "grad_norm": 0.5187069138027982, "learning_rate": 3.693268450932685e-05, "loss": 0.5914, "step": 11482 }, { "epoch": 0.3352602843712592, "grad_norm": 0.5858246498983669, "learning_rate": 3.6931062449310625e-05, "loss": 0.7028, "step": 11483 }, { "epoch": 0.33528948059910657, "grad_norm": 0.5454074487082444, "learning_rate": 3.6929440389294407e-05, "loss": 0.6707, "step": 11484 }, { "epoch": 0.33531867682695393, "grad_norm": 0.5501310935429993, "learning_rate": 3.692781832927819e-05, "loss": 0.623, "step": 11485 }, { "epoch": 0.3353478730548013, "grad_norm": 0.5333931554737286, "learning_rate": 3.692619626926196e-05, "loss": 0.6245, "step": 11486 }, { "epoch": 0.33537706928264865, "grad_norm": 0.5109167988610637, "learning_rate": 3.6924574209245745e-05, "loss": 0.6165, "step": 11487 }, { "epoch": 0.33540626551049607, "grad_norm": 0.5416933677503755, "learning_rate": 3.692295214922952e-05, "loss": 0.6261, "step": 11488 }, { "epoch": 0.33543546173834343, "grad_norm": 0.568494109263896, "learning_rate": 3.69213300892133e-05, "loss": 0.6327, "step": 11489 }, { "epoch": 0.3354646579661908, "grad_norm": 0.5220076055072956, "learning_rate": 3.6919708029197084e-05, "loss": 0.6084, "step": 11490 }, { "epoch": 0.33549385419403815, "grad_norm": 0.5418548393345268, "learning_rate": 3.691808596918086e-05, "loss": 0.5676, "step": 11491 }, { "epoch": 0.3355230504218855, "grad_norm": 0.5561246159487667, "learning_rate": 3.691646390916464e-05, "loss": 0.6209, "step": 11492 }, { "epoch": 0.3355522466497329, "grad_norm": 0.544633182903347, "learning_rate": 3.6914841849148415e-05, "loss": 0.6213, "step": 11493 }, { "epoch": 0.33558144287758024, "grad_norm": 0.5346532448232948, "learning_rate": 3.69132197891322e-05, "loss": 0.6327, "step": 11494 }, { "epoch": 0.3356106391054276, "grad_norm": 0.5026130147557802, "learning_rate": 3.691159772911598e-05, "loss": 0.5658, "step": 11495 }, { "epoch": 0.33563983533327496, "grad_norm": 0.5395599317123378, "learning_rate": 3.690997566909976e-05, "loss": 0.6559, "step": 11496 }, { "epoch": 0.3356690315611223, "grad_norm": 0.5826822813542389, "learning_rate": 3.690835360908354e-05, "loss": 0.6699, "step": 11497 }, { "epoch": 0.3356982277889697, "grad_norm": 0.5713887741169209, "learning_rate": 3.690673154906732e-05, "loss": 0.7346, "step": 11498 }, { "epoch": 0.33572742401681704, "grad_norm": 0.5630987922924954, "learning_rate": 3.69051094890511e-05, "loss": 0.7027, "step": 11499 }, { "epoch": 0.3357566202446644, "grad_norm": 0.5334108824103075, "learning_rate": 3.690348742903488e-05, "loss": 0.6432, "step": 11500 }, { "epoch": 0.33578581647251177, "grad_norm": 0.5401555313483467, "learning_rate": 3.6901865369018656e-05, "loss": 0.6718, "step": 11501 }, { "epoch": 0.33581501270035913, "grad_norm": 0.5313152173517324, "learning_rate": 3.690024330900244e-05, "loss": 0.6291, "step": 11502 }, { "epoch": 0.3358442089282065, "grad_norm": 0.525992543853134, "learning_rate": 3.689862124898621e-05, "loss": 0.606, "step": 11503 }, { "epoch": 0.33587340515605385, "grad_norm": 0.5885641284544233, "learning_rate": 3.6896999188969994e-05, "loss": 0.6609, "step": 11504 }, { "epoch": 0.3359026013839012, "grad_norm": 0.5126356943075535, "learning_rate": 3.6895377128953776e-05, "loss": 0.5853, "step": 11505 }, { "epoch": 0.3359317976117486, "grad_norm": 0.5077811772574519, "learning_rate": 3.689375506893755e-05, "loss": 0.5461, "step": 11506 }, { "epoch": 0.33596099383959593, "grad_norm": 0.48938600095146123, "learning_rate": 3.689213300892133e-05, "loss": 0.562, "step": 11507 }, { "epoch": 0.3359901900674433, "grad_norm": 0.5511924064741396, "learning_rate": 3.689051094890511e-05, "loss": 0.6947, "step": 11508 }, { "epoch": 0.33601938629529066, "grad_norm": 0.5295135459339093, "learning_rate": 3.688888888888889e-05, "loss": 0.5927, "step": 11509 }, { "epoch": 0.336048582523138, "grad_norm": 0.5379025228842864, "learning_rate": 3.688726682887267e-05, "loss": 0.6946, "step": 11510 }, { "epoch": 0.3360777787509854, "grad_norm": 0.5178828847927966, "learning_rate": 3.6885644768856446e-05, "loss": 0.6006, "step": 11511 }, { "epoch": 0.33610697497883274, "grad_norm": 0.5859870132656859, "learning_rate": 3.688402270884023e-05, "loss": 0.7219, "step": 11512 }, { "epoch": 0.3361361712066801, "grad_norm": 0.4814510423325005, "learning_rate": 3.6882400648824e-05, "loss": 0.5204, "step": 11513 }, { "epoch": 0.33616536743452746, "grad_norm": 0.5367526015105097, "learning_rate": 3.6880778588807785e-05, "loss": 0.6594, "step": 11514 }, { "epoch": 0.3361945636623748, "grad_norm": 0.5239376316255004, "learning_rate": 3.6879156528791567e-05, "loss": 0.618, "step": 11515 }, { "epoch": 0.3362237598902222, "grad_norm": 0.573405317436227, "learning_rate": 3.687753446877535e-05, "loss": 0.6659, "step": 11516 }, { "epoch": 0.33625295611806955, "grad_norm": 0.532673306806965, "learning_rate": 3.687591240875913e-05, "loss": 0.6526, "step": 11517 }, { "epoch": 0.3362821523459169, "grad_norm": 0.5042443696204026, "learning_rate": 3.6874290348742905e-05, "loss": 0.5809, "step": 11518 }, { "epoch": 0.33631134857376427, "grad_norm": 0.5969956371061624, "learning_rate": 3.687266828872669e-05, "loss": 0.723, "step": 11519 }, { "epoch": 0.33634054480161163, "grad_norm": 0.5340089284658684, "learning_rate": 3.687104622871046e-05, "loss": 0.6005, "step": 11520 }, { "epoch": 0.336369741029459, "grad_norm": 0.6951768947785535, "learning_rate": 3.6869424168694244e-05, "loss": 0.5851, "step": 11521 }, { "epoch": 0.33639893725730635, "grad_norm": 0.5475354901710668, "learning_rate": 3.6867802108678025e-05, "loss": 0.6598, "step": 11522 }, { "epoch": 0.3364281334851537, "grad_norm": 0.5603613044049595, "learning_rate": 3.68661800486618e-05, "loss": 0.7259, "step": 11523 }, { "epoch": 0.3364573297130011, "grad_norm": 0.5350147518905468, "learning_rate": 3.686455798864558e-05, "loss": 0.5857, "step": 11524 }, { "epoch": 0.33648652594084844, "grad_norm": 0.5569033825572766, "learning_rate": 3.6862935928629364e-05, "loss": 0.6719, "step": 11525 }, { "epoch": 0.3365157221686958, "grad_norm": 0.5442006443897397, "learning_rate": 3.686131386861314e-05, "loss": 0.6169, "step": 11526 }, { "epoch": 0.33654491839654316, "grad_norm": 0.5399741161075735, "learning_rate": 3.685969180859692e-05, "loss": 0.6812, "step": 11527 }, { "epoch": 0.3365741146243905, "grad_norm": 0.5342864301444423, "learning_rate": 3.6858069748580696e-05, "loss": 0.6296, "step": 11528 }, { "epoch": 0.3366033108522379, "grad_norm": 0.5094252353557089, "learning_rate": 3.685644768856448e-05, "loss": 0.5869, "step": 11529 }, { "epoch": 0.33663250708008524, "grad_norm": 0.5202581976627182, "learning_rate": 3.685482562854826e-05, "loss": 0.6195, "step": 11530 }, { "epoch": 0.3366617033079326, "grad_norm": 0.52463661100248, "learning_rate": 3.6853203568532034e-05, "loss": 0.6112, "step": 11531 }, { "epoch": 0.33669089953577996, "grad_norm": 0.5550585859264321, "learning_rate": 3.6851581508515816e-05, "loss": 0.6124, "step": 11532 }, { "epoch": 0.3367200957636273, "grad_norm": 0.5119844579470587, "learning_rate": 3.684995944849959e-05, "loss": 0.5607, "step": 11533 }, { "epoch": 0.3367492919914747, "grad_norm": 0.5602674221060916, "learning_rate": 3.684833738848338e-05, "loss": 0.658, "step": 11534 }, { "epoch": 0.33677848821932205, "grad_norm": 0.5373220620562835, "learning_rate": 3.6846715328467154e-05, "loss": 0.6529, "step": 11535 }, { "epoch": 0.3368076844471694, "grad_norm": 0.5591613778106702, "learning_rate": 3.6845093268450936e-05, "loss": 0.6567, "step": 11536 }, { "epoch": 0.33683688067501677, "grad_norm": 0.5564970243619612, "learning_rate": 3.684347120843472e-05, "loss": 0.6733, "step": 11537 }, { "epoch": 0.33686607690286413, "grad_norm": 0.5606861125790424, "learning_rate": 3.684184914841849e-05, "loss": 0.6868, "step": 11538 }, { "epoch": 0.3368952731307115, "grad_norm": 0.5224596947088468, "learning_rate": 3.6840227088402275e-05, "loss": 0.6145, "step": 11539 }, { "epoch": 0.33692446935855885, "grad_norm": 0.5446761221815548, "learning_rate": 3.683860502838605e-05, "loss": 0.6555, "step": 11540 }, { "epoch": 0.3369536655864062, "grad_norm": 0.5458203182336024, "learning_rate": 3.683698296836983e-05, "loss": 0.62, "step": 11541 }, { "epoch": 0.3369828618142536, "grad_norm": 0.5304175722585927, "learning_rate": 3.683536090835361e-05, "loss": 0.6101, "step": 11542 }, { "epoch": 0.33701205804210094, "grad_norm": 0.5434379906007791, "learning_rate": 3.683373884833739e-05, "loss": 0.6282, "step": 11543 }, { "epoch": 0.3370412542699483, "grad_norm": 0.49577303060794203, "learning_rate": 3.683211678832117e-05, "loss": 0.586, "step": 11544 }, { "epoch": 0.33707045049779566, "grad_norm": 0.5546752112188938, "learning_rate": 3.683049472830495e-05, "loss": 0.6024, "step": 11545 }, { "epoch": 0.337099646725643, "grad_norm": 0.5170695545977949, "learning_rate": 3.682887266828873e-05, "loss": 0.5722, "step": 11546 }, { "epoch": 0.3371288429534904, "grad_norm": 0.5535130633906016, "learning_rate": 3.682725060827251e-05, "loss": 0.6165, "step": 11547 }, { "epoch": 0.3371580391813378, "grad_norm": 0.5371237642997451, "learning_rate": 3.6825628548256283e-05, "loss": 0.6182, "step": 11548 }, { "epoch": 0.33718723540918516, "grad_norm": 0.5043971669194821, "learning_rate": 3.6824006488240065e-05, "loss": 0.5729, "step": 11549 }, { "epoch": 0.3372164316370325, "grad_norm": 0.5284189970629114, "learning_rate": 3.682238442822385e-05, "loss": 0.6353, "step": 11550 }, { "epoch": 0.3372456278648799, "grad_norm": 0.5285166834750464, "learning_rate": 3.682076236820762e-05, "loss": 0.5804, "step": 11551 }, { "epoch": 0.33727482409272724, "grad_norm": 0.598061312368519, "learning_rate": 3.6819140308191404e-05, "loss": 0.6467, "step": 11552 }, { "epoch": 0.3373040203205746, "grad_norm": 0.5462076639220024, "learning_rate": 3.6817518248175185e-05, "loss": 0.676, "step": 11553 }, { "epoch": 0.33733321654842197, "grad_norm": 0.5755767473971912, "learning_rate": 3.681589618815897e-05, "loss": 0.6404, "step": 11554 }, { "epoch": 0.3373624127762693, "grad_norm": 0.5193898270663517, "learning_rate": 3.681427412814274e-05, "loss": 0.5935, "step": 11555 }, { "epoch": 0.3373916090041167, "grad_norm": 0.5328722514684797, "learning_rate": 3.6812652068126524e-05, "loss": 0.6353, "step": 11556 }, { "epoch": 0.33742080523196405, "grad_norm": 0.5531221765785836, "learning_rate": 3.6811030008110306e-05, "loss": 0.7088, "step": 11557 }, { "epoch": 0.3374500014598114, "grad_norm": 0.5488279574764129, "learning_rate": 3.680940794809408e-05, "loss": 0.6273, "step": 11558 }, { "epoch": 0.33747919768765877, "grad_norm": 0.5821504701955128, "learning_rate": 3.680778588807786e-05, "loss": 0.6935, "step": 11559 }, { "epoch": 0.33750839391550613, "grad_norm": 0.5884598406120722, "learning_rate": 3.680616382806164e-05, "loss": 0.6617, "step": 11560 }, { "epoch": 0.3375375901433535, "grad_norm": 0.5047969788771363, "learning_rate": 3.680454176804542e-05, "loss": 0.5574, "step": 11561 }, { "epoch": 0.33756678637120086, "grad_norm": 0.5616191375765923, "learning_rate": 3.68029197080292e-05, "loss": 0.6397, "step": 11562 }, { "epoch": 0.3375959825990482, "grad_norm": 0.5078766020649733, "learning_rate": 3.6801297648012976e-05, "loss": 0.572, "step": 11563 }, { "epoch": 0.3376251788268956, "grad_norm": 0.5364808864336049, "learning_rate": 3.679967558799676e-05, "loss": 0.6096, "step": 11564 }, { "epoch": 0.33765437505474294, "grad_norm": 0.5952093556846547, "learning_rate": 3.679805352798053e-05, "loss": 0.7426, "step": 11565 }, { "epoch": 0.3376835712825903, "grad_norm": 0.5496820728827386, "learning_rate": 3.6796431467964314e-05, "loss": 0.6312, "step": 11566 }, { "epoch": 0.33771276751043766, "grad_norm": 0.5529528069066454, "learning_rate": 3.6794809407948096e-05, "loss": 0.6464, "step": 11567 }, { "epoch": 0.337741963738285, "grad_norm": 0.5477564093822089, "learning_rate": 3.679318734793187e-05, "loss": 0.6169, "step": 11568 }, { "epoch": 0.3377711599661324, "grad_norm": 0.5521205618217806, "learning_rate": 3.679156528791565e-05, "loss": 0.6498, "step": 11569 }, { "epoch": 0.33780035619397975, "grad_norm": 0.5360004721302051, "learning_rate": 3.6789943227899435e-05, "loss": 0.5845, "step": 11570 }, { "epoch": 0.3378295524218271, "grad_norm": 0.5349719741945184, "learning_rate": 3.678832116788321e-05, "loss": 0.6269, "step": 11571 }, { "epoch": 0.33785874864967447, "grad_norm": 0.5627574682550215, "learning_rate": 3.6786699107867e-05, "loss": 0.7078, "step": 11572 }, { "epoch": 0.33788794487752183, "grad_norm": 0.4968031259076925, "learning_rate": 3.678507704785077e-05, "loss": 0.5818, "step": 11573 }, { "epoch": 0.3379171411053692, "grad_norm": 0.503818923641532, "learning_rate": 3.6783454987834555e-05, "loss": 0.6149, "step": 11574 }, { "epoch": 0.33794633733321655, "grad_norm": 0.5487347095723584, "learning_rate": 3.678183292781833e-05, "loss": 0.6818, "step": 11575 }, { "epoch": 0.3379755335610639, "grad_norm": 0.5205718009089327, "learning_rate": 3.678021086780211e-05, "loss": 0.6189, "step": 11576 }, { "epoch": 0.3380047297889113, "grad_norm": 0.53490488270289, "learning_rate": 3.6778588807785894e-05, "loss": 0.6604, "step": 11577 }, { "epoch": 0.33803392601675863, "grad_norm": 0.527063443224925, "learning_rate": 3.677696674776967e-05, "loss": 0.624, "step": 11578 }, { "epoch": 0.338063122244606, "grad_norm": 0.5230764105792385, "learning_rate": 3.677534468775345e-05, "loss": 0.6378, "step": 11579 }, { "epoch": 0.33809231847245336, "grad_norm": 0.5211787161149172, "learning_rate": 3.6773722627737225e-05, "loss": 0.5646, "step": 11580 }, { "epoch": 0.3381215147003007, "grad_norm": 0.5420041688011797, "learning_rate": 3.677210056772101e-05, "loss": 0.667, "step": 11581 }, { "epoch": 0.3381507109281481, "grad_norm": 0.5487657237581637, "learning_rate": 3.677047850770479e-05, "loss": 0.6607, "step": 11582 }, { "epoch": 0.33817990715599544, "grad_norm": 0.5269220252750698, "learning_rate": 3.6768856447688564e-05, "loss": 0.6186, "step": 11583 }, { "epoch": 0.3382091033838428, "grad_norm": 0.5409096938285844, "learning_rate": 3.6767234387672346e-05, "loss": 0.6358, "step": 11584 }, { "epoch": 0.33823829961169016, "grad_norm": 0.5653598381218611, "learning_rate": 3.676561232765612e-05, "loss": 0.6308, "step": 11585 }, { "epoch": 0.3382674958395375, "grad_norm": 0.5334595366174677, "learning_rate": 3.67639902676399e-05, "loss": 0.6473, "step": 11586 }, { "epoch": 0.3382966920673849, "grad_norm": 0.5782848467083516, "learning_rate": 3.6762368207623684e-05, "loss": 0.6954, "step": 11587 }, { "epoch": 0.33832588829523225, "grad_norm": 0.5340385773085274, "learning_rate": 3.676074614760746e-05, "loss": 0.64, "step": 11588 }, { "epoch": 0.3383550845230796, "grad_norm": 0.5469994996203439, "learning_rate": 3.675912408759124e-05, "loss": 0.6773, "step": 11589 }, { "epoch": 0.33838428075092697, "grad_norm": 0.5067491244887089, "learning_rate": 3.675750202757502e-05, "loss": 0.5835, "step": 11590 }, { "epoch": 0.33841347697877433, "grad_norm": 0.5302793327814066, "learning_rate": 3.6755879967558804e-05, "loss": 0.6676, "step": 11591 }, { "epoch": 0.3384426732066217, "grad_norm": 0.5577687775051619, "learning_rate": 3.6754257907542586e-05, "loss": 0.6847, "step": 11592 }, { "epoch": 0.33847186943446905, "grad_norm": 0.570470813247021, "learning_rate": 3.675263584752636e-05, "loss": 0.6658, "step": 11593 }, { "epoch": 0.3385010656623164, "grad_norm": 0.5290421629235053, "learning_rate": 3.675101378751014e-05, "loss": 0.6367, "step": 11594 }, { "epoch": 0.3385302618901638, "grad_norm": 0.5081837183200179, "learning_rate": 3.674939172749392e-05, "loss": 0.5432, "step": 11595 }, { "epoch": 0.33855945811801114, "grad_norm": 0.6027451012701618, "learning_rate": 3.67477696674777e-05, "loss": 0.7228, "step": 11596 }, { "epoch": 0.3385886543458585, "grad_norm": 0.5465835099600911, "learning_rate": 3.674614760746148e-05, "loss": 0.6652, "step": 11597 }, { "epoch": 0.33861785057370586, "grad_norm": 0.5447251544441276, "learning_rate": 3.6744525547445256e-05, "loss": 0.6172, "step": 11598 }, { "epoch": 0.3386470468015532, "grad_norm": 0.5498718798540028, "learning_rate": 3.674290348742904e-05, "loss": 0.6566, "step": 11599 }, { "epoch": 0.3386762430294006, "grad_norm": 0.5467345128407519, "learning_rate": 3.674128142741281e-05, "loss": 0.6429, "step": 11600 }, { "epoch": 0.33870543925724794, "grad_norm": 0.5541684479544227, "learning_rate": 3.6739659367396595e-05, "loss": 0.6869, "step": 11601 }, { "epoch": 0.3387346354850953, "grad_norm": 0.5642773862743391, "learning_rate": 3.6738037307380377e-05, "loss": 0.6857, "step": 11602 }, { "epoch": 0.33876383171294266, "grad_norm": 0.6032258420818712, "learning_rate": 3.673641524736415e-05, "loss": 0.7276, "step": 11603 }, { "epoch": 0.33879302794079, "grad_norm": 0.5174330149903766, "learning_rate": 3.673479318734793e-05, "loss": 0.594, "step": 11604 }, { "epoch": 0.3388222241686374, "grad_norm": 0.5849668052522433, "learning_rate": 3.673317112733171e-05, "loss": 0.6906, "step": 11605 }, { "epoch": 0.33885142039648475, "grad_norm": 0.49579226859599723, "learning_rate": 3.673154906731549e-05, "loss": 0.5403, "step": 11606 }, { "epoch": 0.3388806166243321, "grad_norm": 0.5486234997554617, "learning_rate": 3.672992700729927e-05, "loss": 0.6681, "step": 11607 }, { "epoch": 0.3389098128521795, "grad_norm": 0.5552828393343411, "learning_rate": 3.672830494728305e-05, "loss": 0.6733, "step": 11608 }, { "epoch": 0.3389390090800269, "grad_norm": 0.5659863401668386, "learning_rate": 3.6726682887266835e-05, "loss": 0.6539, "step": 11609 }, { "epoch": 0.33896820530787425, "grad_norm": 0.5783794727775077, "learning_rate": 3.672506082725061e-05, "loss": 0.6872, "step": 11610 }, { "epoch": 0.3389974015357216, "grad_norm": 0.5289213024853843, "learning_rate": 3.672343876723439e-05, "loss": 0.6299, "step": 11611 }, { "epoch": 0.33902659776356897, "grad_norm": 0.594884627674061, "learning_rate": 3.6721816707218174e-05, "loss": 0.6571, "step": 11612 }, { "epoch": 0.33905579399141633, "grad_norm": 0.557440675530838, "learning_rate": 3.672019464720195e-05, "loss": 0.6375, "step": 11613 }, { "epoch": 0.3390849902192637, "grad_norm": 0.5362486792546989, "learning_rate": 3.671857258718573e-05, "loss": 0.5988, "step": 11614 }, { "epoch": 0.33911418644711105, "grad_norm": 0.5012329668627019, "learning_rate": 3.6716950527169506e-05, "loss": 0.5592, "step": 11615 }, { "epoch": 0.3391433826749584, "grad_norm": 0.5820808768217859, "learning_rate": 3.671532846715329e-05, "loss": 0.7013, "step": 11616 }, { "epoch": 0.3391725789028058, "grad_norm": 0.546569384249682, "learning_rate": 3.671370640713707e-05, "loss": 0.7323, "step": 11617 }, { "epoch": 0.33920177513065314, "grad_norm": 0.5235564100577071, "learning_rate": 3.6712084347120844e-05, "loss": 0.5784, "step": 11618 }, { "epoch": 0.3392309713585005, "grad_norm": 0.5469586870686813, "learning_rate": 3.6710462287104626e-05, "loss": 0.6563, "step": 11619 }, { "epoch": 0.33926016758634786, "grad_norm": 0.561930675731243, "learning_rate": 3.67088402270884e-05, "loss": 0.7098, "step": 11620 }, { "epoch": 0.3392893638141952, "grad_norm": 0.507013003012996, "learning_rate": 3.670721816707218e-05, "loss": 0.5583, "step": 11621 }, { "epoch": 0.3393185600420426, "grad_norm": 0.5191827828529127, "learning_rate": 3.6705596107055964e-05, "loss": 0.5818, "step": 11622 }, { "epoch": 0.33934775626988994, "grad_norm": 0.5507270514845728, "learning_rate": 3.670397404703974e-05, "loss": 0.6347, "step": 11623 }, { "epoch": 0.3393769524977373, "grad_norm": 0.5315797968005316, "learning_rate": 3.670235198702352e-05, "loss": 0.6404, "step": 11624 }, { "epoch": 0.33940614872558467, "grad_norm": 0.5892091546771583, "learning_rate": 3.6700729927007296e-05, "loss": 0.7232, "step": 11625 }, { "epoch": 0.339435344953432, "grad_norm": 0.535381770500199, "learning_rate": 3.669910786699108e-05, "loss": 0.6368, "step": 11626 }, { "epoch": 0.3394645411812794, "grad_norm": 0.5840337494557205, "learning_rate": 3.669748580697486e-05, "loss": 0.672, "step": 11627 }, { "epoch": 0.33949373740912675, "grad_norm": 0.4920010128257812, "learning_rate": 3.669586374695864e-05, "loss": 0.5985, "step": 11628 }, { "epoch": 0.3395229336369741, "grad_norm": 0.532533396145424, "learning_rate": 3.669424168694242e-05, "loss": 0.6147, "step": 11629 }, { "epoch": 0.3395521298648215, "grad_norm": 0.48556096897297235, "learning_rate": 3.66926196269262e-05, "loss": 0.5415, "step": 11630 }, { "epoch": 0.33958132609266883, "grad_norm": 0.5216121367117067, "learning_rate": 3.669099756690998e-05, "loss": 0.6198, "step": 11631 }, { "epoch": 0.3396105223205162, "grad_norm": 0.5237175169825552, "learning_rate": 3.668937550689376e-05, "loss": 0.5908, "step": 11632 }, { "epoch": 0.33963971854836356, "grad_norm": 0.5042652223458776, "learning_rate": 3.668775344687754e-05, "loss": 0.5934, "step": 11633 }, { "epoch": 0.3396689147762109, "grad_norm": 0.5754792152823149, "learning_rate": 3.668613138686132e-05, "loss": 0.7078, "step": 11634 }, { "epoch": 0.3396981110040583, "grad_norm": 0.5698650530906286, "learning_rate": 3.6684509326845093e-05, "loss": 0.7115, "step": 11635 }, { "epoch": 0.33972730723190564, "grad_norm": 0.5463513204513143, "learning_rate": 3.6682887266828875e-05, "loss": 0.6473, "step": 11636 }, { "epoch": 0.339756503459753, "grad_norm": 0.4997386629091573, "learning_rate": 3.668126520681266e-05, "loss": 0.5605, "step": 11637 }, { "epoch": 0.33978569968760036, "grad_norm": 0.5074433999854255, "learning_rate": 3.667964314679643e-05, "loss": 0.5349, "step": 11638 }, { "epoch": 0.3398148959154477, "grad_norm": 0.5486683081330435, "learning_rate": 3.6678021086780214e-05, "loss": 0.6437, "step": 11639 }, { "epoch": 0.3398440921432951, "grad_norm": 0.5210896623102965, "learning_rate": 3.667639902676399e-05, "loss": 0.5955, "step": 11640 }, { "epoch": 0.33987328837114245, "grad_norm": 0.5398449055134789, "learning_rate": 3.667477696674777e-05, "loss": 0.6022, "step": 11641 }, { "epoch": 0.3399024845989898, "grad_norm": 0.5587819835228439, "learning_rate": 3.667315490673155e-05, "loss": 0.6438, "step": 11642 }, { "epoch": 0.33993168082683717, "grad_norm": 0.5371219587497211, "learning_rate": 3.667153284671533e-05, "loss": 0.6237, "step": 11643 }, { "epoch": 0.33996087705468453, "grad_norm": 0.531745114232697, "learning_rate": 3.666991078669911e-05, "loss": 0.5881, "step": 11644 }, { "epoch": 0.3399900732825319, "grad_norm": 0.5200828018788999, "learning_rate": 3.6668288726682884e-05, "loss": 0.5753, "step": 11645 }, { "epoch": 0.34001926951037925, "grad_norm": 0.5372659076561965, "learning_rate": 3.6666666666666666e-05, "loss": 0.6283, "step": 11646 }, { "epoch": 0.3400484657382266, "grad_norm": 0.4870295980668006, "learning_rate": 3.666504460665045e-05, "loss": 0.5686, "step": 11647 }, { "epoch": 0.340077661966074, "grad_norm": 0.5512454908575768, "learning_rate": 3.666342254663423e-05, "loss": 0.646, "step": 11648 }, { "epoch": 0.34010685819392134, "grad_norm": 0.5614310154373328, "learning_rate": 3.666180048661801e-05, "loss": 0.6282, "step": 11649 }, { "epoch": 0.3401360544217687, "grad_norm": 0.525267335872389, "learning_rate": 3.6660178426601786e-05, "loss": 0.5922, "step": 11650 }, { "epoch": 0.34016525064961606, "grad_norm": 0.5241226560571683, "learning_rate": 3.665855636658557e-05, "loss": 0.6105, "step": 11651 }, { "epoch": 0.3401944468774634, "grad_norm": 0.5295026481486893, "learning_rate": 3.665693430656934e-05, "loss": 0.5898, "step": 11652 }, { "epoch": 0.3402236431053108, "grad_norm": 0.5510587093907893, "learning_rate": 3.6655312246553124e-05, "loss": 0.6661, "step": 11653 }, { "epoch": 0.34025283933315814, "grad_norm": 0.575881077713479, "learning_rate": 3.6653690186536906e-05, "loss": 0.6802, "step": 11654 }, { "epoch": 0.3402820355610055, "grad_norm": 0.55920516545499, "learning_rate": 3.665206812652068e-05, "loss": 0.6404, "step": 11655 }, { "epoch": 0.34031123178885286, "grad_norm": 0.5316686795471729, "learning_rate": 3.665044606650446e-05, "loss": 0.6064, "step": 11656 }, { "epoch": 0.3403404280167002, "grad_norm": 0.5634391974977357, "learning_rate": 3.6648824006488245e-05, "loss": 0.688, "step": 11657 }, { "epoch": 0.3403696242445476, "grad_norm": 0.4846230779675506, "learning_rate": 3.664720194647202e-05, "loss": 0.5502, "step": 11658 }, { "epoch": 0.34039882047239495, "grad_norm": 0.48737495183016755, "learning_rate": 3.66455798864558e-05, "loss": 0.5685, "step": 11659 }, { "epoch": 0.3404280167002423, "grad_norm": 0.5426647029154197, "learning_rate": 3.6643957826439577e-05, "loss": 0.6827, "step": 11660 }, { "epoch": 0.34045721292808967, "grad_norm": 0.5269165635806317, "learning_rate": 3.664233576642336e-05, "loss": 0.6369, "step": 11661 }, { "epoch": 0.34048640915593703, "grad_norm": 0.5350417370803573, "learning_rate": 3.664071370640714e-05, "loss": 0.634, "step": 11662 }, { "epoch": 0.3405156053837844, "grad_norm": 0.523985063957825, "learning_rate": 3.6639091646390915e-05, "loss": 0.564, "step": 11663 }, { "epoch": 0.34054480161163175, "grad_norm": 0.523393101302126, "learning_rate": 3.66374695863747e-05, "loss": 0.593, "step": 11664 }, { "epoch": 0.3405739978394791, "grad_norm": 0.5331237090388841, "learning_rate": 3.663584752635847e-05, "loss": 0.6467, "step": 11665 }, { "epoch": 0.3406031940673265, "grad_norm": 0.6047933063933073, "learning_rate": 3.663422546634226e-05, "loss": 0.7322, "step": 11666 }, { "epoch": 0.34063239029517384, "grad_norm": 0.5721931333082997, "learning_rate": 3.6632603406326035e-05, "loss": 0.6929, "step": 11667 }, { "epoch": 0.3406615865230212, "grad_norm": 0.5190212892519714, "learning_rate": 3.663098134630982e-05, "loss": 0.5819, "step": 11668 }, { "epoch": 0.3406907827508686, "grad_norm": 0.5784361326443198, "learning_rate": 3.66293592862936e-05, "loss": 0.7309, "step": 11669 }, { "epoch": 0.340719978978716, "grad_norm": 0.5209918601816725, "learning_rate": 3.6627737226277374e-05, "loss": 0.6279, "step": 11670 }, { "epoch": 0.34074917520656334, "grad_norm": 0.512609012131964, "learning_rate": 3.6626115166261156e-05, "loss": 0.5753, "step": 11671 }, { "epoch": 0.3407783714344107, "grad_norm": 0.5433897038494215, "learning_rate": 3.662449310624493e-05, "loss": 0.6712, "step": 11672 }, { "epoch": 0.34080756766225806, "grad_norm": 0.5815041142796842, "learning_rate": 3.662287104622871e-05, "loss": 0.6869, "step": 11673 }, { "epoch": 0.3408367638901054, "grad_norm": 0.5140127269767614, "learning_rate": 3.6621248986212494e-05, "loss": 0.537, "step": 11674 }, { "epoch": 0.3408659601179528, "grad_norm": 0.5441185037587941, "learning_rate": 3.661962692619627e-05, "loss": 0.6677, "step": 11675 }, { "epoch": 0.34089515634580014, "grad_norm": 0.5122577893600055, "learning_rate": 3.661800486618005e-05, "loss": 0.5859, "step": 11676 }, { "epoch": 0.3409243525736475, "grad_norm": 0.5161208629963484, "learning_rate": 3.661638280616383e-05, "loss": 0.5257, "step": 11677 }, { "epoch": 0.34095354880149487, "grad_norm": 0.5709999267371623, "learning_rate": 3.661476074614761e-05, "loss": 0.6799, "step": 11678 }, { "epoch": 0.3409827450293422, "grad_norm": 0.601795232253123, "learning_rate": 3.661313868613139e-05, "loss": 0.7344, "step": 11679 }, { "epoch": 0.3410119412571896, "grad_norm": 0.5589124329535614, "learning_rate": 3.6611516626115164e-05, "loss": 0.5569, "step": 11680 }, { "epoch": 0.34104113748503695, "grad_norm": 0.578478008899623, "learning_rate": 3.6609894566098946e-05, "loss": 0.707, "step": 11681 }, { "epoch": 0.3410703337128843, "grad_norm": 0.47304130260267907, "learning_rate": 3.660827250608273e-05, "loss": 0.491, "step": 11682 }, { "epoch": 0.34109952994073167, "grad_norm": 0.4975944561802464, "learning_rate": 3.66066504460665e-05, "loss": 0.5903, "step": 11683 }, { "epoch": 0.34112872616857903, "grad_norm": 0.567475130708576, "learning_rate": 3.6605028386050285e-05, "loss": 0.7081, "step": 11684 }, { "epoch": 0.3411579223964264, "grad_norm": 0.5511598354928691, "learning_rate": 3.6603406326034066e-05, "loss": 0.629, "step": 11685 }, { "epoch": 0.34118711862427376, "grad_norm": 0.5041253004813011, "learning_rate": 3.660178426601785e-05, "loss": 0.543, "step": 11686 }, { "epoch": 0.3412163148521211, "grad_norm": 0.5682654804562762, "learning_rate": 3.660016220600162e-05, "loss": 0.7142, "step": 11687 }, { "epoch": 0.3412455110799685, "grad_norm": 0.588847085782288, "learning_rate": 3.6598540145985405e-05, "loss": 0.6749, "step": 11688 }, { "epoch": 0.34127470730781584, "grad_norm": 0.5561557400043216, "learning_rate": 3.659691808596919e-05, "loss": 0.6409, "step": 11689 }, { "epoch": 0.3413039035356632, "grad_norm": 0.5515142338794663, "learning_rate": 3.659529602595296e-05, "loss": 0.73, "step": 11690 }, { "epoch": 0.34133309976351056, "grad_norm": 0.5716258275515338, "learning_rate": 3.659367396593674e-05, "loss": 0.689, "step": 11691 }, { "epoch": 0.3413622959913579, "grad_norm": 0.5806859539561167, "learning_rate": 3.659205190592052e-05, "loss": 0.7291, "step": 11692 }, { "epoch": 0.3413914922192053, "grad_norm": 0.5700165763382975, "learning_rate": 3.65904298459043e-05, "loss": 0.6563, "step": 11693 }, { "epoch": 0.34142068844705264, "grad_norm": 0.5660997894284383, "learning_rate": 3.658880778588808e-05, "loss": 0.7054, "step": 11694 }, { "epoch": 0.3414498846749, "grad_norm": 0.6004333606602407, "learning_rate": 3.658718572587186e-05, "loss": 0.647, "step": 11695 }, { "epoch": 0.34147908090274737, "grad_norm": 0.5342147999586299, "learning_rate": 3.658556366585564e-05, "loss": 0.6387, "step": 11696 }, { "epoch": 0.34150827713059473, "grad_norm": 0.5106224779938289, "learning_rate": 3.6583941605839414e-05, "loss": 0.6099, "step": 11697 }, { "epoch": 0.3415374733584421, "grad_norm": 0.5547335452017894, "learning_rate": 3.6582319545823195e-05, "loss": 0.6841, "step": 11698 }, { "epoch": 0.34156666958628945, "grad_norm": 0.5476748268227518, "learning_rate": 3.658069748580698e-05, "loss": 0.641, "step": 11699 }, { "epoch": 0.3415958658141368, "grad_norm": 0.5070371070238159, "learning_rate": 3.657907542579075e-05, "loss": 0.5557, "step": 11700 }, { "epoch": 0.3416250620419842, "grad_norm": 0.540853185896855, "learning_rate": 3.6577453365774534e-05, "loss": 0.6909, "step": 11701 }, { "epoch": 0.34165425826983153, "grad_norm": 0.5482505925423762, "learning_rate": 3.6575831305758316e-05, "loss": 0.6636, "step": 11702 }, { "epoch": 0.3416834544976789, "grad_norm": 0.5374833394397396, "learning_rate": 3.657420924574209e-05, "loss": 0.6923, "step": 11703 }, { "epoch": 0.34171265072552626, "grad_norm": 0.561854644217565, "learning_rate": 3.657258718572588e-05, "loss": 0.7078, "step": 11704 }, { "epoch": 0.3417418469533736, "grad_norm": 0.5317490011233211, "learning_rate": 3.6570965125709654e-05, "loss": 0.6629, "step": 11705 }, { "epoch": 0.341771043181221, "grad_norm": 0.5285367992880842, "learning_rate": 3.6569343065693436e-05, "loss": 0.5985, "step": 11706 }, { "epoch": 0.34180023940906834, "grad_norm": 0.5353311295549867, "learning_rate": 3.656772100567721e-05, "loss": 0.621, "step": 11707 }, { "epoch": 0.3418294356369157, "grad_norm": 0.564146971379856, "learning_rate": 3.656609894566099e-05, "loss": 0.7233, "step": 11708 }, { "epoch": 0.34185863186476306, "grad_norm": 0.5534863503479751, "learning_rate": 3.6564476885644774e-05, "loss": 0.6773, "step": 11709 }, { "epoch": 0.3418878280926104, "grad_norm": 0.5549319919849773, "learning_rate": 3.656285482562855e-05, "loss": 0.7161, "step": 11710 }, { "epoch": 0.3419170243204578, "grad_norm": 0.5441844585633375, "learning_rate": 3.656123276561233e-05, "loss": 0.6497, "step": 11711 }, { "epoch": 0.34194622054830515, "grad_norm": 0.5510991386445613, "learning_rate": 3.6559610705596106e-05, "loss": 0.7112, "step": 11712 }, { "epoch": 0.3419754167761525, "grad_norm": 0.5337332484207717, "learning_rate": 3.655798864557989e-05, "loss": 0.6322, "step": 11713 }, { "epoch": 0.34200461300399987, "grad_norm": 0.5271165805871759, "learning_rate": 3.655636658556367e-05, "loss": 0.6005, "step": 11714 }, { "epoch": 0.34203380923184723, "grad_norm": 0.5429756689494425, "learning_rate": 3.6554744525547445e-05, "loss": 0.6775, "step": 11715 }, { "epoch": 0.3420630054596946, "grad_norm": 0.6004019125388904, "learning_rate": 3.6553122465531226e-05, "loss": 0.74, "step": 11716 }, { "epoch": 0.34209220168754195, "grad_norm": 0.5533813376324578, "learning_rate": 3.6551500405515e-05, "loss": 0.7252, "step": 11717 }, { "epoch": 0.3421213979153893, "grad_norm": 0.5554231591155186, "learning_rate": 3.654987834549878e-05, "loss": 0.6786, "step": 11718 }, { "epoch": 0.3421505941432367, "grad_norm": 0.5519551408329819, "learning_rate": 3.6548256285482565e-05, "loss": 0.6653, "step": 11719 }, { "epoch": 0.34217979037108404, "grad_norm": 0.5422214048380235, "learning_rate": 3.654663422546634e-05, "loss": 0.655, "step": 11720 }, { "epoch": 0.3422089865989314, "grad_norm": 0.5674694415652989, "learning_rate": 3.654501216545012e-05, "loss": 0.6986, "step": 11721 }, { "epoch": 0.34223818282677876, "grad_norm": 0.5386333123684974, "learning_rate": 3.6543390105433903e-05, "loss": 0.6234, "step": 11722 }, { "epoch": 0.3422673790546261, "grad_norm": 0.5780563812604099, "learning_rate": 3.6541768045417685e-05, "loss": 0.6789, "step": 11723 }, { "epoch": 0.3422965752824735, "grad_norm": 0.5406339070821246, "learning_rate": 3.654014598540147e-05, "loss": 0.6468, "step": 11724 }, { "epoch": 0.34232577151032084, "grad_norm": 0.555789297904781, "learning_rate": 3.653852392538524e-05, "loss": 0.6572, "step": 11725 }, { "epoch": 0.3423549677381682, "grad_norm": 0.5306493743876083, "learning_rate": 3.6536901865369024e-05, "loss": 0.6163, "step": 11726 }, { "epoch": 0.34238416396601556, "grad_norm": 0.5527076661829317, "learning_rate": 3.65352798053528e-05, "loss": 0.6757, "step": 11727 }, { "epoch": 0.3424133601938629, "grad_norm": 0.5212985647507054, "learning_rate": 3.653365774533658e-05, "loss": 0.6407, "step": 11728 }, { "epoch": 0.34244255642171034, "grad_norm": 0.5565743867930409, "learning_rate": 3.653203568532036e-05, "loss": 0.6215, "step": 11729 }, { "epoch": 0.3424717526495577, "grad_norm": 0.5751085098138566, "learning_rate": 3.653041362530414e-05, "loss": 0.6829, "step": 11730 }, { "epoch": 0.34250094887740506, "grad_norm": 0.5155315675192673, "learning_rate": 3.652879156528792e-05, "loss": 0.6277, "step": 11731 }, { "epoch": 0.3425301451052524, "grad_norm": 0.49022326396974514, "learning_rate": 3.6527169505271694e-05, "loss": 0.5392, "step": 11732 }, { "epoch": 0.3425593413330998, "grad_norm": 0.5146111407400012, "learning_rate": 3.6525547445255476e-05, "loss": 0.6045, "step": 11733 }, { "epoch": 0.34258853756094715, "grad_norm": 0.500164510899686, "learning_rate": 3.652392538523926e-05, "loss": 0.5899, "step": 11734 }, { "epoch": 0.3426177337887945, "grad_norm": 0.8647754754954978, "learning_rate": 3.652230332522303e-05, "loss": 0.6614, "step": 11735 }, { "epoch": 0.34264693001664187, "grad_norm": 0.5354539621603226, "learning_rate": 3.6520681265206814e-05, "loss": 0.6617, "step": 11736 }, { "epoch": 0.34267612624448923, "grad_norm": 0.5630678067852055, "learning_rate": 3.651905920519059e-05, "loss": 0.6915, "step": 11737 }, { "epoch": 0.3427053224723366, "grad_norm": 0.4955997942980739, "learning_rate": 3.651743714517437e-05, "loss": 0.598, "step": 11738 }, { "epoch": 0.34273451870018395, "grad_norm": 0.4992544622291385, "learning_rate": 3.651581508515815e-05, "loss": 0.5421, "step": 11739 }, { "epoch": 0.3427637149280313, "grad_norm": 0.5157413752643252, "learning_rate": 3.651419302514193e-05, "loss": 0.6438, "step": 11740 }, { "epoch": 0.3427929111558787, "grad_norm": 0.5348980761208939, "learning_rate": 3.651257096512571e-05, "loss": 0.6495, "step": 11741 }, { "epoch": 0.34282210738372604, "grad_norm": 0.5775371500520736, "learning_rate": 3.651094890510949e-05, "loss": 0.7205, "step": 11742 }, { "epoch": 0.3428513036115734, "grad_norm": 0.5224354421076247, "learning_rate": 3.650932684509327e-05, "loss": 0.6049, "step": 11743 }, { "epoch": 0.34288049983942076, "grad_norm": 0.5524663425294161, "learning_rate": 3.6507704785077055e-05, "loss": 0.6631, "step": 11744 }, { "epoch": 0.3429096960672681, "grad_norm": 0.5305742975137924, "learning_rate": 3.650608272506083e-05, "loss": 0.62, "step": 11745 }, { "epoch": 0.3429388922951155, "grad_norm": 0.5375718391763369, "learning_rate": 3.650446066504461e-05, "loss": 0.6418, "step": 11746 }, { "epoch": 0.34296808852296284, "grad_norm": 0.5590523591609408, "learning_rate": 3.6502838605028387e-05, "loss": 0.6402, "step": 11747 }, { "epoch": 0.3429972847508102, "grad_norm": 0.5495682587397097, "learning_rate": 3.650121654501217e-05, "loss": 0.6421, "step": 11748 }, { "epoch": 0.34302648097865757, "grad_norm": 0.5522145061344599, "learning_rate": 3.649959448499595e-05, "loss": 0.644, "step": 11749 }, { "epoch": 0.3430556772065049, "grad_norm": 0.558235928321429, "learning_rate": 3.6497972424979725e-05, "loss": 0.676, "step": 11750 }, { "epoch": 0.3430848734343523, "grad_norm": 0.543460303778575, "learning_rate": 3.649635036496351e-05, "loss": 0.6286, "step": 11751 }, { "epoch": 0.34311406966219965, "grad_norm": 0.5240544560240759, "learning_rate": 3.649472830494728e-05, "loss": 0.599, "step": 11752 }, { "epoch": 0.343143265890047, "grad_norm": 0.6099367097150648, "learning_rate": 3.6493106244931064e-05, "loss": 0.7708, "step": 11753 }, { "epoch": 0.3431724621178944, "grad_norm": 0.5406397730691448, "learning_rate": 3.6491484184914845e-05, "loss": 0.6082, "step": 11754 }, { "epoch": 0.34320165834574173, "grad_norm": 0.5526178417725311, "learning_rate": 3.648986212489862e-05, "loss": 0.7139, "step": 11755 }, { "epoch": 0.3432308545735891, "grad_norm": 0.6079334737056247, "learning_rate": 3.64882400648824e-05, "loss": 0.7803, "step": 11756 }, { "epoch": 0.34326005080143646, "grad_norm": 0.5261819824505971, "learning_rate": 3.648661800486618e-05, "loss": 0.6442, "step": 11757 }, { "epoch": 0.3432892470292838, "grad_norm": 0.518349821235534, "learning_rate": 3.648499594484996e-05, "loss": 0.6411, "step": 11758 }, { "epoch": 0.3433184432571312, "grad_norm": 0.5892870284286579, "learning_rate": 3.648337388483374e-05, "loss": 0.6905, "step": 11759 }, { "epoch": 0.34334763948497854, "grad_norm": 0.5449447512381033, "learning_rate": 3.648175182481752e-05, "loss": 0.6644, "step": 11760 }, { "epoch": 0.3433768357128259, "grad_norm": 0.546001100977098, "learning_rate": 3.6480129764801304e-05, "loss": 0.6533, "step": 11761 }, { "epoch": 0.34340603194067326, "grad_norm": 0.5938275299846018, "learning_rate": 3.647850770478508e-05, "loss": 0.6803, "step": 11762 }, { "epoch": 0.3434352281685206, "grad_norm": 0.5791111570736341, "learning_rate": 3.647688564476886e-05, "loss": 0.6855, "step": 11763 }, { "epoch": 0.343464424396368, "grad_norm": 0.49126963728731216, "learning_rate": 3.6475263584752636e-05, "loss": 0.5313, "step": 11764 }, { "epoch": 0.34349362062421535, "grad_norm": 0.5674504605918921, "learning_rate": 3.647364152473642e-05, "loss": 0.6899, "step": 11765 }, { "epoch": 0.3435228168520627, "grad_norm": 0.524638477900819, "learning_rate": 3.64720194647202e-05, "loss": 0.6466, "step": 11766 }, { "epoch": 0.34355201307991007, "grad_norm": 0.556822661179067, "learning_rate": 3.6470397404703974e-05, "loss": 0.667, "step": 11767 }, { "epoch": 0.34358120930775743, "grad_norm": 0.5625050876658032, "learning_rate": 3.6468775344687756e-05, "loss": 0.6583, "step": 11768 }, { "epoch": 0.3436104055356048, "grad_norm": 0.5225268351467076, "learning_rate": 3.646715328467154e-05, "loss": 0.6181, "step": 11769 }, { "epoch": 0.34363960176345215, "grad_norm": 0.5279623493965003, "learning_rate": 3.646553122465531e-05, "loss": 0.6133, "step": 11770 }, { "epoch": 0.3436687979912995, "grad_norm": 0.5260431115875798, "learning_rate": 3.6463909164639095e-05, "loss": 0.6495, "step": 11771 }, { "epoch": 0.3436979942191469, "grad_norm": 0.512289160749127, "learning_rate": 3.646228710462287e-05, "loss": 0.5921, "step": 11772 }, { "epoch": 0.34372719044699424, "grad_norm": 0.5435254769164196, "learning_rate": 3.646066504460665e-05, "loss": 0.6206, "step": 11773 }, { "epoch": 0.3437563866748416, "grad_norm": 0.5566085196106608, "learning_rate": 3.645904298459043e-05, "loss": 0.6426, "step": 11774 }, { "epoch": 0.34378558290268896, "grad_norm": 0.5674538689991832, "learning_rate": 3.645742092457421e-05, "loss": 0.7135, "step": 11775 }, { "epoch": 0.3438147791305363, "grad_norm": 0.5187895644360973, "learning_rate": 3.645579886455799e-05, "loss": 0.5752, "step": 11776 }, { "epoch": 0.3438439753583837, "grad_norm": 0.5594256885985947, "learning_rate": 3.6454176804541765e-05, "loss": 0.6812, "step": 11777 }, { "epoch": 0.34387317158623104, "grad_norm": 0.5409955168814272, "learning_rate": 3.6452554744525547e-05, "loss": 0.5704, "step": 11778 }, { "epoch": 0.3439023678140784, "grad_norm": 0.5233851237205478, "learning_rate": 3.645093268450933e-05, "loss": 0.5869, "step": 11779 }, { "epoch": 0.34393156404192576, "grad_norm": 0.548612497250495, "learning_rate": 3.644931062449311e-05, "loss": 0.6512, "step": 11780 }, { "epoch": 0.3439607602697731, "grad_norm": 0.5784510681086945, "learning_rate": 3.644768856447689e-05, "loss": 0.6425, "step": 11781 }, { "epoch": 0.3439899564976205, "grad_norm": 0.5237155129511154, "learning_rate": 3.644606650446067e-05, "loss": 0.6055, "step": 11782 }, { "epoch": 0.34401915272546785, "grad_norm": 0.5346717645896237, "learning_rate": 3.644444444444445e-05, "loss": 0.6408, "step": 11783 }, { "epoch": 0.3440483489533152, "grad_norm": 0.565305615694806, "learning_rate": 3.6442822384428224e-05, "loss": 0.6226, "step": 11784 }, { "epoch": 0.34407754518116257, "grad_norm": 0.5418539078095966, "learning_rate": 3.6441200324412005e-05, "loss": 0.6801, "step": 11785 }, { "epoch": 0.34410674140900993, "grad_norm": 0.5762738192894857, "learning_rate": 3.643957826439579e-05, "loss": 0.6955, "step": 11786 }, { "epoch": 0.3441359376368573, "grad_norm": 0.5604813956577427, "learning_rate": 3.643795620437956e-05, "loss": 0.6664, "step": 11787 }, { "epoch": 0.34416513386470465, "grad_norm": 0.5506572613492147, "learning_rate": 3.6436334144363344e-05, "loss": 0.6671, "step": 11788 }, { "epoch": 0.34419433009255207, "grad_norm": 0.4718536706634237, "learning_rate": 3.6434712084347126e-05, "loss": 0.569, "step": 11789 }, { "epoch": 0.34422352632039943, "grad_norm": 0.5381908840499212, "learning_rate": 3.64330900243309e-05, "loss": 0.6117, "step": 11790 }, { "epoch": 0.3442527225482468, "grad_norm": 0.5429751580130856, "learning_rate": 3.643146796431468e-05, "loss": 0.6679, "step": 11791 }, { "epoch": 0.34428191877609415, "grad_norm": 0.521191358284963, "learning_rate": 3.642984590429846e-05, "loss": 0.6065, "step": 11792 }, { "epoch": 0.3443111150039415, "grad_norm": 0.5106862569676289, "learning_rate": 3.642822384428224e-05, "loss": 0.5813, "step": 11793 }, { "epoch": 0.3443403112317889, "grad_norm": 0.5317113309344793, "learning_rate": 3.642660178426602e-05, "loss": 0.6239, "step": 11794 }, { "epoch": 0.34436950745963624, "grad_norm": 0.5422751588683938, "learning_rate": 3.6424979724249796e-05, "loss": 0.6134, "step": 11795 }, { "epoch": 0.3443987036874836, "grad_norm": 0.5438695007578412, "learning_rate": 3.642335766423358e-05, "loss": 0.6206, "step": 11796 }, { "epoch": 0.34442789991533096, "grad_norm": 0.5743985871107677, "learning_rate": 3.642173560421735e-05, "loss": 0.675, "step": 11797 }, { "epoch": 0.3444570961431783, "grad_norm": 0.5115944907913538, "learning_rate": 3.642011354420114e-05, "loss": 0.5772, "step": 11798 }, { "epoch": 0.3444862923710257, "grad_norm": 0.5325033489248042, "learning_rate": 3.6418491484184916e-05, "loss": 0.5889, "step": 11799 }, { "epoch": 0.34451548859887304, "grad_norm": 0.5467830826319298, "learning_rate": 3.64168694241687e-05, "loss": 0.6929, "step": 11800 }, { "epoch": 0.3445446848267204, "grad_norm": 0.6153943335043625, "learning_rate": 3.641524736415248e-05, "loss": 0.674, "step": 11801 }, { "epoch": 0.34457388105456777, "grad_norm": 0.5880392788548685, "learning_rate": 3.6413625304136255e-05, "loss": 0.747, "step": 11802 }, { "epoch": 0.3446030772824151, "grad_norm": 0.660877092569265, "learning_rate": 3.6412003244120036e-05, "loss": 0.8458, "step": 11803 }, { "epoch": 0.3446322735102625, "grad_norm": 0.5684184548553942, "learning_rate": 3.641038118410381e-05, "loss": 0.6423, "step": 11804 }, { "epoch": 0.34466146973810985, "grad_norm": 0.576382316692164, "learning_rate": 3.640875912408759e-05, "loss": 0.6725, "step": 11805 }, { "epoch": 0.3446906659659572, "grad_norm": 0.5508796488687637, "learning_rate": 3.6407137064071375e-05, "loss": 0.6688, "step": 11806 }, { "epoch": 0.34471986219380457, "grad_norm": 0.5837913875521593, "learning_rate": 3.640551500405515e-05, "loss": 0.6913, "step": 11807 }, { "epoch": 0.34474905842165193, "grad_norm": 0.5690850377022033, "learning_rate": 3.640389294403893e-05, "loss": 0.6862, "step": 11808 }, { "epoch": 0.3447782546494993, "grad_norm": 0.4928164904550391, "learning_rate": 3.640227088402271e-05, "loss": 0.5404, "step": 11809 }, { "epoch": 0.34480745087734666, "grad_norm": 0.5366626614786536, "learning_rate": 3.640064882400649e-05, "loss": 0.5959, "step": 11810 }, { "epoch": 0.344836647105194, "grad_norm": 0.4963382666706203, "learning_rate": 3.639902676399027e-05, "loss": 0.5359, "step": 11811 }, { "epoch": 0.3448658433330414, "grad_norm": 0.649734103720294, "learning_rate": 3.6397404703974045e-05, "loss": 0.8207, "step": 11812 }, { "epoch": 0.34489503956088874, "grad_norm": 0.5818851176792427, "learning_rate": 3.639578264395783e-05, "loss": 0.6698, "step": 11813 }, { "epoch": 0.3449242357887361, "grad_norm": 0.5184087518546105, "learning_rate": 3.639416058394161e-05, "loss": 0.5948, "step": 11814 }, { "epoch": 0.34495343201658346, "grad_norm": 0.5451678824031169, "learning_rate": 3.6392538523925384e-05, "loss": 0.662, "step": 11815 }, { "epoch": 0.3449826282444308, "grad_norm": 0.5256771718864617, "learning_rate": 3.6390916463909165e-05, "loss": 0.6334, "step": 11816 }, { "epoch": 0.3450118244722782, "grad_norm": 0.5122293882832406, "learning_rate": 3.638929440389295e-05, "loss": 0.578, "step": 11817 }, { "epoch": 0.34504102070012554, "grad_norm": 0.5427547476923372, "learning_rate": 3.638767234387673e-05, "loss": 0.6417, "step": 11818 }, { "epoch": 0.3450702169279729, "grad_norm": 0.5828928655849249, "learning_rate": 3.6386050283860504e-05, "loss": 0.6292, "step": 11819 }, { "epoch": 0.34509941315582027, "grad_norm": 0.5042513149934278, "learning_rate": 3.6384428223844286e-05, "loss": 0.5793, "step": 11820 }, { "epoch": 0.34512860938366763, "grad_norm": 0.52922816487077, "learning_rate": 3.638280616382807e-05, "loss": 0.6281, "step": 11821 }, { "epoch": 0.345157805611515, "grad_norm": 0.5321250397364757, "learning_rate": 3.638118410381184e-05, "loss": 0.5999, "step": 11822 }, { "epoch": 0.34518700183936235, "grad_norm": 0.519060165570982, "learning_rate": 3.6379562043795624e-05, "loss": 0.6002, "step": 11823 }, { "epoch": 0.3452161980672097, "grad_norm": 0.5580426606576347, "learning_rate": 3.63779399837794e-05, "loss": 0.7161, "step": 11824 }, { "epoch": 0.3452453942950571, "grad_norm": 0.5217627660370544, "learning_rate": 3.637631792376318e-05, "loss": 0.6133, "step": 11825 }, { "epoch": 0.34527459052290443, "grad_norm": 0.5182456894866936, "learning_rate": 3.637469586374696e-05, "loss": 0.6382, "step": 11826 }, { "epoch": 0.3453037867507518, "grad_norm": 0.544465256922211, "learning_rate": 3.637307380373074e-05, "loss": 0.6714, "step": 11827 }, { "epoch": 0.34533298297859916, "grad_norm": 0.6086309733633056, "learning_rate": 3.637145174371452e-05, "loss": 0.7202, "step": 11828 }, { "epoch": 0.3453621792064465, "grad_norm": 0.5579046588060305, "learning_rate": 3.6369829683698294e-05, "loss": 0.7044, "step": 11829 }, { "epoch": 0.3453913754342939, "grad_norm": 0.5475707052655703, "learning_rate": 3.6368207623682076e-05, "loss": 0.6662, "step": 11830 }, { "epoch": 0.34542057166214124, "grad_norm": 0.5434494703420844, "learning_rate": 3.636658556366586e-05, "loss": 0.6319, "step": 11831 }, { "epoch": 0.3454497678899886, "grad_norm": 0.5860880597547079, "learning_rate": 3.636496350364963e-05, "loss": 0.7598, "step": 11832 }, { "epoch": 0.34547896411783596, "grad_norm": 0.5943824629957242, "learning_rate": 3.6363341443633415e-05, "loss": 0.629, "step": 11833 }, { "epoch": 0.3455081603456833, "grad_norm": 0.5281040805526271, "learning_rate": 3.6361719383617197e-05, "loss": 0.6096, "step": 11834 }, { "epoch": 0.3455373565735307, "grad_norm": 0.5410985429901662, "learning_rate": 3.636009732360097e-05, "loss": 0.6112, "step": 11835 }, { "epoch": 0.34556655280137805, "grad_norm": 0.5442977693113666, "learning_rate": 3.635847526358476e-05, "loss": 0.6269, "step": 11836 }, { "epoch": 0.3455957490292254, "grad_norm": 0.546141421880628, "learning_rate": 3.6356853203568535e-05, "loss": 0.6404, "step": 11837 }, { "epoch": 0.34562494525707277, "grad_norm": 0.5183313865037712, "learning_rate": 3.635523114355232e-05, "loss": 0.6307, "step": 11838 }, { "epoch": 0.34565414148492013, "grad_norm": 0.5372524362661492, "learning_rate": 3.635360908353609e-05, "loss": 0.6499, "step": 11839 }, { "epoch": 0.3456833377127675, "grad_norm": 0.5901254533475263, "learning_rate": 3.6351987023519874e-05, "loss": 0.692, "step": 11840 }, { "epoch": 0.34571253394061485, "grad_norm": 0.5177590065431836, "learning_rate": 3.6350364963503655e-05, "loss": 0.5593, "step": 11841 }, { "epoch": 0.3457417301684622, "grad_norm": 0.5308995630484894, "learning_rate": 3.634874290348743e-05, "loss": 0.6278, "step": 11842 }, { "epoch": 0.3457709263963096, "grad_norm": 0.599644982298583, "learning_rate": 3.634712084347121e-05, "loss": 0.6582, "step": 11843 }, { "epoch": 0.34580012262415694, "grad_norm": 0.5760974488490899, "learning_rate": 3.634549878345499e-05, "loss": 0.7295, "step": 11844 }, { "epoch": 0.3458293188520043, "grad_norm": 0.5787700747290898, "learning_rate": 3.634387672343877e-05, "loss": 0.6894, "step": 11845 }, { "epoch": 0.34585851507985166, "grad_norm": 0.572046685476483, "learning_rate": 3.634225466342255e-05, "loss": 0.7066, "step": 11846 }, { "epoch": 0.345887711307699, "grad_norm": 0.5276020923805265, "learning_rate": 3.6340632603406326e-05, "loss": 0.5694, "step": 11847 }, { "epoch": 0.3459169075355464, "grad_norm": 0.6025315801667895, "learning_rate": 3.633901054339011e-05, "loss": 0.7175, "step": 11848 }, { "epoch": 0.34594610376339374, "grad_norm": 0.5863119103815525, "learning_rate": 3.633738848337388e-05, "loss": 0.7463, "step": 11849 }, { "epoch": 0.34597529999124116, "grad_norm": 0.5347279589026369, "learning_rate": 3.6335766423357664e-05, "loss": 0.6331, "step": 11850 }, { "epoch": 0.3460044962190885, "grad_norm": 0.5126119819597901, "learning_rate": 3.6334144363341446e-05, "loss": 0.5554, "step": 11851 }, { "epoch": 0.3460336924469359, "grad_norm": 0.510361547379664, "learning_rate": 3.633252230332522e-05, "loss": 0.5723, "step": 11852 }, { "epoch": 0.34606288867478324, "grad_norm": 0.5649865581351118, "learning_rate": 3.6330900243309e-05, "loss": 0.6333, "step": 11853 }, { "epoch": 0.3460920849026306, "grad_norm": 0.5434930026530828, "learning_rate": 3.632927818329278e-05, "loss": 0.6271, "step": 11854 }, { "epoch": 0.34612128113047796, "grad_norm": 0.5599536757975082, "learning_rate": 3.6327656123276566e-05, "loss": 0.6685, "step": 11855 }, { "epoch": 0.3461504773583253, "grad_norm": 0.5261471816531418, "learning_rate": 3.632603406326035e-05, "loss": 0.597, "step": 11856 }, { "epoch": 0.3461796735861727, "grad_norm": 0.5883370670510804, "learning_rate": 3.632441200324412e-05, "loss": 0.6473, "step": 11857 }, { "epoch": 0.34620886981402005, "grad_norm": 0.5268126060901371, "learning_rate": 3.6322789943227905e-05, "loss": 0.5948, "step": 11858 }, { "epoch": 0.3462380660418674, "grad_norm": 0.505165087985953, "learning_rate": 3.632116788321168e-05, "loss": 0.5833, "step": 11859 }, { "epoch": 0.34626726226971477, "grad_norm": 0.541281556201944, "learning_rate": 3.631954582319546e-05, "loss": 0.6188, "step": 11860 }, { "epoch": 0.34629645849756213, "grad_norm": 0.5045155987530013, "learning_rate": 3.631792376317924e-05, "loss": 0.57, "step": 11861 }, { "epoch": 0.3463256547254095, "grad_norm": 0.5210997482297405, "learning_rate": 3.631630170316302e-05, "loss": 0.5898, "step": 11862 }, { "epoch": 0.34635485095325685, "grad_norm": 0.6462348962774723, "learning_rate": 3.63146796431468e-05, "loss": 0.5945, "step": 11863 }, { "epoch": 0.3463840471811042, "grad_norm": 0.568513622154447, "learning_rate": 3.6313057583130575e-05, "loss": 0.671, "step": 11864 }, { "epoch": 0.3464132434089516, "grad_norm": 0.5311595092610163, "learning_rate": 3.6311435523114357e-05, "loss": 0.6463, "step": 11865 }, { "epoch": 0.34644243963679894, "grad_norm": 0.528497668607871, "learning_rate": 3.630981346309814e-05, "loss": 0.6103, "step": 11866 }, { "epoch": 0.3464716358646463, "grad_norm": 0.5454790812644983, "learning_rate": 3.630819140308191e-05, "loss": 0.611, "step": 11867 }, { "epoch": 0.34650083209249366, "grad_norm": 0.5448912295210537, "learning_rate": 3.6306569343065695e-05, "loss": 0.6586, "step": 11868 }, { "epoch": 0.346530028320341, "grad_norm": 0.5182876867355172, "learning_rate": 3.630494728304947e-05, "loss": 0.6092, "step": 11869 }, { "epoch": 0.3465592245481884, "grad_norm": 0.5525568931926981, "learning_rate": 3.630332522303325e-05, "loss": 0.6667, "step": 11870 }, { "epoch": 0.34658842077603574, "grad_norm": 0.5185044606011304, "learning_rate": 3.6301703163017034e-05, "loss": 0.5972, "step": 11871 }, { "epoch": 0.3466176170038831, "grad_norm": 0.5286771407561074, "learning_rate": 3.630008110300081e-05, "loss": 0.5872, "step": 11872 }, { "epoch": 0.34664681323173047, "grad_norm": 0.521785190770534, "learning_rate": 3.629845904298459e-05, "loss": 0.6127, "step": 11873 }, { "epoch": 0.3466760094595778, "grad_norm": 0.5158049441614337, "learning_rate": 3.629683698296837e-05, "loss": 0.5813, "step": 11874 }, { "epoch": 0.3467052056874252, "grad_norm": 0.6055624994077287, "learning_rate": 3.6295214922952154e-05, "loss": 0.6169, "step": 11875 }, { "epoch": 0.34673440191527255, "grad_norm": 0.5136558060324117, "learning_rate": 3.6293592862935936e-05, "loss": 0.5478, "step": 11876 }, { "epoch": 0.3467635981431199, "grad_norm": 0.5036901593826673, "learning_rate": 3.629197080291971e-05, "loss": 0.5914, "step": 11877 }, { "epoch": 0.34679279437096727, "grad_norm": 0.586412787709479, "learning_rate": 3.629034874290349e-05, "loss": 0.7436, "step": 11878 }, { "epoch": 0.34682199059881463, "grad_norm": 0.5528624175247558, "learning_rate": 3.628872668288727e-05, "loss": 0.6708, "step": 11879 }, { "epoch": 0.346851186826662, "grad_norm": 0.5607144842310609, "learning_rate": 3.628710462287105e-05, "loss": 0.6563, "step": 11880 }, { "epoch": 0.34688038305450936, "grad_norm": 0.5663058012473776, "learning_rate": 3.628548256285483e-05, "loss": 0.7366, "step": 11881 }, { "epoch": 0.3469095792823567, "grad_norm": 0.5764301362060855, "learning_rate": 3.6283860502838606e-05, "loss": 0.6429, "step": 11882 }, { "epoch": 0.3469387755102041, "grad_norm": 0.5089112553691939, "learning_rate": 3.628223844282239e-05, "loss": 0.5812, "step": 11883 }, { "epoch": 0.34696797173805144, "grad_norm": 0.5532978033131762, "learning_rate": 3.628061638280616e-05, "loss": 0.7266, "step": 11884 }, { "epoch": 0.3469971679658988, "grad_norm": 0.5134436167023042, "learning_rate": 3.6278994322789944e-05, "loss": 0.5831, "step": 11885 }, { "epoch": 0.34702636419374616, "grad_norm": 0.5073302899200803, "learning_rate": 3.6277372262773726e-05, "loss": 0.5795, "step": 11886 }, { "epoch": 0.3470555604215935, "grad_norm": 0.5502006415385472, "learning_rate": 3.62757502027575e-05, "loss": 0.6588, "step": 11887 }, { "epoch": 0.3470847566494409, "grad_norm": 0.511473188871231, "learning_rate": 3.627412814274128e-05, "loss": 0.5915, "step": 11888 }, { "epoch": 0.34711395287728825, "grad_norm": 0.5307240332716189, "learning_rate": 3.627250608272506e-05, "loss": 0.6331, "step": 11889 }, { "epoch": 0.3471431491051356, "grad_norm": 0.5492557703763069, "learning_rate": 3.627088402270884e-05, "loss": 0.6297, "step": 11890 }, { "epoch": 0.34717234533298297, "grad_norm": 0.548142584199985, "learning_rate": 3.626926196269262e-05, "loss": 0.666, "step": 11891 }, { "epoch": 0.34720154156083033, "grad_norm": 0.533437058410839, "learning_rate": 3.6267639902676396e-05, "loss": 0.6362, "step": 11892 }, { "epoch": 0.3472307377886777, "grad_norm": 0.6826163287186988, "learning_rate": 3.6266017842660185e-05, "loss": 0.7456, "step": 11893 }, { "epoch": 0.34725993401652505, "grad_norm": 0.494064059302656, "learning_rate": 3.626439578264396e-05, "loss": 0.5411, "step": 11894 }, { "epoch": 0.3472891302443724, "grad_norm": 0.5329689705828533, "learning_rate": 3.626277372262774e-05, "loss": 0.6613, "step": 11895 }, { "epoch": 0.3473183264722198, "grad_norm": 0.5399252865791613, "learning_rate": 3.626115166261152e-05, "loss": 0.6156, "step": 11896 }, { "epoch": 0.34734752270006714, "grad_norm": 0.5605935442383421, "learning_rate": 3.62595296025953e-05, "loss": 0.6872, "step": 11897 }, { "epoch": 0.3473767189279145, "grad_norm": 0.47857806170048856, "learning_rate": 3.625790754257908e-05, "loss": 0.5104, "step": 11898 }, { "epoch": 0.34740591515576186, "grad_norm": 0.5317059372610782, "learning_rate": 3.6256285482562855e-05, "loss": 0.6209, "step": 11899 }, { "epoch": 0.3474351113836092, "grad_norm": 0.5823633514524218, "learning_rate": 3.625466342254664e-05, "loss": 0.6957, "step": 11900 }, { "epoch": 0.3474643076114566, "grad_norm": 0.5393057976725125, "learning_rate": 3.625304136253042e-05, "loss": 0.6339, "step": 11901 }, { "epoch": 0.34749350383930394, "grad_norm": 0.535811926018321, "learning_rate": 3.6251419302514194e-05, "loss": 0.6885, "step": 11902 }, { "epoch": 0.3475227000671513, "grad_norm": 0.4658585058619126, "learning_rate": 3.6249797242497975e-05, "loss": 0.4817, "step": 11903 }, { "epoch": 0.34755189629499866, "grad_norm": 0.5217991082489836, "learning_rate": 3.624817518248175e-05, "loss": 0.6581, "step": 11904 }, { "epoch": 0.347581092522846, "grad_norm": 0.5395239167844894, "learning_rate": 3.624655312246553e-05, "loss": 0.6582, "step": 11905 }, { "epoch": 0.3476102887506934, "grad_norm": 0.5610339377645434, "learning_rate": 3.6244931062449314e-05, "loss": 0.7119, "step": 11906 }, { "epoch": 0.34763948497854075, "grad_norm": 0.5608218101859399, "learning_rate": 3.624330900243309e-05, "loss": 0.6662, "step": 11907 }, { "epoch": 0.3476686812063881, "grad_norm": 0.5014529611595567, "learning_rate": 3.624168694241687e-05, "loss": 0.567, "step": 11908 }, { "epoch": 0.34769787743423547, "grad_norm": 0.49376115182130237, "learning_rate": 3.6240064882400646e-05, "loss": 0.5672, "step": 11909 }, { "epoch": 0.3477270736620829, "grad_norm": 0.5101828476460148, "learning_rate": 3.623844282238443e-05, "loss": 0.6035, "step": 11910 }, { "epoch": 0.34775626988993025, "grad_norm": 0.5405435903125875, "learning_rate": 3.623682076236821e-05, "loss": 0.6135, "step": 11911 }, { "epoch": 0.3477854661177776, "grad_norm": 0.5718348471131705, "learning_rate": 3.623519870235199e-05, "loss": 0.6742, "step": 11912 }, { "epoch": 0.34781466234562497, "grad_norm": 0.5228608776439083, "learning_rate": 3.623357664233577e-05, "loss": 0.5914, "step": 11913 }, { "epoch": 0.34784385857347233, "grad_norm": 0.5440695551309997, "learning_rate": 3.623195458231955e-05, "loss": 0.6426, "step": 11914 }, { "epoch": 0.3478730548013197, "grad_norm": 0.5455711388274402, "learning_rate": 3.623033252230333e-05, "loss": 0.6348, "step": 11915 }, { "epoch": 0.34790225102916705, "grad_norm": 0.5404758752679921, "learning_rate": 3.6228710462287104e-05, "loss": 0.6326, "step": 11916 }, { "epoch": 0.3479314472570144, "grad_norm": 0.5321181989990887, "learning_rate": 3.6227088402270886e-05, "loss": 0.6202, "step": 11917 }, { "epoch": 0.3479606434848618, "grad_norm": 0.5925010461405356, "learning_rate": 3.622546634225467e-05, "loss": 0.6515, "step": 11918 }, { "epoch": 0.34798983971270914, "grad_norm": 0.5111958147038207, "learning_rate": 3.622384428223844e-05, "loss": 0.5834, "step": 11919 }, { "epoch": 0.3480190359405565, "grad_norm": 0.5003715057883882, "learning_rate": 3.6222222222222225e-05, "loss": 0.5702, "step": 11920 }, { "epoch": 0.34804823216840386, "grad_norm": 0.6667707395129538, "learning_rate": 3.6220600162206007e-05, "loss": 0.667, "step": 11921 }, { "epoch": 0.3480774283962512, "grad_norm": 0.5428159169384239, "learning_rate": 3.621897810218978e-05, "loss": 0.6603, "step": 11922 }, { "epoch": 0.3481066246240986, "grad_norm": 0.5226666337400402, "learning_rate": 3.621735604217356e-05, "loss": 0.5848, "step": 11923 }, { "epoch": 0.34813582085194594, "grad_norm": 0.5511763477314336, "learning_rate": 3.621573398215734e-05, "loss": 0.6577, "step": 11924 }, { "epoch": 0.3481650170797933, "grad_norm": 0.5467728087474523, "learning_rate": 3.621411192214112e-05, "loss": 0.6193, "step": 11925 }, { "epoch": 0.34819421330764067, "grad_norm": 0.5003223794567911, "learning_rate": 3.62124898621249e-05, "loss": 0.5745, "step": 11926 }, { "epoch": 0.348223409535488, "grad_norm": 0.558003928026246, "learning_rate": 3.621086780210868e-05, "loss": 0.6453, "step": 11927 }, { "epoch": 0.3482526057633354, "grad_norm": 0.5177877229294529, "learning_rate": 3.620924574209246e-05, "loss": 0.6051, "step": 11928 }, { "epoch": 0.34828180199118275, "grad_norm": 0.5061759874027039, "learning_rate": 3.6207623682076234e-05, "loss": 0.5857, "step": 11929 }, { "epoch": 0.3483109982190301, "grad_norm": 0.6206752154439282, "learning_rate": 3.620600162206002e-05, "loss": 0.6279, "step": 11930 }, { "epoch": 0.34834019444687747, "grad_norm": 0.601142235785097, "learning_rate": 3.62043795620438e-05, "loss": 0.6886, "step": 11931 }, { "epoch": 0.34836939067472483, "grad_norm": 0.5249313448009008, "learning_rate": 3.620275750202758e-05, "loss": 0.622, "step": 11932 }, { "epoch": 0.3483985869025722, "grad_norm": 0.5640988817890559, "learning_rate": 3.620113544201136e-05, "loss": 0.6735, "step": 11933 }, { "epoch": 0.34842778313041955, "grad_norm": 0.5203186137679917, "learning_rate": 3.6199513381995136e-05, "loss": 0.6263, "step": 11934 }, { "epoch": 0.3484569793582669, "grad_norm": 0.5499837989577043, "learning_rate": 3.619789132197892e-05, "loss": 0.6814, "step": 11935 }, { "epoch": 0.3484861755861143, "grad_norm": 0.5545680111872786, "learning_rate": 3.619626926196269e-05, "loss": 0.6932, "step": 11936 }, { "epoch": 0.34851537181396164, "grad_norm": 0.5233943574923728, "learning_rate": 3.6194647201946474e-05, "loss": 0.5808, "step": 11937 }, { "epoch": 0.348544568041809, "grad_norm": 0.5140989059519768, "learning_rate": 3.6193025141930256e-05, "loss": 0.559, "step": 11938 }, { "epoch": 0.34857376426965636, "grad_norm": 0.523056978409481, "learning_rate": 3.619140308191403e-05, "loss": 0.6002, "step": 11939 }, { "epoch": 0.3486029604975037, "grad_norm": 0.48268195997396524, "learning_rate": 3.618978102189781e-05, "loss": 0.5308, "step": 11940 }, { "epoch": 0.3486321567253511, "grad_norm": 0.5205754161816196, "learning_rate": 3.618815896188159e-05, "loss": 0.5607, "step": 11941 }, { "epoch": 0.34866135295319844, "grad_norm": 0.570400039584699, "learning_rate": 3.618653690186537e-05, "loss": 0.7092, "step": 11942 }, { "epoch": 0.3486905491810458, "grad_norm": 0.5490739051332411, "learning_rate": 3.618491484184915e-05, "loss": 0.6368, "step": 11943 }, { "epoch": 0.34871974540889317, "grad_norm": 0.5642010322397165, "learning_rate": 3.6183292781832926e-05, "loss": 0.6911, "step": 11944 }, { "epoch": 0.34874894163674053, "grad_norm": 0.541368230913478, "learning_rate": 3.618167072181671e-05, "loss": 0.6335, "step": 11945 }, { "epoch": 0.3487781378645879, "grad_norm": 0.5541061032394973, "learning_rate": 3.618004866180049e-05, "loss": 0.6645, "step": 11946 }, { "epoch": 0.34880733409243525, "grad_norm": 0.5239066130341751, "learning_rate": 3.6178426601784265e-05, "loss": 0.5868, "step": 11947 }, { "epoch": 0.3488365303202826, "grad_norm": 0.5478043503556216, "learning_rate": 3.6176804541768046e-05, "loss": 0.6553, "step": 11948 }, { "epoch": 0.34886572654813, "grad_norm": 0.5400098265329438, "learning_rate": 3.617518248175183e-05, "loss": 0.6155, "step": 11949 }, { "epoch": 0.34889492277597733, "grad_norm": 0.49233676774037227, "learning_rate": 3.617356042173561e-05, "loss": 0.5357, "step": 11950 }, { "epoch": 0.3489241190038247, "grad_norm": 0.5812101972601567, "learning_rate": 3.6171938361719385e-05, "loss": 0.6594, "step": 11951 }, { "epoch": 0.34895331523167206, "grad_norm": 0.5538528007653549, "learning_rate": 3.617031630170317e-05, "loss": 0.6521, "step": 11952 }, { "epoch": 0.3489825114595194, "grad_norm": 0.52957258459567, "learning_rate": 3.616869424168695e-05, "loss": 0.6324, "step": 11953 }, { "epoch": 0.3490117076873668, "grad_norm": 0.5497195820428238, "learning_rate": 3.616707218167072e-05, "loss": 0.6525, "step": 11954 }, { "epoch": 0.34904090391521414, "grad_norm": 0.5485789866132049, "learning_rate": 3.6165450121654505e-05, "loss": 0.6369, "step": 11955 }, { "epoch": 0.3490701001430615, "grad_norm": 0.5126977937855511, "learning_rate": 3.616382806163828e-05, "loss": 0.574, "step": 11956 }, { "epoch": 0.34909929637090886, "grad_norm": 0.5725272842571978, "learning_rate": 3.616220600162206e-05, "loss": 0.679, "step": 11957 }, { "epoch": 0.3491284925987562, "grad_norm": 0.5210156021051869, "learning_rate": 3.6160583941605844e-05, "loss": 0.5996, "step": 11958 }, { "epoch": 0.3491576888266036, "grad_norm": 0.5932862516398605, "learning_rate": 3.615896188158962e-05, "loss": 0.7471, "step": 11959 }, { "epoch": 0.34918688505445095, "grad_norm": 0.551464463405121, "learning_rate": 3.61573398215734e-05, "loss": 0.6421, "step": 11960 }, { "epoch": 0.3492160812822983, "grad_norm": 0.49601203492049983, "learning_rate": 3.6155717761557175e-05, "loss": 0.5507, "step": 11961 }, { "epoch": 0.34924527751014567, "grad_norm": 0.5378866672136506, "learning_rate": 3.615409570154096e-05, "loss": 0.6304, "step": 11962 }, { "epoch": 0.34927447373799303, "grad_norm": 0.5224961983147708, "learning_rate": 3.615247364152474e-05, "loss": 0.5663, "step": 11963 }, { "epoch": 0.3493036699658404, "grad_norm": 0.5371358006074578, "learning_rate": 3.6150851581508514e-05, "loss": 0.6326, "step": 11964 }, { "epoch": 0.34933286619368775, "grad_norm": 0.5444628560862549, "learning_rate": 3.6149229521492296e-05, "loss": 0.6261, "step": 11965 }, { "epoch": 0.3493620624215351, "grad_norm": 0.5725987707064484, "learning_rate": 3.614760746147608e-05, "loss": 0.654, "step": 11966 }, { "epoch": 0.3493912586493825, "grad_norm": 0.5882572352045737, "learning_rate": 3.614598540145985e-05, "loss": 0.6661, "step": 11967 }, { "epoch": 0.34942045487722984, "grad_norm": 0.5134234598957048, "learning_rate": 3.614436334144364e-05, "loss": 0.6064, "step": 11968 }, { "epoch": 0.3494496511050772, "grad_norm": 0.4958078436312921, "learning_rate": 3.6142741281427416e-05, "loss": 0.5822, "step": 11969 }, { "epoch": 0.3494788473329246, "grad_norm": 0.541479631986109, "learning_rate": 3.61411192214112e-05, "loss": 0.6162, "step": 11970 }, { "epoch": 0.349508043560772, "grad_norm": 0.5582899100199883, "learning_rate": 3.613949716139497e-05, "loss": 0.6688, "step": 11971 }, { "epoch": 0.34953723978861934, "grad_norm": 0.5872910995456387, "learning_rate": 3.6137875101378754e-05, "loss": 0.68, "step": 11972 }, { "epoch": 0.3495664360164667, "grad_norm": 0.6789554644893703, "learning_rate": 3.6136253041362536e-05, "loss": 0.6607, "step": 11973 }, { "epoch": 0.34959563224431406, "grad_norm": 0.5592673080283563, "learning_rate": 3.613463098134631e-05, "loss": 0.6468, "step": 11974 }, { "epoch": 0.3496248284721614, "grad_norm": 0.5656772877140608, "learning_rate": 3.613300892133009e-05, "loss": 0.6559, "step": 11975 }, { "epoch": 0.3496540247000088, "grad_norm": 0.4949038082070151, "learning_rate": 3.613138686131387e-05, "loss": 0.5205, "step": 11976 }, { "epoch": 0.34968322092785614, "grad_norm": 0.5323087727939343, "learning_rate": 3.612976480129765e-05, "loss": 0.6148, "step": 11977 }, { "epoch": 0.3497124171557035, "grad_norm": 0.5302909700710944, "learning_rate": 3.612814274128143e-05, "loss": 0.6529, "step": 11978 }, { "epoch": 0.34974161338355086, "grad_norm": 0.5526218855005333, "learning_rate": 3.6126520681265206e-05, "loss": 0.6585, "step": 11979 }, { "epoch": 0.3497708096113982, "grad_norm": 0.5290071461279018, "learning_rate": 3.612489862124899e-05, "loss": 0.6671, "step": 11980 }, { "epoch": 0.3498000058392456, "grad_norm": 0.5326407552717657, "learning_rate": 3.612327656123276e-05, "loss": 0.6372, "step": 11981 }, { "epoch": 0.34982920206709295, "grad_norm": 0.590802311080496, "learning_rate": 3.6121654501216545e-05, "loss": 0.6914, "step": 11982 }, { "epoch": 0.3498583982949403, "grad_norm": 0.5426073762925708, "learning_rate": 3.612003244120033e-05, "loss": 0.5841, "step": 11983 }, { "epoch": 0.34988759452278767, "grad_norm": 0.5490866369492435, "learning_rate": 3.61184103811841e-05, "loss": 0.6371, "step": 11984 }, { "epoch": 0.34991679075063503, "grad_norm": 0.5520966725691406, "learning_rate": 3.6116788321167883e-05, "loss": 0.6355, "step": 11985 }, { "epoch": 0.3499459869784824, "grad_norm": 0.5364511124061696, "learning_rate": 3.611516626115166e-05, "loss": 0.6497, "step": 11986 }, { "epoch": 0.34997518320632975, "grad_norm": 0.4945020579944093, "learning_rate": 3.611354420113545e-05, "loss": 0.5678, "step": 11987 }, { "epoch": 0.3500043794341771, "grad_norm": 0.5476876129503544, "learning_rate": 3.611192214111923e-05, "loss": 0.6514, "step": 11988 }, { "epoch": 0.3500335756620245, "grad_norm": 0.5410224649579608, "learning_rate": 3.6110300081103004e-05, "loss": 0.6241, "step": 11989 }, { "epoch": 0.35006277188987184, "grad_norm": 0.5169271382841846, "learning_rate": 3.6108678021086785e-05, "loss": 0.5953, "step": 11990 }, { "epoch": 0.3500919681177192, "grad_norm": 0.5876184972937177, "learning_rate": 3.610705596107056e-05, "loss": 0.67, "step": 11991 }, { "epoch": 0.35012116434556656, "grad_norm": 0.5435425955525793, "learning_rate": 3.610543390105434e-05, "loss": 0.7182, "step": 11992 }, { "epoch": 0.3501503605734139, "grad_norm": 0.5398611849996158, "learning_rate": 3.6103811841038124e-05, "loss": 0.6335, "step": 11993 }, { "epoch": 0.3501795568012613, "grad_norm": 0.5312755153856001, "learning_rate": 3.61021897810219e-05, "loss": 0.6597, "step": 11994 }, { "epoch": 0.35020875302910864, "grad_norm": 0.5338478477927956, "learning_rate": 3.610056772100568e-05, "loss": 0.6508, "step": 11995 }, { "epoch": 0.350237949256956, "grad_norm": 0.5251269519356, "learning_rate": 3.6098945660989456e-05, "loss": 0.6107, "step": 11996 }, { "epoch": 0.35026714548480337, "grad_norm": 0.5541291671288254, "learning_rate": 3.609732360097324e-05, "loss": 0.6427, "step": 11997 }, { "epoch": 0.3502963417126507, "grad_norm": 0.5224139636440596, "learning_rate": 3.609570154095702e-05, "loss": 0.6131, "step": 11998 }, { "epoch": 0.3503255379404981, "grad_norm": 0.4967778876196009, "learning_rate": 3.6094079480940794e-05, "loss": 0.564, "step": 11999 }, { "epoch": 0.35035473416834545, "grad_norm": 0.5097423905380221, "learning_rate": 3.6092457420924576e-05, "loss": 0.6086, "step": 12000 }, { "epoch": 0.3503839303961928, "grad_norm": 0.5499246361400599, "learning_rate": 3.609083536090835e-05, "loss": 0.651, "step": 12001 }, { "epoch": 0.35041312662404017, "grad_norm": 0.5780941643443578, "learning_rate": 3.608921330089213e-05, "loss": 0.7062, "step": 12002 }, { "epoch": 0.35044232285188753, "grad_norm": 0.5407153672267111, "learning_rate": 3.6087591240875915e-05, "loss": 0.6555, "step": 12003 }, { "epoch": 0.3504715190797349, "grad_norm": 0.5206030895830985, "learning_rate": 3.608596918085969e-05, "loss": 0.6364, "step": 12004 }, { "epoch": 0.35050071530758226, "grad_norm": 0.5296530006891387, "learning_rate": 3.608434712084347e-05, "loss": 0.6075, "step": 12005 }, { "epoch": 0.3505299115354296, "grad_norm": 0.4816403913938296, "learning_rate": 3.608272506082725e-05, "loss": 0.5367, "step": 12006 }, { "epoch": 0.350559107763277, "grad_norm": 0.5421911673900139, "learning_rate": 3.6081103000811035e-05, "loss": 0.6238, "step": 12007 }, { "epoch": 0.35058830399112434, "grad_norm": 0.5593153873241071, "learning_rate": 3.607948094079481e-05, "loss": 0.679, "step": 12008 }, { "epoch": 0.3506175002189717, "grad_norm": 0.5273080063669612, "learning_rate": 3.607785888077859e-05, "loss": 0.6393, "step": 12009 }, { "epoch": 0.35064669644681906, "grad_norm": 0.5344992059449679, "learning_rate": 3.607623682076237e-05, "loss": 0.6225, "step": 12010 }, { "epoch": 0.3506758926746664, "grad_norm": 0.5512990726845145, "learning_rate": 3.607461476074615e-05, "loss": 0.6801, "step": 12011 }, { "epoch": 0.3507050889025138, "grad_norm": 0.5343576817643975, "learning_rate": 3.607299270072993e-05, "loss": 0.6559, "step": 12012 }, { "epoch": 0.35073428513036115, "grad_norm": 0.4883304507656347, "learning_rate": 3.607137064071371e-05, "loss": 0.5819, "step": 12013 }, { "epoch": 0.3507634813582085, "grad_norm": 0.5292833554296617, "learning_rate": 3.606974858069749e-05, "loss": 0.6524, "step": 12014 }, { "epoch": 0.35079267758605587, "grad_norm": 0.5327383397979197, "learning_rate": 3.606812652068127e-05, "loss": 0.6313, "step": 12015 }, { "epoch": 0.35082187381390323, "grad_norm": 0.5317174272289945, "learning_rate": 3.6066504460665044e-05, "loss": 0.6357, "step": 12016 }, { "epoch": 0.3508510700417506, "grad_norm": 0.512439352122265, "learning_rate": 3.6064882400648825e-05, "loss": 0.5843, "step": 12017 }, { "epoch": 0.35088026626959795, "grad_norm": 0.5074306686042854, "learning_rate": 3.606326034063261e-05, "loss": 0.5904, "step": 12018 }, { "epoch": 0.3509094624974453, "grad_norm": 0.5396788699002532, "learning_rate": 3.606163828061638e-05, "loss": 0.6186, "step": 12019 }, { "epoch": 0.3509386587252927, "grad_norm": 0.5270468096248216, "learning_rate": 3.6060016220600164e-05, "loss": 0.5957, "step": 12020 }, { "epoch": 0.35096785495314003, "grad_norm": 0.6101887153053318, "learning_rate": 3.605839416058394e-05, "loss": 0.7073, "step": 12021 }, { "epoch": 0.3509970511809874, "grad_norm": 0.531545328163061, "learning_rate": 3.605677210056772e-05, "loss": 0.6409, "step": 12022 }, { "epoch": 0.35102624740883476, "grad_norm": 0.5313666949995386, "learning_rate": 3.60551500405515e-05, "loss": 0.566, "step": 12023 }, { "epoch": 0.3510554436366821, "grad_norm": 0.5843469501966787, "learning_rate": 3.605352798053528e-05, "loss": 0.7286, "step": 12024 }, { "epoch": 0.3510846398645295, "grad_norm": 0.5872315069517001, "learning_rate": 3.6051905920519066e-05, "loss": 0.6736, "step": 12025 }, { "epoch": 0.35111383609237684, "grad_norm": 0.5454518161631555, "learning_rate": 3.605028386050284e-05, "loss": 0.6743, "step": 12026 }, { "epoch": 0.3511430323202242, "grad_norm": 0.5522180563165714, "learning_rate": 3.604866180048662e-05, "loss": 0.622, "step": 12027 }, { "epoch": 0.35117222854807156, "grad_norm": 0.5356452913922733, "learning_rate": 3.60470397404704e-05, "loss": 0.6214, "step": 12028 }, { "epoch": 0.3512014247759189, "grad_norm": 0.5994375347431309, "learning_rate": 3.604541768045418e-05, "loss": 0.7373, "step": 12029 }, { "epoch": 0.3512306210037663, "grad_norm": 0.5480620534015761, "learning_rate": 3.604379562043796e-05, "loss": 0.6379, "step": 12030 }, { "epoch": 0.3512598172316137, "grad_norm": 0.5629541164335168, "learning_rate": 3.6042173560421736e-05, "loss": 0.6895, "step": 12031 }, { "epoch": 0.35128901345946106, "grad_norm": 0.5268005008956299, "learning_rate": 3.604055150040552e-05, "loss": 0.6408, "step": 12032 }, { "epoch": 0.3513182096873084, "grad_norm": 0.596803778448212, "learning_rate": 3.60389294403893e-05, "loss": 0.7274, "step": 12033 }, { "epoch": 0.3513474059151558, "grad_norm": 0.6027158809861368, "learning_rate": 3.6037307380373075e-05, "loss": 0.6949, "step": 12034 }, { "epoch": 0.35137660214300315, "grad_norm": 0.4894255889671079, "learning_rate": 3.6035685320356856e-05, "loss": 0.5445, "step": 12035 }, { "epoch": 0.3514057983708505, "grad_norm": 0.5495512193672245, "learning_rate": 3.603406326034063e-05, "loss": 0.6313, "step": 12036 }, { "epoch": 0.35143499459869787, "grad_norm": 0.5599262361859575, "learning_rate": 3.603244120032441e-05, "loss": 0.6752, "step": 12037 }, { "epoch": 0.35146419082654523, "grad_norm": 0.5335565828356855, "learning_rate": 3.6030819140308195e-05, "loss": 0.666, "step": 12038 }, { "epoch": 0.3514933870543926, "grad_norm": 0.4965569527863839, "learning_rate": 3.602919708029197e-05, "loss": 0.5737, "step": 12039 }, { "epoch": 0.35152258328223995, "grad_norm": 0.5096607183176411, "learning_rate": 3.602757502027575e-05, "loss": 0.5874, "step": 12040 }, { "epoch": 0.3515517795100873, "grad_norm": 0.5946326565065182, "learning_rate": 3.6025952960259527e-05, "loss": 0.7127, "step": 12041 }, { "epoch": 0.3515809757379347, "grad_norm": 0.541514265245995, "learning_rate": 3.602433090024331e-05, "loss": 0.6648, "step": 12042 }, { "epoch": 0.35161017196578204, "grad_norm": 0.5172465688776722, "learning_rate": 3.602270884022709e-05, "loss": 0.6356, "step": 12043 }, { "epoch": 0.3516393681936294, "grad_norm": 0.6060109797105879, "learning_rate": 3.602108678021087e-05, "loss": 0.665, "step": 12044 }, { "epoch": 0.35166856442147676, "grad_norm": 0.6300734387122754, "learning_rate": 3.6019464720194654e-05, "loss": 0.6603, "step": 12045 }, { "epoch": 0.3516977606493241, "grad_norm": 0.5485800590992821, "learning_rate": 3.601784266017843e-05, "loss": 0.6823, "step": 12046 }, { "epoch": 0.3517269568771715, "grad_norm": 0.5541920696521653, "learning_rate": 3.601622060016221e-05, "loss": 0.6884, "step": 12047 }, { "epoch": 0.35175615310501884, "grad_norm": 0.5373620356756386, "learning_rate": 3.6014598540145985e-05, "loss": 0.6107, "step": 12048 }, { "epoch": 0.3517853493328662, "grad_norm": 0.5107210526862405, "learning_rate": 3.601297648012977e-05, "loss": 0.5848, "step": 12049 }, { "epoch": 0.35181454556071357, "grad_norm": 0.5808709130322843, "learning_rate": 3.601135442011355e-05, "loss": 0.6872, "step": 12050 }, { "epoch": 0.3518437417885609, "grad_norm": 0.575250431525666, "learning_rate": 3.6009732360097324e-05, "loss": 0.6813, "step": 12051 }, { "epoch": 0.3518729380164083, "grad_norm": 0.5387674374194812, "learning_rate": 3.6008110300081106e-05, "loss": 0.55, "step": 12052 }, { "epoch": 0.35190213424425565, "grad_norm": 0.5192075450051418, "learning_rate": 3.600648824006488e-05, "loss": 0.6224, "step": 12053 }, { "epoch": 0.351931330472103, "grad_norm": 0.6478964608152215, "learning_rate": 3.600486618004866e-05, "loss": 0.5774, "step": 12054 }, { "epoch": 0.35196052669995037, "grad_norm": 0.5288590368622812, "learning_rate": 3.6003244120032444e-05, "loss": 0.6371, "step": 12055 }, { "epoch": 0.35198972292779773, "grad_norm": 0.6554558670246619, "learning_rate": 3.600162206001622e-05, "loss": 0.5877, "step": 12056 }, { "epoch": 0.3520189191556451, "grad_norm": 0.5255481835319896, "learning_rate": 3.6e-05, "loss": 0.5667, "step": 12057 }, { "epoch": 0.35204811538349245, "grad_norm": 0.5648696938745751, "learning_rate": 3.599837793998378e-05, "loss": 0.6464, "step": 12058 }, { "epoch": 0.3520773116113398, "grad_norm": 0.52362627848529, "learning_rate": 3.599675587996756e-05, "loss": 0.6004, "step": 12059 }, { "epoch": 0.3521065078391872, "grad_norm": 0.4961004603641569, "learning_rate": 3.599513381995134e-05, "loss": 0.536, "step": 12060 }, { "epoch": 0.35213570406703454, "grad_norm": 0.5446646795801323, "learning_rate": 3.5993511759935114e-05, "loss": 0.6171, "step": 12061 }, { "epoch": 0.3521649002948819, "grad_norm": 0.5505870019278285, "learning_rate": 3.59918896999189e-05, "loss": 0.6097, "step": 12062 }, { "epoch": 0.35219409652272926, "grad_norm": 0.549658573646964, "learning_rate": 3.599026763990268e-05, "loss": 0.5929, "step": 12063 }, { "epoch": 0.3522232927505766, "grad_norm": 0.5361341955051456, "learning_rate": 3.598864557988646e-05, "loss": 0.6824, "step": 12064 }, { "epoch": 0.352252488978424, "grad_norm": 0.5134704196782698, "learning_rate": 3.598702351987024e-05, "loss": 0.5844, "step": 12065 }, { "epoch": 0.35228168520627134, "grad_norm": 0.5714784304893894, "learning_rate": 3.5985401459854016e-05, "loss": 0.7368, "step": 12066 }, { "epoch": 0.3523108814341187, "grad_norm": 0.5216666568651679, "learning_rate": 3.59837793998378e-05, "loss": 0.6342, "step": 12067 }, { "epoch": 0.35234007766196607, "grad_norm": 0.5405453037569914, "learning_rate": 3.598215733982157e-05, "loss": 0.6048, "step": 12068 }, { "epoch": 0.35236927388981343, "grad_norm": 0.544167437626346, "learning_rate": 3.5980535279805355e-05, "loss": 0.6607, "step": 12069 }, { "epoch": 0.3523984701176608, "grad_norm": 0.5098454751841839, "learning_rate": 3.597891321978914e-05, "loss": 0.5677, "step": 12070 }, { "epoch": 0.35242766634550815, "grad_norm": 0.5406193553572461, "learning_rate": 3.597729115977291e-05, "loss": 0.6328, "step": 12071 }, { "epoch": 0.3524568625733555, "grad_norm": 0.48366785051085726, "learning_rate": 3.5975669099756693e-05, "loss": 0.5753, "step": 12072 }, { "epoch": 0.3524860588012029, "grad_norm": 0.5164446498119719, "learning_rate": 3.597404703974047e-05, "loss": 0.5965, "step": 12073 }, { "epoch": 0.35251525502905023, "grad_norm": 0.5070801821402827, "learning_rate": 3.597242497972425e-05, "loss": 0.5676, "step": 12074 }, { "epoch": 0.3525444512568976, "grad_norm": 0.5133279329115531, "learning_rate": 3.597080291970803e-05, "loss": 0.6072, "step": 12075 }, { "epoch": 0.35257364748474496, "grad_norm": 0.528701595827601, "learning_rate": 3.596918085969181e-05, "loss": 0.6381, "step": 12076 }, { "epoch": 0.3526028437125923, "grad_norm": 0.577017064098773, "learning_rate": 3.596755879967559e-05, "loss": 0.594, "step": 12077 }, { "epoch": 0.3526320399404397, "grad_norm": 0.5116485034394678, "learning_rate": 3.596593673965937e-05, "loss": 0.5981, "step": 12078 }, { "epoch": 0.35266123616828704, "grad_norm": 0.5457607072595361, "learning_rate": 3.5964314679643145e-05, "loss": 0.6674, "step": 12079 }, { "epoch": 0.3526904323961344, "grad_norm": 0.5423930671890029, "learning_rate": 3.596269261962693e-05, "loss": 0.6852, "step": 12080 }, { "epoch": 0.35271962862398176, "grad_norm": 0.5009679731991772, "learning_rate": 3.596107055961071e-05, "loss": 0.5503, "step": 12081 }, { "epoch": 0.3527488248518291, "grad_norm": 0.508672189983121, "learning_rate": 3.595944849959449e-05, "loss": 0.5949, "step": 12082 }, { "epoch": 0.3527780210796765, "grad_norm": 0.5252191090993573, "learning_rate": 3.5957826439578266e-05, "loss": 0.6424, "step": 12083 }, { "epoch": 0.35280721730752385, "grad_norm": 0.5814685109253885, "learning_rate": 3.595620437956205e-05, "loss": 0.7012, "step": 12084 }, { "epoch": 0.3528364135353712, "grad_norm": 0.5321455935777774, "learning_rate": 3.595458231954583e-05, "loss": 0.6551, "step": 12085 }, { "epoch": 0.35286560976321857, "grad_norm": 0.49859992182372326, "learning_rate": 3.5952960259529604e-05, "loss": 0.5316, "step": 12086 }, { "epoch": 0.35289480599106593, "grad_norm": 0.5408961736734214, "learning_rate": 3.5951338199513386e-05, "loss": 0.652, "step": 12087 }, { "epoch": 0.3529240022189133, "grad_norm": 0.5224946432022194, "learning_rate": 3.594971613949716e-05, "loss": 0.6709, "step": 12088 }, { "epoch": 0.35295319844676065, "grad_norm": 0.5736708318444603, "learning_rate": 3.594809407948094e-05, "loss": 0.6767, "step": 12089 }, { "epoch": 0.352982394674608, "grad_norm": 0.5676687531487127, "learning_rate": 3.5946472019464725e-05, "loss": 0.6556, "step": 12090 }, { "epoch": 0.35301159090245543, "grad_norm": 0.5291213087521435, "learning_rate": 3.59448499594485e-05, "loss": 0.5736, "step": 12091 }, { "epoch": 0.3530407871303028, "grad_norm": 0.6109938590931545, "learning_rate": 3.594322789943228e-05, "loss": 0.6973, "step": 12092 }, { "epoch": 0.35306998335815015, "grad_norm": 0.5720589399915826, "learning_rate": 3.5941605839416056e-05, "loss": 0.6941, "step": 12093 }, { "epoch": 0.3530991795859975, "grad_norm": 0.5415036842807374, "learning_rate": 3.593998377939984e-05, "loss": 0.643, "step": 12094 }, { "epoch": 0.3531283758138449, "grad_norm": 0.5489657042499475, "learning_rate": 3.593836171938362e-05, "loss": 0.665, "step": 12095 }, { "epoch": 0.35315757204169224, "grad_norm": 0.5660675420792937, "learning_rate": 3.5936739659367395e-05, "loss": 0.6678, "step": 12096 }, { "epoch": 0.3531867682695396, "grad_norm": 0.5399268812986425, "learning_rate": 3.5935117599351177e-05, "loss": 0.639, "step": 12097 }, { "epoch": 0.35321596449738696, "grad_norm": 0.5345753969784056, "learning_rate": 3.593349553933495e-05, "loss": 0.6121, "step": 12098 }, { "epoch": 0.3532451607252343, "grad_norm": 0.4759016561633606, "learning_rate": 3.593187347931873e-05, "loss": 0.5517, "step": 12099 }, { "epoch": 0.3532743569530817, "grad_norm": 0.5566842610061249, "learning_rate": 3.593025141930252e-05, "loss": 0.6737, "step": 12100 }, { "epoch": 0.35330355318092904, "grad_norm": 0.5908642824111883, "learning_rate": 3.59286293592863e-05, "loss": 0.6578, "step": 12101 }, { "epoch": 0.3533327494087764, "grad_norm": 0.5640160625760221, "learning_rate": 3.592700729927008e-05, "loss": 0.6982, "step": 12102 }, { "epoch": 0.35336194563662376, "grad_norm": 0.6576441272860877, "learning_rate": 3.5925385239253854e-05, "loss": 0.5658, "step": 12103 }, { "epoch": 0.3533911418644711, "grad_norm": 0.5077501133496348, "learning_rate": 3.5923763179237635e-05, "loss": 0.5337, "step": 12104 }, { "epoch": 0.3534203380923185, "grad_norm": 0.5339065305817459, "learning_rate": 3.592214111922142e-05, "loss": 0.6276, "step": 12105 }, { "epoch": 0.35344953432016585, "grad_norm": 0.5924319681138313, "learning_rate": 3.592051905920519e-05, "loss": 0.6299, "step": 12106 }, { "epoch": 0.3534787305480132, "grad_norm": 0.5783260381530644, "learning_rate": 3.5918896999188974e-05, "loss": 0.72, "step": 12107 }, { "epoch": 0.35350792677586057, "grad_norm": 0.5873512397128584, "learning_rate": 3.591727493917275e-05, "loss": 0.7293, "step": 12108 }, { "epoch": 0.35353712300370793, "grad_norm": 0.5384474051557829, "learning_rate": 3.591565287915653e-05, "loss": 0.6151, "step": 12109 }, { "epoch": 0.3535663192315553, "grad_norm": 0.5386230508284364, "learning_rate": 3.591403081914031e-05, "loss": 0.6083, "step": 12110 }, { "epoch": 0.35359551545940265, "grad_norm": 0.5674831046555041, "learning_rate": 3.591240875912409e-05, "loss": 0.6729, "step": 12111 }, { "epoch": 0.35362471168725, "grad_norm": 0.5474377170583543, "learning_rate": 3.591078669910787e-05, "loss": 0.6948, "step": 12112 }, { "epoch": 0.3536539079150974, "grad_norm": 0.4968629688066339, "learning_rate": 3.5909164639091644e-05, "loss": 0.5596, "step": 12113 }, { "epoch": 0.35368310414294474, "grad_norm": 0.5205350293306387, "learning_rate": 3.5907542579075426e-05, "loss": 0.611, "step": 12114 }, { "epoch": 0.3537123003707921, "grad_norm": 0.5835498469394487, "learning_rate": 3.590592051905921e-05, "loss": 0.708, "step": 12115 }, { "epoch": 0.35374149659863946, "grad_norm": 0.5473742376435745, "learning_rate": 3.590429845904298e-05, "loss": 0.6394, "step": 12116 }, { "epoch": 0.3537706928264868, "grad_norm": 0.585395846733117, "learning_rate": 3.5902676399026764e-05, "loss": 0.734, "step": 12117 }, { "epoch": 0.3537998890543342, "grad_norm": 0.4738549091512702, "learning_rate": 3.590105433901054e-05, "loss": 0.5146, "step": 12118 }, { "epoch": 0.35382908528218154, "grad_norm": 0.5130402818231948, "learning_rate": 3.589943227899433e-05, "loss": 0.5732, "step": 12119 }, { "epoch": 0.3538582815100289, "grad_norm": 0.5118386110825864, "learning_rate": 3.589781021897811e-05, "loss": 0.5599, "step": 12120 }, { "epoch": 0.35388747773787627, "grad_norm": 0.4991537527907721, "learning_rate": 3.5896188158961885e-05, "loss": 0.5473, "step": 12121 }, { "epoch": 0.3539166739657236, "grad_norm": 0.567967222285241, "learning_rate": 3.5894566098945666e-05, "loss": 0.706, "step": 12122 }, { "epoch": 0.353945870193571, "grad_norm": 0.4906664033261563, "learning_rate": 3.589294403892944e-05, "loss": 0.5226, "step": 12123 }, { "epoch": 0.35397506642141835, "grad_norm": 0.5239520897633291, "learning_rate": 3.589132197891322e-05, "loss": 0.6144, "step": 12124 }, { "epoch": 0.3540042626492657, "grad_norm": 0.6109192074787488, "learning_rate": 3.5889699918897005e-05, "loss": 0.7786, "step": 12125 }, { "epoch": 0.35403345887711307, "grad_norm": 0.5369053403561573, "learning_rate": 3.588807785888078e-05, "loss": 0.6464, "step": 12126 }, { "epoch": 0.35406265510496043, "grad_norm": 0.5381529909671823, "learning_rate": 3.588645579886456e-05, "loss": 0.6439, "step": 12127 }, { "epoch": 0.3540918513328078, "grad_norm": 0.5435661714077851, "learning_rate": 3.5884833738848337e-05, "loss": 0.6195, "step": 12128 }, { "epoch": 0.35412104756065516, "grad_norm": 0.544217100798437, "learning_rate": 3.588321167883212e-05, "loss": 0.6377, "step": 12129 }, { "epoch": 0.3541502437885025, "grad_norm": 0.5340924822845489, "learning_rate": 3.58815896188159e-05, "loss": 0.5496, "step": 12130 }, { "epoch": 0.3541794400163499, "grad_norm": 0.5280154250808285, "learning_rate": 3.5879967558799675e-05, "loss": 0.61, "step": 12131 }, { "epoch": 0.35420863624419724, "grad_norm": 0.5420207591920532, "learning_rate": 3.587834549878346e-05, "loss": 0.6331, "step": 12132 }, { "epoch": 0.3542378324720446, "grad_norm": 0.5322594695915229, "learning_rate": 3.587672343876723e-05, "loss": 0.6389, "step": 12133 }, { "epoch": 0.35426702869989196, "grad_norm": 0.5447541847483814, "learning_rate": 3.5875101378751014e-05, "loss": 0.6423, "step": 12134 }, { "epoch": 0.3542962249277393, "grad_norm": 0.5636625298349887, "learning_rate": 3.5873479318734795e-05, "loss": 0.6626, "step": 12135 }, { "epoch": 0.3543254211555867, "grad_norm": 0.5479289245470012, "learning_rate": 3.587185725871857e-05, "loss": 0.6703, "step": 12136 }, { "epoch": 0.35435461738343405, "grad_norm": 0.5483755857096921, "learning_rate": 3.587023519870235e-05, "loss": 0.6793, "step": 12137 }, { "epoch": 0.3543838136112814, "grad_norm": 0.5086223056393276, "learning_rate": 3.5868613138686134e-05, "loss": 0.5872, "step": 12138 }, { "epoch": 0.35441300983912877, "grad_norm": 0.5161403959080257, "learning_rate": 3.5866991078669916e-05, "loss": 0.6246, "step": 12139 }, { "epoch": 0.35444220606697613, "grad_norm": 0.5792841938111529, "learning_rate": 3.586536901865369e-05, "loss": 0.7254, "step": 12140 }, { "epoch": 0.3544714022948235, "grad_norm": 0.5497664666382656, "learning_rate": 3.586374695863747e-05, "loss": 0.6478, "step": 12141 }, { "epoch": 0.35450059852267085, "grad_norm": 0.5768468685179814, "learning_rate": 3.5862124898621254e-05, "loss": 0.7165, "step": 12142 }, { "epoch": 0.3545297947505182, "grad_norm": 0.5525736183959932, "learning_rate": 3.586050283860503e-05, "loss": 0.6611, "step": 12143 }, { "epoch": 0.3545589909783656, "grad_norm": 0.48997896215258213, "learning_rate": 3.585888077858881e-05, "loss": 0.566, "step": 12144 }, { "epoch": 0.35458818720621293, "grad_norm": 0.5225384191036581, "learning_rate": 3.585725871857259e-05, "loss": 0.5754, "step": 12145 }, { "epoch": 0.3546173834340603, "grad_norm": 0.494304645033846, "learning_rate": 3.585563665855637e-05, "loss": 0.5567, "step": 12146 }, { "epoch": 0.35464657966190766, "grad_norm": 0.5188312269266263, "learning_rate": 3.585401459854015e-05, "loss": 0.6154, "step": 12147 }, { "epoch": 0.354675775889755, "grad_norm": 0.5529092703412751, "learning_rate": 3.5852392538523924e-05, "loss": 0.6184, "step": 12148 }, { "epoch": 0.3547049721176024, "grad_norm": 0.5672923489532216, "learning_rate": 3.5850770478507706e-05, "loss": 0.6699, "step": 12149 }, { "epoch": 0.35473416834544974, "grad_norm": 0.574959556430195, "learning_rate": 3.584914841849149e-05, "loss": 0.7016, "step": 12150 }, { "epoch": 0.35476336457329716, "grad_norm": 0.5896164406539296, "learning_rate": 3.584752635847526e-05, "loss": 0.7499, "step": 12151 }, { "epoch": 0.3547925608011445, "grad_norm": 0.542855164529258, "learning_rate": 3.5845904298459045e-05, "loss": 0.6289, "step": 12152 }, { "epoch": 0.3548217570289919, "grad_norm": 0.5250439749949358, "learning_rate": 3.584428223844282e-05, "loss": 0.6048, "step": 12153 }, { "epoch": 0.35485095325683924, "grad_norm": 0.602086679948112, "learning_rate": 3.58426601784266e-05, "loss": 0.6951, "step": 12154 }, { "epoch": 0.3548801494846866, "grad_norm": 0.5250441514451292, "learning_rate": 3.584103811841038e-05, "loss": 0.5653, "step": 12155 }, { "epoch": 0.35490934571253396, "grad_norm": 0.5481763860654174, "learning_rate": 3.583941605839416e-05, "loss": 0.6596, "step": 12156 }, { "epoch": 0.3549385419403813, "grad_norm": 0.5470993738168766, "learning_rate": 3.583779399837795e-05, "loss": 0.6618, "step": 12157 }, { "epoch": 0.3549677381682287, "grad_norm": 0.5419172057478227, "learning_rate": 3.583617193836172e-05, "loss": 0.6527, "step": 12158 }, { "epoch": 0.35499693439607605, "grad_norm": 0.5260731742247275, "learning_rate": 3.5834549878345503e-05, "loss": 0.554, "step": 12159 }, { "epoch": 0.3550261306239234, "grad_norm": 0.5823770076870378, "learning_rate": 3.583292781832928e-05, "loss": 0.6837, "step": 12160 }, { "epoch": 0.35505532685177077, "grad_norm": 0.5605149574773579, "learning_rate": 3.583130575831306e-05, "loss": 0.6465, "step": 12161 }, { "epoch": 0.35508452307961813, "grad_norm": 0.5765628230802172, "learning_rate": 3.582968369829684e-05, "loss": 0.7229, "step": 12162 }, { "epoch": 0.3551137193074655, "grad_norm": 0.5523579977977389, "learning_rate": 3.582806163828062e-05, "loss": 0.6854, "step": 12163 }, { "epoch": 0.35514291553531285, "grad_norm": 0.5908827527412562, "learning_rate": 3.58264395782644e-05, "loss": 0.6895, "step": 12164 }, { "epoch": 0.3551721117631602, "grad_norm": 0.5311641205715739, "learning_rate": 3.582481751824818e-05, "loss": 0.629, "step": 12165 }, { "epoch": 0.3552013079910076, "grad_norm": 0.5395772223351178, "learning_rate": 3.5823195458231955e-05, "loss": 0.6467, "step": 12166 }, { "epoch": 0.35523050421885494, "grad_norm": 0.49856981217478263, "learning_rate": 3.582157339821574e-05, "loss": 0.5897, "step": 12167 }, { "epoch": 0.3552597004467023, "grad_norm": 0.583009255268829, "learning_rate": 3.581995133819951e-05, "loss": 0.6765, "step": 12168 }, { "epoch": 0.35528889667454966, "grad_norm": 0.5133401914985973, "learning_rate": 3.5818329278183294e-05, "loss": 0.5778, "step": 12169 }, { "epoch": 0.355318092902397, "grad_norm": 0.5648387518912756, "learning_rate": 3.5816707218167076e-05, "loss": 0.6782, "step": 12170 }, { "epoch": 0.3553472891302444, "grad_norm": 0.5078936388146034, "learning_rate": 3.581508515815085e-05, "loss": 0.5563, "step": 12171 }, { "epoch": 0.35537648535809174, "grad_norm": 0.7147788330628561, "learning_rate": 3.581346309813463e-05, "loss": 0.6688, "step": 12172 }, { "epoch": 0.3554056815859391, "grad_norm": 0.5458064915990741, "learning_rate": 3.581184103811841e-05, "loss": 0.6812, "step": 12173 }, { "epoch": 0.35543487781378647, "grad_norm": 0.5190909346315715, "learning_rate": 3.581021897810219e-05, "loss": 0.6431, "step": 12174 }, { "epoch": 0.3554640740416338, "grad_norm": 0.5566702603487288, "learning_rate": 3.580859691808597e-05, "loss": 0.6945, "step": 12175 }, { "epoch": 0.3554932702694812, "grad_norm": 0.5512864376472146, "learning_rate": 3.580697485806975e-05, "loss": 0.709, "step": 12176 }, { "epoch": 0.35552246649732855, "grad_norm": 0.557676671648897, "learning_rate": 3.5805352798053535e-05, "loss": 0.6781, "step": 12177 }, { "epoch": 0.3555516627251759, "grad_norm": 0.5415964427672619, "learning_rate": 3.580373073803731e-05, "loss": 0.6869, "step": 12178 }, { "epoch": 0.35558085895302327, "grad_norm": 0.5449955843349414, "learning_rate": 3.580210867802109e-05, "loss": 0.6525, "step": 12179 }, { "epoch": 0.35561005518087063, "grad_norm": 0.559137398719955, "learning_rate": 3.5800486618004866e-05, "loss": 0.6574, "step": 12180 }, { "epoch": 0.355639251408718, "grad_norm": 0.5534292736464892, "learning_rate": 3.579886455798865e-05, "loss": 0.6457, "step": 12181 }, { "epoch": 0.35566844763656535, "grad_norm": 0.5585259649288632, "learning_rate": 3.579724249797243e-05, "loss": 0.6814, "step": 12182 }, { "epoch": 0.3556976438644127, "grad_norm": 0.5295215986121256, "learning_rate": 3.5795620437956205e-05, "loss": 0.6198, "step": 12183 }, { "epoch": 0.3557268400922601, "grad_norm": 0.5127247267594122, "learning_rate": 3.5793998377939987e-05, "loss": 0.6022, "step": 12184 }, { "epoch": 0.35575603632010744, "grad_norm": 0.538203080824937, "learning_rate": 3.579237631792376e-05, "loss": 0.5591, "step": 12185 }, { "epoch": 0.3557852325479548, "grad_norm": 0.5782319804567948, "learning_rate": 3.579075425790754e-05, "loss": 0.6117, "step": 12186 }, { "epoch": 0.35581442877580216, "grad_norm": 0.5151243906631019, "learning_rate": 3.5789132197891325e-05, "loss": 0.5976, "step": 12187 }, { "epoch": 0.3558436250036495, "grad_norm": 0.5206583182694681, "learning_rate": 3.57875101378751e-05, "loss": 0.6067, "step": 12188 }, { "epoch": 0.3558728212314969, "grad_norm": 0.5659610526906647, "learning_rate": 3.578588807785888e-05, "loss": 0.6833, "step": 12189 }, { "epoch": 0.35590201745934424, "grad_norm": 0.567565337113131, "learning_rate": 3.5784266017842664e-05, "loss": 0.6831, "step": 12190 }, { "epoch": 0.3559312136871916, "grad_norm": 0.49278596845175604, "learning_rate": 3.578264395782644e-05, "loss": 0.5669, "step": 12191 }, { "epoch": 0.35596040991503897, "grad_norm": 0.5271185514841493, "learning_rate": 3.578102189781022e-05, "loss": 0.5986, "step": 12192 }, { "epoch": 0.35598960614288633, "grad_norm": 0.5616450479039188, "learning_rate": 3.5779399837793995e-05, "loss": 0.6456, "step": 12193 }, { "epoch": 0.3560188023707337, "grad_norm": 0.531103616798512, "learning_rate": 3.577777777777778e-05, "loss": 0.6089, "step": 12194 }, { "epoch": 0.35604799859858105, "grad_norm": 0.5360589387639244, "learning_rate": 3.577615571776156e-05, "loss": 0.6461, "step": 12195 }, { "epoch": 0.3560771948264284, "grad_norm": 0.5199811803889591, "learning_rate": 3.577453365774534e-05, "loss": 0.6366, "step": 12196 }, { "epoch": 0.3561063910542758, "grad_norm": 0.5402077017867204, "learning_rate": 3.577291159772912e-05, "loss": 0.6035, "step": 12197 }, { "epoch": 0.35613558728212313, "grad_norm": 0.5399404420746016, "learning_rate": 3.57712895377129e-05, "loss": 0.666, "step": 12198 }, { "epoch": 0.3561647835099705, "grad_norm": 0.6104742411267187, "learning_rate": 3.576966747769668e-05, "loss": 0.6889, "step": 12199 }, { "epoch": 0.35619397973781786, "grad_norm": 0.5771990537036887, "learning_rate": 3.5768045417680454e-05, "loss": 0.6375, "step": 12200 }, { "epoch": 0.3562231759656652, "grad_norm": 0.5572821603063737, "learning_rate": 3.5766423357664236e-05, "loss": 0.6832, "step": 12201 }, { "epoch": 0.3562523721935126, "grad_norm": 0.5826134802268664, "learning_rate": 3.576480129764802e-05, "loss": 0.7371, "step": 12202 }, { "epoch": 0.35628156842135994, "grad_norm": 0.5565615369162648, "learning_rate": 3.576317923763179e-05, "loss": 0.6812, "step": 12203 }, { "epoch": 0.3563107646492073, "grad_norm": 0.5893884652602346, "learning_rate": 3.5761557177615574e-05, "loss": 0.6564, "step": 12204 }, { "epoch": 0.35633996087705466, "grad_norm": 0.5745989090472553, "learning_rate": 3.575993511759935e-05, "loss": 0.7392, "step": 12205 }, { "epoch": 0.356369157104902, "grad_norm": 0.5426802465425496, "learning_rate": 3.575831305758313e-05, "loss": 0.6311, "step": 12206 }, { "epoch": 0.3563983533327494, "grad_norm": 0.5803188068278546, "learning_rate": 3.575669099756691e-05, "loss": 0.6953, "step": 12207 }, { "epoch": 0.35642754956059675, "grad_norm": 0.5251476000185101, "learning_rate": 3.575506893755069e-05, "loss": 0.6064, "step": 12208 }, { "epoch": 0.3564567457884441, "grad_norm": 0.5573179417898774, "learning_rate": 3.575344687753447e-05, "loss": 0.6816, "step": 12209 }, { "epoch": 0.35648594201629147, "grad_norm": 0.5166971646804172, "learning_rate": 3.5751824817518245e-05, "loss": 0.5953, "step": 12210 }, { "epoch": 0.3565151382441389, "grad_norm": 0.5903720993776275, "learning_rate": 3.5750202757502026e-05, "loss": 0.7029, "step": 12211 }, { "epoch": 0.35654433447198625, "grad_norm": 0.5897910556305224, "learning_rate": 3.574858069748581e-05, "loss": 0.6673, "step": 12212 }, { "epoch": 0.3565735306998336, "grad_norm": 0.5622292850109274, "learning_rate": 3.574695863746959e-05, "loss": 0.6221, "step": 12213 }, { "epoch": 0.35660272692768097, "grad_norm": 0.5218971680107213, "learning_rate": 3.574533657745337e-05, "loss": 0.5882, "step": 12214 }, { "epoch": 0.35663192315552833, "grad_norm": 0.5244563194344704, "learning_rate": 3.574371451743715e-05, "loss": 0.6038, "step": 12215 }, { "epoch": 0.3566611193833757, "grad_norm": 0.5493101629946803, "learning_rate": 3.574209245742093e-05, "loss": 0.6819, "step": 12216 }, { "epoch": 0.35669031561122305, "grad_norm": 0.5372307449075852, "learning_rate": 3.574047039740471e-05, "loss": 0.6254, "step": 12217 }, { "epoch": 0.3567195118390704, "grad_norm": 0.5656494361030536, "learning_rate": 3.5738848337388485e-05, "loss": 0.6762, "step": 12218 }, { "epoch": 0.3567487080669178, "grad_norm": 0.5167167279304115, "learning_rate": 3.573722627737227e-05, "loss": 0.5799, "step": 12219 }, { "epoch": 0.35677790429476514, "grad_norm": 0.5295033445121949, "learning_rate": 3.573560421735604e-05, "loss": 0.619, "step": 12220 }, { "epoch": 0.3568071005226125, "grad_norm": 0.5486015116865194, "learning_rate": 3.5733982157339824e-05, "loss": 0.6548, "step": 12221 }, { "epoch": 0.35683629675045986, "grad_norm": 0.532730970797569, "learning_rate": 3.5732360097323605e-05, "loss": 0.621, "step": 12222 }, { "epoch": 0.3568654929783072, "grad_norm": 0.5224467460930173, "learning_rate": 3.573073803730738e-05, "loss": 0.6255, "step": 12223 }, { "epoch": 0.3568946892061546, "grad_norm": 0.5001090951936421, "learning_rate": 3.572911597729116e-05, "loss": 0.5484, "step": 12224 }, { "epoch": 0.35692388543400194, "grad_norm": 0.5284491144784759, "learning_rate": 3.572749391727494e-05, "loss": 0.5953, "step": 12225 }, { "epoch": 0.3569530816618493, "grad_norm": 0.5282611416380305, "learning_rate": 3.572587185725872e-05, "loss": 0.6405, "step": 12226 }, { "epoch": 0.35698227788969666, "grad_norm": 0.5069405586554601, "learning_rate": 3.57242497972425e-05, "loss": 0.5619, "step": 12227 }, { "epoch": 0.357011474117544, "grad_norm": 0.514974756020871, "learning_rate": 3.5722627737226276e-05, "loss": 0.6059, "step": 12228 }, { "epoch": 0.3570406703453914, "grad_norm": 0.5604717857868591, "learning_rate": 3.572100567721006e-05, "loss": 0.6442, "step": 12229 }, { "epoch": 0.35706986657323875, "grad_norm": 0.5078311725412056, "learning_rate": 3.571938361719383e-05, "loss": 0.5982, "step": 12230 }, { "epoch": 0.3570990628010861, "grad_norm": 0.5331392237011972, "learning_rate": 3.5717761557177614e-05, "loss": 0.6078, "step": 12231 }, { "epoch": 0.35712825902893347, "grad_norm": 0.5231830994465186, "learning_rate": 3.57161394971614e-05, "loss": 0.6128, "step": 12232 }, { "epoch": 0.35715745525678083, "grad_norm": 0.5273818454011142, "learning_rate": 3.571451743714518e-05, "loss": 0.559, "step": 12233 }, { "epoch": 0.3571866514846282, "grad_norm": 0.4894587779635172, "learning_rate": 3.571289537712896e-05, "loss": 0.5481, "step": 12234 }, { "epoch": 0.35721584771247555, "grad_norm": 0.5656872839536681, "learning_rate": 3.5711273317112734e-05, "loss": 0.7173, "step": 12235 }, { "epoch": 0.3572450439403229, "grad_norm": 0.561493739033271, "learning_rate": 3.5709651257096516e-05, "loss": 0.6908, "step": 12236 }, { "epoch": 0.3572742401681703, "grad_norm": 0.48818231160714215, "learning_rate": 3.57080291970803e-05, "loss": 0.5559, "step": 12237 }, { "epoch": 0.35730343639601764, "grad_norm": 0.5362489954350526, "learning_rate": 3.570640713706407e-05, "loss": 0.6695, "step": 12238 }, { "epoch": 0.357332632623865, "grad_norm": 0.598938054241898, "learning_rate": 3.5704785077047855e-05, "loss": 0.7701, "step": 12239 }, { "epoch": 0.35736182885171236, "grad_norm": 0.565000734990978, "learning_rate": 3.570316301703163e-05, "loss": 0.6739, "step": 12240 }, { "epoch": 0.3573910250795597, "grad_norm": 0.5347161211962534, "learning_rate": 3.570154095701541e-05, "loss": 0.634, "step": 12241 }, { "epoch": 0.3574202213074071, "grad_norm": 0.4874163681251145, "learning_rate": 3.569991889699919e-05, "loss": 0.5651, "step": 12242 }, { "epoch": 0.35744941753525444, "grad_norm": 0.4703102064190605, "learning_rate": 3.569829683698297e-05, "loss": 0.5547, "step": 12243 }, { "epoch": 0.3574786137631018, "grad_norm": 0.5416433712961795, "learning_rate": 3.569667477696675e-05, "loss": 0.6456, "step": 12244 }, { "epoch": 0.35750780999094917, "grad_norm": 0.5281919370596135, "learning_rate": 3.5695052716950525e-05, "loss": 0.6085, "step": 12245 }, { "epoch": 0.3575370062187965, "grad_norm": 0.5294475892408429, "learning_rate": 3.569343065693431e-05, "loss": 0.6323, "step": 12246 }, { "epoch": 0.3575662024466439, "grad_norm": 0.5475093910990425, "learning_rate": 3.569180859691809e-05, "loss": 0.6219, "step": 12247 }, { "epoch": 0.35759539867449125, "grad_norm": 0.5224238471204865, "learning_rate": 3.5690186536901863e-05, "loss": 0.6484, "step": 12248 }, { "epoch": 0.3576245949023386, "grad_norm": 0.5504988985132038, "learning_rate": 3.5688564476885645e-05, "loss": 0.6867, "step": 12249 }, { "epoch": 0.35765379113018597, "grad_norm": 0.5218062672339904, "learning_rate": 3.568694241686942e-05, "loss": 0.5879, "step": 12250 }, { "epoch": 0.35768298735803333, "grad_norm": 0.560984432523597, "learning_rate": 3.568532035685321e-05, "loss": 0.6534, "step": 12251 }, { "epoch": 0.3577121835858807, "grad_norm": 0.5258847253946478, "learning_rate": 3.5683698296836984e-05, "loss": 0.6542, "step": 12252 }, { "epoch": 0.35774137981372806, "grad_norm": 0.6061842799965784, "learning_rate": 3.5682076236820765e-05, "loss": 0.6936, "step": 12253 }, { "epoch": 0.3577705760415754, "grad_norm": 0.5508981588351167, "learning_rate": 3.568045417680455e-05, "loss": 0.6756, "step": 12254 }, { "epoch": 0.3577997722694228, "grad_norm": 0.5425669064336329, "learning_rate": 3.567883211678832e-05, "loss": 0.6384, "step": 12255 }, { "epoch": 0.35782896849727014, "grad_norm": 0.5173785821161571, "learning_rate": 3.5677210056772104e-05, "loss": 0.6101, "step": 12256 }, { "epoch": 0.3578581647251175, "grad_norm": 0.5017581304996979, "learning_rate": 3.5675587996755886e-05, "loss": 0.6246, "step": 12257 }, { "epoch": 0.35788736095296486, "grad_norm": 0.5288520636053466, "learning_rate": 3.567396593673966e-05, "loss": 0.6508, "step": 12258 }, { "epoch": 0.3579165571808122, "grad_norm": 0.5387323752953249, "learning_rate": 3.567234387672344e-05, "loss": 0.6582, "step": 12259 }, { "epoch": 0.3579457534086596, "grad_norm": 0.4865832895041226, "learning_rate": 3.567072181670722e-05, "loss": 0.5451, "step": 12260 }, { "epoch": 0.35797494963650694, "grad_norm": 0.5342244534122353, "learning_rate": 3.5669099756691e-05, "loss": 0.571, "step": 12261 }, { "epoch": 0.3580041458643543, "grad_norm": 0.5296067303079934, "learning_rate": 3.566747769667478e-05, "loss": 0.5754, "step": 12262 }, { "epoch": 0.35803334209220167, "grad_norm": 0.5304561779642967, "learning_rate": 3.5665855636658556e-05, "loss": 0.6503, "step": 12263 }, { "epoch": 0.35806253832004903, "grad_norm": 0.5573872543992883, "learning_rate": 3.566423357664234e-05, "loss": 0.6349, "step": 12264 }, { "epoch": 0.3580917345478964, "grad_norm": 0.5027014588089438, "learning_rate": 3.566261151662611e-05, "loss": 0.5545, "step": 12265 }, { "epoch": 0.35812093077574375, "grad_norm": 0.5349034006747029, "learning_rate": 3.5660989456609895e-05, "loss": 0.6424, "step": 12266 }, { "epoch": 0.3581501270035911, "grad_norm": 0.5390774644616292, "learning_rate": 3.5659367396593676e-05, "loss": 0.634, "step": 12267 }, { "epoch": 0.3581793232314385, "grad_norm": 0.4972060199473739, "learning_rate": 3.565774533657745e-05, "loss": 0.5818, "step": 12268 }, { "epoch": 0.35820851945928583, "grad_norm": 0.5228325221230823, "learning_rate": 3.565612327656123e-05, "loss": 0.624, "step": 12269 }, { "epoch": 0.3582377156871332, "grad_norm": 0.5194219355048135, "learning_rate": 3.5654501216545015e-05, "loss": 0.6151, "step": 12270 }, { "epoch": 0.35826691191498056, "grad_norm": 0.5536191113167444, "learning_rate": 3.5652879156528797e-05, "loss": 0.6309, "step": 12271 }, { "epoch": 0.358296108142828, "grad_norm": 0.5780924121180946, "learning_rate": 3.565125709651257e-05, "loss": 0.684, "step": 12272 }, { "epoch": 0.35832530437067533, "grad_norm": 0.5545084342671105, "learning_rate": 3.564963503649635e-05, "loss": 0.6, "step": 12273 }, { "epoch": 0.3583545005985227, "grad_norm": 0.526611704914958, "learning_rate": 3.5648012976480135e-05, "loss": 0.5664, "step": 12274 }, { "epoch": 0.35838369682637006, "grad_norm": 0.6717499487104519, "learning_rate": 3.564639091646391e-05, "loss": 0.648, "step": 12275 }, { "epoch": 0.3584128930542174, "grad_norm": 0.5761323024373918, "learning_rate": 3.564476885644769e-05, "loss": 0.6989, "step": 12276 }, { "epoch": 0.3584420892820648, "grad_norm": 0.5223003709760876, "learning_rate": 3.5643146796431474e-05, "loss": 0.6052, "step": 12277 }, { "epoch": 0.35847128550991214, "grad_norm": 0.5904376595822405, "learning_rate": 3.564152473641525e-05, "loss": 0.7274, "step": 12278 }, { "epoch": 0.3585004817377595, "grad_norm": 0.5647084445357059, "learning_rate": 3.563990267639903e-05, "loss": 0.6958, "step": 12279 }, { "epoch": 0.35852967796560686, "grad_norm": 0.5667239988922294, "learning_rate": 3.5638280616382805e-05, "loss": 0.6744, "step": 12280 }, { "epoch": 0.3585588741934542, "grad_norm": 0.5364346102044706, "learning_rate": 3.563665855636659e-05, "loss": 0.6549, "step": 12281 }, { "epoch": 0.3585880704213016, "grad_norm": 0.5376436805181725, "learning_rate": 3.563503649635037e-05, "loss": 0.6362, "step": 12282 }, { "epoch": 0.35861726664914895, "grad_norm": 0.5164107273434759, "learning_rate": 3.5633414436334144e-05, "loss": 0.5916, "step": 12283 }, { "epoch": 0.3586464628769963, "grad_norm": 0.5360193588750135, "learning_rate": 3.5631792376317926e-05, "loss": 0.6591, "step": 12284 }, { "epoch": 0.35867565910484367, "grad_norm": 0.5666318030452709, "learning_rate": 3.56301703163017e-05, "loss": 0.6738, "step": 12285 }, { "epoch": 0.35870485533269103, "grad_norm": 0.5837746241008195, "learning_rate": 3.562854825628548e-05, "loss": 0.6969, "step": 12286 }, { "epoch": 0.3587340515605384, "grad_norm": 0.5496315937520941, "learning_rate": 3.5626926196269264e-05, "loss": 0.6748, "step": 12287 }, { "epoch": 0.35876324778838575, "grad_norm": 0.5301888879908899, "learning_rate": 3.562530413625304e-05, "loss": 0.6329, "step": 12288 }, { "epoch": 0.3587924440162331, "grad_norm": 0.5310528717185904, "learning_rate": 3.562368207623683e-05, "loss": 0.5937, "step": 12289 }, { "epoch": 0.3588216402440805, "grad_norm": 0.5474222653529334, "learning_rate": 3.56220600162206e-05, "loss": 0.6364, "step": 12290 }, { "epoch": 0.35885083647192784, "grad_norm": 0.5199952648038444, "learning_rate": 3.5620437956204384e-05, "loss": 0.6387, "step": 12291 }, { "epoch": 0.3588800326997752, "grad_norm": 0.5405811612869516, "learning_rate": 3.561881589618816e-05, "loss": 0.655, "step": 12292 }, { "epoch": 0.35890922892762256, "grad_norm": 0.573346675056056, "learning_rate": 3.561719383617194e-05, "loss": 0.7005, "step": 12293 }, { "epoch": 0.3589384251554699, "grad_norm": 0.5702935871070808, "learning_rate": 3.561557177615572e-05, "loss": 0.6878, "step": 12294 }, { "epoch": 0.3589676213833173, "grad_norm": 0.5227362964529033, "learning_rate": 3.56139497161395e-05, "loss": 0.6454, "step": 12295 }, { "epoch": 0.35899681761116464, "grad_norm": 0.5475382404318818, "learning_rate": 3.561232765612328e-05, "loss": 0.6616, "step": 12296 }, { "epoch": 0.359026013839012, "grad_norm": 0.5789036890255249, "learning_rate": 3.5610705596107055e-05, "loss": 0.7064, "step": 12297 }, { "epoch": 0.35905521006685936, "grad_norm": 0.5300559769231024, "learning_rate": 3.5609083536090836e-05, "loss": 0.6275, "step": 12298 }, { "epoch": 0.3590844062947067, "grad_norm": 0.507439498064869, "learning_rate": 3.560746147607462e-05, "loss": 0.5961, "step": 12299 }, { "epoch": 0.3591136025225541, "grad_norm": 0.5811115274279699, "learning_rate": 3.560583941605839e-05, "loss": 0.6891, "step": 12300 }, { "epoch": 0.35914279875040145, "grad_norm": 0.5573584489305375, "learning_rate": 3.5604217356042175e-05, "loss": 0.7063, "step": 12301 }, { "epoch": 0.3591719949782488, "grad_norm": 0.5009305313446457, "learning_rate": 3.560259529602596e-05, "loss": 0.5886, "step": 12302 }, { "epoch": 0.35920119120609617, "grad_norm": 0.5361188066173487, "learning_rate": 3.560097323600973e-05, "loss": 0.6535, "step": 12303 }, { "epoch": 0.35923038743394353, "grad_norm": 0.5464641517865488, "learning_rate": 3.559935117599351e-05, "loss": 0.6714, "step": 12304 }, { "epoch": 0.3592595836617909, "grad_norm": 0.5150419314117705, "learning_rate": 3.559772911597729e-05, "loss": 0.5571, "step": 12305 }, { "epoch": 0.35928877988963825, "grad_norm": 0.49810639734484186, "learning_rate": 3.559610705596107e-05, "loss": 0.559, "step": 12306 }, { "epoch": 0.3593179761174856, "grad_norm": 0.559788618396442, "learning_rate": 3.559448499594485e-05, "loss": 0.6295, "step": 12307 }, { "epoch": 0.359347172345333, "grad_norm": 0.5623854911938071, "learning_rate": 3.5592862935928634e-05, "loss": 0.6981, "step": 12308 }, { "epoch": 0.35937636857318034, "grad_norm": 0.5150945504560778, "learning_rate": 3.5591240875912415e-05, "loss": 0.6233, "step": 12309 }, { "epoch": 0.3594055648010277, "grad_norm": 0.5203166069106722, "learning_rate": 3.558961881589619e-05, "loss": 0.5885, "step": 12310 }, { "epoch": 0.35943476102887506, "grad_norm": 0.5608581639035193, "learning_rate": 3.558799675587997e-05, "loss": 0.6934, "step": 12311 }, { "epoch": 0.3594639572567224, "grad_norm": 0.5300748113273516, "learning_rate": 3.558637469586375e-05, "loss": 0.599, "step": 12312 }, { "epoch": 0.3594931534845698, "grad_norm": 0.537483216251179, "learning_rate": 3.558475263584753e-05, "loss": 0.6842, "step": 12313 }, { "epoch": 0.35952234971241714, "grad_norm": 0.553497301339687, "learning_rate": 3.558313057583131e-05, "loss": 0.6411, "step": 12314 }, { "epoch": 0.3595515459402645, "grad_norm": 0.5439524533196014, "learning_rate": 3.5581508515815086e-05, "loss": 0.6105, "step": 12315 }, { "epoch": 0.35958074216811187, "grad_norm": 0.561438272358041, "learning_rate": 3.557988645579887e-05, "loss": 0.6111, "step": 12316 }, { "epoch": 0.3596099383959592, "grad_norm": 0.5392541905992776, "learning_rate": 3.557826439578264e-05, "loss": 0.6268, "step": 12317 }, { "epoch": 0.3596391346238066, "grad_norm": 0.5667913842716203, "learning_rate": 3.5576642335766424e-05, "loss": 0.6475, "step": 12318 }, { "epoch": 0.35966833085165395, "grad_norm": 0.523569008435412, "learning_rate": 3.5575020275750206e-05, "loss": 0.6446, "step": 12319 }, { "epoch": 0.3596975270795013, "grad_norm": 0.5489133251065894, "learning_rate": 3.557339821573398e-05, "loss": 0.6811, "step": 12320 }, { "epoch": 0.3597267233073487, "grad_norm": 0.5695670684999509, "learning_rate": 3.557177615571776e-05, "loss": 0.7427, "step": 12321 }, { "epoch": 0.35975591953519603, "grad_norm": 0.5473823726480433, "learning_rate": 3.5570154095701544e-05, "loss": 0.6749, "step": 12322 }, { "epoch": 0.3597851157630434, "grad_norm": 0.5346773800286512, "learning_rate": 3.556853203568532e-05, "loss": 0.6137, "step": 12323 }, { "epoch": 0.35981431199089076, "grad_norm": 0.5901617437874406, "learning_rate": 3.55669099756691e-05, "loss": 0.7114, "step": 12324 }, { "epoch": 0.3598435082187381, "grad_norm": 0.5378938714945329, "learning_rate": 3.5565287915652876e-05, "loss": 0.6517, "step": 12325 }, { "epoch": 0.3598727044465855, "grad_norm": 0.5368260879359524, "learning_rate": 3.556366585563666e-05, "loss": 0.6461, "step": 12326 }, { "epoch": 0.35990190067443284, "grad_norm": 0.5313717368781857, "learning_rate": 3.556204379562044e-05, "loss": 0.6181, "step": 12327 }, { "epoch": 0.3599310969022802, "grad_norm": 0.48814723189233705, "learning_rate": 3.556042173560422e-05, "loss": 0.5593, "step": 12328 }, { "epoch": 0.35996029313012756, "grad_norm": 0.5642615874916951, "learning_rate": 3.5558799675588e-05, "loss": 0.6486, "step": 12329 }, { "epoch": 0.3599894893579749, "grad_norm": 0.5573176658025637, "learning_rate": 3.555717761557178e-05, "loss": 0.6514, "step": 12330 }, { "epoch": 0.3600186855858223, "grad_norm": 0.5314183349274729, "learning_rate": 3.555555555555556e-05, "loss": 0.6236, "step": 12331 }, { "epoch": 0.3600478818136697, "grad_norm": 0.511645934502882, "learning_rate": 3.5553933495539335e-05, "loss": 0.5606, "step": 12332 }, { "epoch": 0.36007707804151706, "grad_norm": 0.5711791798051292, "learning_rate": 3.555231143552312e-05, "loss": 0.6858, "step": 12333 }, { "epoch": 0.3601062742693644, "grad_norm": 0.5683402660326757, "learning_rate": 3.55506893755069e-05, "loss": 0.662, "step": 12334 }, { "epoch": 0.3601354704972118, "grad_norm": 0.540472150622642, "learning_rate": 3.5549067315490673e-05, "loss": 0.605, "step": 12335 }, { "epoch": 0.36016466672505915, "grad_norm": 0.5770706188619832, "learning_rate": 3.5547445255474455e-05, "loss": 0.7207, "step": 12336 }, { "epoch": 0.3601938629529065, "grad_norm": 0.5210508232754956, "learning_rate": 3.554582319545823e-05, "loss": 0.6159, "step": 12337 }, { "epoch": 0.36022305918075387, "grad_norm": 0.5590949394851332, "learning_rate": 3.554420113544201e-05, "loss": 0.66, "step": 12338 }, { "epoch": 0.36025225540860123, "grad_norm": 0.5513672490609581, "learning_rate": 3.5542579075425794e-05, "loss": 0.6193, "step": 12339 }, { "epoch": 0.3602814516364486, "grad_norm": 0.5646002718988566, "learning_rate": 3.554095701540957e-05, "loss": 0.6994, "step": 12340 }, { "epoch": 0.36031064786429595, "grad_norm": 0.5215373390368604, "learning_rate": 3.553933495539335e-05, "loss": 0.5951, "step": 12341 }, { "epoch": 0.3603398440921433, "grad_norm": 0.5883049895158693, "learning_rate": 3.5537712895377125e-05, "loss": 0.6961, "step": 12342 }, { "epoch": 0.3603690403199907, "grad_norm": 0.5471221871726982, "learning_rate": 3.553609083536091e-05, "loss": 0.7032, "step": 12343 }, { "epoch": 0.36039823654783804, "grad_norm": 0.5179091228727482, "learning_rate": 3.553446877534469e-05, "loss": 0.6109, "step": 12344 }, { "epoch": 0.3604274327756854, "grad_norm": 0.5369486616150214, "learning_rate": 3.5532846715328464e-05, "loss": 0.5852, "step": 12345 }, { "epoch": 0.36045662900353276, "grad_norm": 0.556867130684565, "learning_rate": 3.553122465531225e-05, "loss": 0.6712, "step": 12346 }, { "epoch": 0.3604858252313801, "grad_norm": 0.5202164941672425, "learning_rate": 3.552960259529603e-05, "loss": 0.625, "step": 12347 }, { "epoch": 0.3605150214592275, "grad_norm": 0.4977784090484141, "learning_rate": 3.552798053527981e-05, "loss": 0.566, "step": 12348 }, { "epoch": 0.36054421768707484, "grad_norm": 0.523123331626357, "learning_rate": 3.552635847526359e-05, "loss": 0.6094, "step": 12349 }, { "epoch": 0.3605734139149222, "grad_norm": 0.5005266886492818, "learning_rate": 3.5524736415247366e-05, "loss": 0.5786, "step": 12350 }, { "epoch": 0.36060261014276956, "grad_norm": 0.5313675354838462, "learning_rate": 3.552311435523115e-05, "loss": 0.6374, "step": 12351 }, { "epoch": 0.3606318063706169, "grad_norm": 0.6227290044236825, "learning_rate": 3.552149229521492e-05, "loss": 0.6839, "step": 12352 }, { "epoch": 0.3606610025984643, "grad_norm": 0.5642307272518706, "learning_rate": 3.5519870235198705e-05, "loss": 0.652, "step": 12353 }, { "epoch": 0.36069019882631165, "grad_norm": 0.52018006359797, "learning_rate": 3.5518248175182486e-05, "loss": 0.6218, "step": 12354 }, { "epoch": 0.360719395054159, "grad_norm": 0.5377122079941464, "learning_rate": 3.551662611516626e-05, "loss": 0.657, "step": 12355 }, { "epoch": 0.36074859128200637, "grad_norm": 0.5359265473801087, "learning_rate": 3.551500405515004e-05, "loss": 0.5985, "step": 12356 }, { "epoch": 0.36077778750985373, "grad_norm": 0.5166514592251109, "learning_rate": 3.551338199513382e-05, "loss": 0.5723, "step": 12357 }, { "epoch": 0.3608069837377011, "grad_norm": 0.5444469338553742, "learning_rate": 3.55117599351176e-05, "loss": 0.6811, "step": 12358 }, { "epoch": 0.36083617996554845, "grad_norm": 0.5233612497737884, "learning_rate": 3.551013787510138e-05, "loss": 0.5901, "step": 12359 }, { "epoch": 0.3608653761933958, "grad_norm": 0.5505651246484727, "learning_rate": 3.5508515815085157e-05, "loss": 0.6524, "step": 12360 }, { "epoch": 0.3608945724212432, "grad_norm": 0.5323434954604092, "learning_rate": 3.550689375506894e-05, "loss": 0.6426, "step": 12361 }, { "epoch": 0.36092376864909054, "grad_norm": 0.48517978322082245, "learning_rate": 3.550527169505271e-05, "loss": 0.5521, "step": 12362 }, { "epoch": 0.3609529648769379, "grad_norm": 0.5454923487801401, "learning_rate": 3.5503649635036495e-05, "loss": 0.6751, "step": 12363 }, { "epoch": 0.36098216110478526, "grad_norm": 0.5245793396352262, "learning_rate": 3.5502027575020284e-05, "loss": 0.6194, "step": 12364 }, { "epoch": 0.3610113573326326, "grad_norm": 0.5502680070521415, "learning_rate": 3.550040551500406e-05, "loss": 0.6839, "step": 12365 }, { "epoch": 0.36104055356048, "grad_norm": 0.5693226992404752, "learning_rate": 3.549878345498784e-05, "loss": 0.6151, "step": 12366 }, { "epoch": 0.36106974978832734, "grad_norm": 0.5227304857635211, "learning_rate": 3.5497161394971615e-05, "loss": 0.6361, "step": 12367 }, { "epoch": 0.3610989460161747, "grad_norm": 0.534202109576812, "learning_rate": 3.54955393349554e-05, "loss": 0.5886, "step": 12368 }, { "epoch": 0.36112814224402207, "grad_norm": 0.527814305145515, "learning_rate": 3.549391727493918e-05, "loss": 0.6219, "step": 12369 }, { "epoch": 0.3611573384718694, "grad_norm": 0.5865193348403615, "learning_rate": 3.5492295214922954e-05, "loss": 0.6868, "step": 12370 }, { "epoch": 0.3611865346997168, "grad_norm": 0.5079571680076534, "learning_rate": 3.5490673154906736e-05, "loss": 0.594, "step": 12371 }, { "epoch": 0.36121573092756415, "grad_norm": 0.6053910248328914, "learning_rate": 3.548905109489051e-05, "loss": 0.7155, "step": 12372 }, { "epoch": 0.3612449271554115, "grad_norm": 0.609114374001804, "learning_rate": 3.548742903487429e-05, "loss": 0.6716, "step": 12373 }, { "epoch": 0.36127412338325887, "grad_norm": 0.5471185408938767, "learning_rate": 3.5485806974858074e-05, "loss": 0.6709, "step": 12374 }, { "epoch": 0.36130331961110623, "grad_norm": 0.5579033771931003, "learning_rate": 3.548418491484185e-05, "loss": 0.6419, "step": 12375 }, { "epoch": 0.3613325158389536, "grad_norm": 0.566805943242098, "learning_rate": 3.548256285482563e-05, "loss": 0.6858, "step": 12376 }, { "epoch": 0.36136171206680096, "grad_norm": 0.5357836890891357, "learning_rate": 3.5480940794809406e-05, "loss": 0.6019, "step": 12377 }, { "epoch": 0.3613909082946483, "grad_norm": 0.6126945338367011, "learning_rate": 3.547931873479319e-05, "loss": 0.7066, "step": 12378 }, { "epoch": 0.3614201045224957, "grad_norm": 0.5061815370795361, "learning_rate": 3.547769667477697e-05, "loss": 0.5855, "step": 12379 }, { "epoch": 0.36144930075034304, "grad_norm": 0.5521143964358708, "learning_rate": 3.5476074614760744e-05, "loss": 0.6347, "step": 12380 }, { "epoch": 0.3614784969781904, "grad_norm": 0.473260354504539, "learning_rate": 3.5474452554744526e-05, "loss": 0.5495, "step": 12381 }, { "epoch": 0.36150769320603776, "grad_norm": 0.5774374575593003, "learning_rate": 3.54728304947283e-05, "loss": 0.6845, "step": 12382 }, { "epoch": 0.3615368894338851, "grad_norm": 0.526947810110906, "learning_rate": 3.547120843471209e-05, "loss": 0.5744, "step": 12383 }, { "epoch": 0.3615660856617325, "grad_norm": 0.5117843880227558, "learning_rate": 3.5469586374695865e-05, "loss": 0.5856, "step": 12384 }, { "epoch": 0.36159528188957984, "grad_norm": 0.563648638673385, "learning_rate": 3.5467964314679646e-05, "loss": 0.7108, "step": 12385 }, { "epoch": 0.3616244781174272, "grad_norm": 0.5394745470395211, "learning_rate": 3.546634225466343e-05, "loss": 0.6345, "step": 12386 }, { "epoch": 0.36165367434527457, "grad_norm": 0.6547303957077202, "learning_rate": 3.54647201946472e-05, "loss": 0.5637, "step": 12387 }, { "epoch": 0.36168287057312193, "grad_norm": 0.5193562964441665, "learning_rate": 3.5463098134630985e-05, "loss": 0.6306, "step": 12388 }, { "epoch": 0.3617120668009693, "grad_norm": 0.561389032928778, "learning_rate": 3.546147607461477e-05, "loss": 0.6675, "step": 12389 }, { "epoch": 0.36174126302881665, "grad_norm": 0.5395537317789628, "learning_rate": 3.545985401459854e-05, "loss": 0.6433, "step": 12390 }, { "epoch": 0.361770459256664, "grad_norm": 0.5283463564955019, "learning_rate": 3.5458231954582323e-05, "loss": 0.6498, "step": 12391 }, { "epoch": 0.36179965548451143, "grad_norm": 0.537098467197796, "learning_rate": 3.54566098945661e-05, "loss": 0.6369, "step": 12392 }, { "epoch": 0.3618288517123588, "grad_norm": 0.5366553155044425, "learning_rate": 3.545498783454988e-05, "loss": 0.6226, "step": 12393 }, { "epoch": 0.36185804794020615, "grad_norm": 0.6048598828965386, "learning_rate": 3.545336577453366e-05, "loss": 0.7344, "step": 12394 }, { "epoch": 0.3618872441680535, "grad_norm": 0.568249513811357, "learning_rate": 3.545174371451744e-05, "loss": 0.6382, "step": 12395 }, { "epoch": 0.3619164403959009, "grad_norm": 0.5420225535622915, "learning_rate": 3.545012165450122e-05, "loss": 0.6688, "step": 12396 }, { "epoch": 0.36194563662374823, "grad_norm": 0.5283643381680516, "learning_rate": 3.5448499594484994e-05, "loss": 0.6329, "step": 12397 }, { "epoch": 0.3619748328515956, "grad_norm": 0.5196611170124468, "learning_rate": 3.5446877534468775e-05, "loss": 0.614, "step": 12398 }, { "epoch": 0.36200402907944296, "grad_norm": 0.5510422497277581, "learning_rate": 3.544525547445256e-05, "loss": 0.6786, "step": 12399 }, { "epoch": 0.3620332253072903, "grad_norm": 0.5425933275117023, "learning_rate": 3.544363341443633e-05, "loss": 0.6554, "step": 12400 }, { "epoch": 0.3620624215351377, "grad_norm": 0.5029628871396497, "learning_rate": 3.5442011354420114e-05, "loss": 0.5725, "step": 12401 }, { "epoch": 0.36209161776298504, "grad_norm": 0.5366523493026903, "learning_rate": 3.5440389294403896e-05, "loss": 0.6229, "step": 12402 }, { "epoch": 0.3621208139908324, "grad_norm": 0.5364046755976231, "learning_rate": 3.543876723438768e-05, "loss": 0.6592, "step": 12403 }, { "epoch": 0.36215001021867976, "grad_norm": 0.5209218671577288, "learning_rate": 3.543714517437145e-05, "loss": 0.6181, "step": 12404 }, { "epoch": 0.3621792064465271, "grad_norm": 0.5714471864981268, "learning_rate": 3.5435523114355234e-05, "loss": 0.6785, "step": 12405 }, { "epoch": 0.3622084026743745, "grad_norm": 0.5402141095581741, "learning_rate": 3.5433901054339016e-05, "loss": 0.6261, "step": 12406 }, { "epoch": 0.36223759890222185, "grad_norm": 0.558178536108735, "learning_rate": 3.543227899432279e-05, "loss": 0.6877, "step": 12407 }, { "epoch": 0.3622667951300692, "grad_norm": 0.5297640613076017, "learning_rate": 3.543065693430657e-05, "loss": 0.5883, "step": 12408 }, { "epoch": 0.36229599135791657, "grad_norm": 0.5971616744181697, "learning_rate": 3.5429034874290354e-05, "loss": 0.7217, "step": 12409 }, { "epoch": 0.36232518758576393, "grad_norm": 0.5408966939525136, "learning_rate": 3.542741281427413e-05, "loss": 0.6319, "step": 12410 }, { "epoch": 0.3623543838136113, "grad_norm": 0.5119610778614861, "learning_rate": 3.542579075425791e-05, "loss": 0.5803, "step": 12411 }, { "epoch": 0.36238358004145865, "grad_norm": 0.6304382659463932, "learning_rate": 3.5424168694241686e-05, "loss": 0.7144, "step": 12412 }, { "epoch": 0.362412776269306, "grad_norm": 0.5453249651150723, "learning_rate": 3.542254663422547e-05, "loss": 0.6555, "step": 12413 }, { "epoch": 0.3624419724971534, "grad_norm": 0.5168919736906356, "learning_rate": 3.542092457420925e-05, "loss": 0.6279, "step": 12414 }, { "epoch": 0.36247116872500074, "grad_norm": 0.6094899936067463, "learning_rate": 3.5419302514193025e-05, "loss": 0.7123, "step": 12415 }, { "epoch": 0.3625003649528481, "grad_norm": 0.48112953684095244, "learning_rate": 3.5417680454176806e-05, "loss": 0.6052, "step": 12416 }, { "epoch": 0.36252956118069546, "grad_norm": 0.5208605877181004, "learning_rate": 3.541605839416058e-05, "loss": 0.6271, "step": 12417 }, { "epoch": 0.3625587574085428, "grad_norm": 0.4929375278151663, "learning_rate": 3.541443633414436e-05, "loss": 0.5506, "step": 12418 }, { "epoch": 0.3625879536363902, "grad_norm": 0.5579834459147512, "learning_rate": 3.5412814274128145e-05, "loss": 0.6817, "step": 12419 }, { "epoch": 0.36261714986423754, "grad_norm": 0.5486981071391658, "learning_rate": 3.541119221411192e-05, "loss": 0.6262, "step": 12420 }, { "epoch": 0.3626463460920849, "grad_norm": 0.5520343978765629, "learning_rate": 3.540957015409571e-05, "loss": 0.5798, "step": 12421 }, { "epoch": 0.36267554231993226, "grad_norm": 0.5156886932494809, "learning_rate": 3.5407948094079483e-05, "loss": 0.6074, "step": 12422 }, { "epoch": 0.3627047385477796, "grad_norm": 0.572259863858956, "learning_rate": 3.5406326034063265e-05, "loss": 0.647, "step": 12423 }, { "epoch": 0.362733934775627, "grad_norm": 0.5146430417391525, "learning_rate": 3.540470397404704e-05, "loss": 0.5919, "step": 12424 }, { "epoch": 0.36276313100347435, "grad_norm": 0.4983053370313652, "learning_rate": 3.540308191403082e-05, "loss": 0.5526, "step": 12425 }, { "epoch": 0.3627923272313217, "grad_norm": 0.5144456578319867, "learning_rate": 3.5401459854014604e-05, "loss": 0.6018, "step": 12426 }, { "epoch": 0.36282152345916907, "grad_norm": 0.5076007293668167, "learning_rate": 3.539983779399838e-05, "loss": 0.5867, "step": 12427 }, { "epoch": 0.36285071968701643, "grad_norm": 0.5509959933911861, "learning_rate": 3.539821573398216e-05, "loss": 0.631, "step": 12428 }, { "epoch": 0.3628799159148638, "grad_norm": 0.5669350284217056, "learning_rate": 3.5396593673965935e-05, "loss": 0.6146, "step": 12429 }, { "epoch": 0.36290911214271115, "grad_norm": 0.5696166596496954, "learning_rate": 3.539497161394972e-05, "loss": 0.6916, "step": 12430 }, { "epoch": 0.3629383083705585, "grad_norm": 0.5792295844371892, "learning_rate": 3.53933495539335e-05, "loss": 0.6771, "step": 12431 }, { "epoch": 0.3629675045984059, "grad_norm": 0.5641923626319684, "learning_rate": 3.5391727493917274e-05, "loss": 0.6185, "step": 12432 }, { "epoch": 0.36299670082625324, "grad_norm": 0.4811086783093714, "learning_rate": 3.5390105433901056e-05, "loss": 0.5224, "step": 12433 }, { "epoch": 0.3630258970541006, "grad_norm": 0.5383037453875469, "learning_rate": 3.538848337388484e-05, "loss": 0.6192, "step": 12434 }, { "epoch": 0.36305509328194796, "grad_norm": 0.5397665052185532, "learning_rate": 3.538686131386861e-05, "loss": 0.639, "step": 12435 }, { "epoch": 0.3630842895097953, "grad_norm": 0.5842165342834409, "learning_rate": 3.5385239253852394e-05, "loss": 0.7288, "step": 12436 }, { "epoch": 0.3631134857376427, "grad_norm": 0.5149681054218769, "learning_rate": 3.538361719383617e-05, "loss": 0.5885, "step": 12437 }, { "epoch": 0.36314268196549004, "grad_norm": 0.8023033167632095, "learning_rate": 3.538199513381995e-05, "loss": 0.6548, "step": 12438 }, { "epoch": 0.3631718781933374, "grad_norm": 0.5590730989701964, "learning_rate": 3.538037307380373e-05, "loss": 0.6781, "step": 12439 }, { "epoch": 0.36320107442118477, "grad_norm": 0.5505638787435008, "learning_rate": 3.5378751013787515e-05, "loss": 0.695, "step": 12440 }, { "epoch": 0.3632302706490321, "grad_norm": 0.5680920993865655, "learning_rate": 3.5377128953771296e-05, "loss": 0.7514, "step": 12441 }, { "epoch": 0.3632594668768795, "grad_norm": 0.5295895525043784, "learning_rate": 3.537550689375507e-05, "loss": 0.6401, "step": 12442 }, { "epoch": 0.36328866310472685, "grad_norm": 0.5265050422460628, "learning_rate": 3.537388483373885e-05, "loss": 0.6238, "step": 12443 }, { "epoch": 0.3633178593325742, "grad_norm": 0.5523469137745198, "learning_rate": 3.537226277372263e-05, "loss": 0.6278, "step": 12444 }, { "epoch": 0.3633470555604216, "grad_norm": 0.5267782874574056, "learning_rate": 3.537064071370641e-05, "loss": 0.5977, "step": 12445 }, { "epoch": 0.36337625178826893, "grad_norm": 0.5499606741687324, "learning_rate": 3.536901865369019e-05, "loss": 0.672, "step": 12446 }, { "epoch": 0.3634054480161163, "grad_norm": 0.5547389857150912, "learning_rate": 3.5367396593673967e-05, "loss": 0.6697, "step": 12447 }, { "epoch": 0.36343464424396366, "grad_norm": 0.5423866981559969, "learning_rate": 3.536577453365775e-05, "loss": 0.665, "step": 12448 }, { "epoch": 0.363463840471811, "grad_norm": 0.5459204431076982, "learning_rate": 3.536415247364152e-05, "loss": 0.6528, "step": 12449 }, { "epoch": 0.3634930366996584, "grad_norm": 0.5427853897412714, "learning_rate": 3.5362530413625305e-05, "loss": 0.6702, "step": 12450 }, { "epoch": 0.36352223292750574, "grad_norm": 0.5408140606672597, "learning_rate": 3.536090835360909e-05, "loss": 0.6566, "step": 12451 }, { "epoch": 0.3635514291553531, "grad_norm": 0.5268964816215814, "learning_rate": 3.535928629359286e-05, "loss": 0.6073, "step": 12452 }, { "epoch": 0.3635806253832005, "grad_norm": 0.5569121255357894, "learning_rate": 3.5357664233576644e-05, "loss": 0.6271, "step": 12453 }, { "epoch": 0.3636098216110479, "grad_norm": 0.500639480399219, "learning_rate": 3.535604217356042e-05, "loss": 0.5613, "step": 12454 }, { "epoch": 0.36363901783889524, "grad_norm": 0.5159719179886637, "learning_rate": 3.53544201135442e-05, "loss": 0.5923, "step": 12455 }, { "epoch": 0.3636682140667426, "grad_norm": 0.5495855936667552, "learning_rate": 3.535279805352798e-05, "loss": 0.6596, "step": 12456 }, { "epoch": 0.36369741029458996, "grad_norm": 0.5221032077718313, "learning_rate": 3.535117599351176e-05, "loss": 0.6557, "step": 12457 }, { "epoch": 0.3637266065224373, "grad_norm": 0.5960843391242615, "learning_rate": 3.534955393349554e-05, "loss": 0.6226, "step": 12458 }, { "epoch": 0.3637558027502847, "grad_norm": 0.5090455221983662, "learning_rate": 3.534793187347932e-05, "loss": 0.5683, "step": 12459 }, { "epoch": 0.36378499897813205, "grad_norm": 0.5243755481055774, "learning_rate": 3.53463098134631e-05, "loss": 0.6019, "step": 12460 }, { "epoch": 0.3638141952059794, "grad_norm": 0.522591965467989, "learning_rate": 3.5344687753446884e-05, "loss": 0.6246, "step": 12461 }, { "epoch": 0.36384339143382677, "grad_norm": 0.5399611265935798, "learning_rate": 3.534306569343066e-05, "loss": 0.6139, "step": 12462 }, { "epoch": 0.36387258766167413, "grad_norm": 0.5426173317764265, "learning_rate": 3.534144363341444e-05, "loss": 0.6342, "step": 12463 }, { "epoch": 0.3639017838895215, "grad_norm": 0.498467821578622, "learning_rate": 3.5339821573398216e-05, "loss": 0.5774, "step": 12464 }, { "epoch": 0.36393098011736885, "grad_norm": 0.5236135355149202, "learning_rate": 3.5338199513382e-05, "loss": 0.6197, "step": 12465 }, { "epoch": 0.3639601763452162, "grad_norm": 0.5338787640299503, "learning_rate": 3.533657745336578e-05, "loss": 0.6196, "step": 12466 }, { "epoch": 0.3639893725730636, "grad_norm": 0.5792939983191135, "learning_rate": 3.5334955393349554e-05, "loss": 0.75, "step": 12467 }, { "epoch": 0.36401856880091094, "grad_norm": 0.5812955841126399, "learning_rate": 3.5333333333333336e-05, "loss": 0.7149, "step": 12468 }, { "epoch": 0.3640477650287583, "grad_norm": 0.5111268516310041, "learning_rate": 3.533171127331711e-05, "loss": 0.557, "step": 12469 }, { "epoch": 0.36407696125660566, "grad_norm": 0.529368643779144, "learning_rate": 3.533008921330089e-05, "loss": 0.6076, "step": 12470 }, { "epoch": 0.364106157484453, "grad_norm": 0.5401533309081735, "learning_rate": 3.5328467153284675e-05, "loss": 0.6335, "step": 12471 }, { "epoch": 0.3641353537123004, "grad_norm": 0.5140383457440366, "learning_rate": 3.532684509326845e-05, "loss": 0.5399, "step": 12472 }, { "epoch": 0.36416454994014774, "grad_norm": 0.5358009531016944, "learning_rate": 3.532522303325223e-05, "loss": 0.6054, "step": 12473 }, { "epoch": 0.3641937461679951, "grad_norm": 0.5076663212527328, "learning_rate": 3.5323600973236006e-05, "loss": 0.6051, "step": 12474 }, { "epoch": 0.36422294239584246, "grad_norm": 0.5459320565546698, "learning_rate": 3.532197891321979e-05, "loss": 0.6675, "step": 12475 }, { "epoch": 0.3642521386236898, "grad_norm": 0.47473644382216756, "learning_rate": 3.532035685320357e-05, "loss": 0.525, "step": 12476 }, { "epoch": 0.3642813348515372, "grad_norm": 0.5819096214965805, "learning_rate": 3.5318734793187345e-05, "loss": 0.6563, "step": 12477 }, { "epoch": 0.36431053107938455, "grad_norm": 0.5462519255550451, "learning_rate": 3.5317112733171133e-05, "loss": 0.6667, "step": 12478 }, { "epoch": 0.3643397273072319, "grad_norm": 0.5584226080595917, "learning_rate": 3.531549067315491e-05, "loss": 0.6738, "step": 12479 }, { "epoch": 0.36436892353507927, "grad_norm": 0.5393782043243147, "learning_rate": 3.531386861313869e-05, "loss": 0.6449, "step": 12480 }, { "epoch": 0.36439811976292663, "grad_norm": 0.486435449517194, "learning_rate": 3.531224655312247e-05, "loss": 0.5211, "step": 12481 }, { "epoch": 0.364427315990774, "grad_norm": 0.5514024300184597, "learning_rate": 3.531062449310625e-05, "loss": 0.6612, "step": 12482 }, { "epoch": 0.36445651221862135, "grad_norm": 0.5950727828981605, "learning_rate": 3.530900243309003e-05, "loss": 0.6871, "step": 12483 }, { "epoch": 0.3644857084464687, "grad_norm": 0.5556725565378022, "learning_rate": 3.5307380373073804e-05, "loss": 0.6757, "step": 12484 }, { "epoch": 0.3645149046743161, "grad_norm": 0.4993003113621244, "learning_rate": 3.5305758313057585e-05, "loss": 0.5754, "step": 12485 }, { "epoch": 0.36454410090216344, "grad_norm": 0.5832501664193142, "learning_rate": 3.530413625304137e-05, "loss": 0.7017, "step": 12486 }, { "epoch": 0.3645732971300108, "grad_norm": 0.4971396955680458, "learning_rate": 3.530251419302514e-05, "loss": 0.5533, "step": 12487 }, { "epoch": 0.36460249335785816, "grad_norm": 0.552053537489471, "learning_rate": 3.5300892133008924e-05, "loss": 0.6574, "step": 12488 }, { "epoch": 0.3646316895857055, "grad_norm": 0.530296038786498, "learning_rate": 3.52992700729927e-05, "loss": 0.6337, "step": 12489 }, { "epoch": 0.3646608858135529, "grad_norm": 0.5837295836199472, "learning_rate": 3.529764801297648e-05, "loss": 0.6494, "step": 12490 }, { "epoch": 0.36469008204140024, "grad_norm": 0.5694655003432807, "learning_rate": 3.529602595296026e-05, "loss": 0.6821, "step": 12491 }, { "epoch": 0.3647192782692476, "grad_norm": 0.5243864283870885, "learning_rate": 3.529440389294404e-05, "loss": 0.6316, "step": 12492 }, { "epoch": 0.36474847449709497, "grad_norm": 0.5557087648239483, "learning_rate": 3.529278183292782e-05, "loss": 0.7043, "step": 12493 }, { "epoch": 0.3647776707249423, "grad_norm": 0.5706562612726762, "learning_rate": 3.5291159772911594e-05, "loss": 0.6675, "step": 12494 }, { "epoch": 0.3648068669527897, "grad_norm": 0.6885723024542874, "learning_rate": 3.5289537712895376e-05, "loss": 0.7033, "step": 12495 }, { "epoch": 0.36483606318063705, "grad_norm": 0.5176076711225055, "learning_rate": 3.528791565287916e-05, "loss": 0.5998, "step": 12496 }, { "epoch": 0.3648652594084844, "grad_norm": 0.5462170551479835, "learning_rate": 3.528629359286294e-05, "loss": 0.6439, "step": 12497 }, { "epoch": 0.36489445563633177, "grad_norm": 0.5533074845560156, "learning_rate": 3.528467153284672e-05, "loss": 0.6636, "step": 12498 }, { "epoch": 0.36492365186417913, "grad_norm": 0.5752057925074112, "learning_rate": 3.5283049472830496e-05, "loss": 0.7359, "step": 12499 }, { "epoch": 0.3649528480920265, "grad_norm": 0.5158761834089518, "learning_rate": 3.528142741281428e-05, "loss": 0.6276, "step": 12500 }, { "epoch": 0.36498204431987386, "grad_norm": 0.4845855033004048, "learning_rate": 3.527980535279806e-05, "loss": 0.5303, "step": 12501 }, { "epoch": 0.3650112405477212, "grad_norm": 0.6270700990446993, "learning_rate": 3.5278183292781835e-05, "loss": 0.7577, "step": 12502 }, { "epoch": 0.3650404367755686, "grad_norm": 0.5178449695340014, "learning_rate": 3.5276561232765616e-05, "loss": 0.6138, "step": 12503 }, { "epoch": 0.36506963300341594, "grad_norm": 0.5117579999119007, "learning_rate": 3.527493917274939e-05, "loss": 0.6011, "step": 12504 }, { "epoch": 0.3650988292312633, "grad_norm": 0.5313517429962777, "learning_rate": 3.527331711273317e-05, "loss": 0.6753, "step": 12505 }, { "epoch": 0.36512802545911066, "grad_norm": 0.5182028633059919, "learning_rate": 3.5271695052716955e-05, "loss": 0.6453, "step": 12506 }, { "epoch": 0.365157221686958, "grad_norm": 0.5018368283444145, "learning_rate": 3.527007299270073e-05, "loss": 0.5391, "step": 12507 }, { "epoch": 0.3651864179148054, "grad_norm": 0.5679626414290463, "learning_rate": 3.526845093268451e-05, "loss": 0.633, "step": 12508 }, { "epoch": 0.36521561414265274, "grad_norm": 0.5532067232043397, "learning_rate": 3.526682887266829e-05, "loss": 0.625, "step": 12509 }, { "epoch": 0.3652448103705001, "grad_norm": 0.566559544335424, "learning_rate": 3.526520681265207e-05, "loss": 0.6734, "step": 12510 }, { "epoch": 0.36527400659834747, "grad_norm": 0.5897565705485073, "learning_rate": 3.526358475263585e-05, "loss": 0.594, "step": 12511 }, { "epoch": 0.36530320282619483, "grad_norm": 0.5457285217740513, "learning_rate": 3.5261962692619625e-05, "loss": 0.6427, "step": 12512 }, { "epoch": 0.36533239905404225, "grad_norm": 0.5248640798049858, "learning_rate": 3.526034063260341e-05, "loss": 0.622, "step": 12513 }, { "epoch": 0.3653615952818896, "grad_norm": 0.5471144281666008, "learning_rate": 3.525871857258718e-05, "loss": 0.6469, "step": 12514 }, { "epoch": 0.36539079150973697, "grad_norm": 0.5581161068269396, "learning_rate": 3.525709651257097e-05, "loss": 0.6788, "step": 12515 }, { "epoch": 0.36541998773758433, "grad_norm": 0.501365505302341, "learning_rate": 3.5255474452554745e-05, "loss": 0.5668, "step": 12516 }, { "epoch": 0.3654491839654317, "grad_norm": 0.6038100886966531, "learning_rate": 3.525385239253853e-05, "loss": 0.7336, "step": 12517 }, { "epoch": 0.36547838019327905, "grad_norm": 0.5192678849885065, "learning_rate": 3.525223033252231e-05, "loss": 0.602, "step": 12518 }, { "epoch": 0.3655075764211264, "grad_norm": 0.5378289083407044, "learning_rate": 3.5250608272506084e-05, "loss": 0.6169, "step": 12519 }, { "epoch": 0.3655367726489738, "grad_norm": 0.541344748609639, "learning_rate": 3.5248986212489866e-05, "loss": 0.6505, "step": 12520 }, { "epoch": 0.36556596887682113, "grad_norm": 0.48149261467400584, "learning_rate": 3.524736415247365e-05, "loss": 0.5446, "step": 12521 }, { "epoch": 0.3655951651046685, "grad_norm": 0.5872889939732654, "learning_rate": 3.524574209245742e-05, "loss": 0.6759, "step": 12522 }, { "epoch": 0.36562436133251586, "grad_norm": 0.563330617687975, "learning_rate": 3.5244120032441204e-05, "loss": 0.6124, "step": 12523 }, { "epoch": 0.3656535575603632, "grad_norm": 0.5385144825186701, "learning_rate": 3.524249797242498e-05, "loss": 0.5939, "step": 12524 }, { "epoch": 0.3656827537882106, "grad_norm": 0.5636384115114335, "learning_rate": 3.524087591240876e-05, "loss": 0.6539, "step": 12525 }, { "epoch": 0.36571195001605794, "grad_norm": 0.5858915330980864, "learning_rate": 3.523925385239254e-05, "loss": 0.6781, "step": 12526 }, { "epoch": 0.3657411462439053, "grad_norm": 0.5575001191037103, "learning_rate": 3.523763179237632e-05, "loss": 0.6717, "step": 12527 }, { "epoch": 0.36577034247175266, "grad_norm": 0.5526084333326112, "learning_rate": 3.52360097323601e-05, "loss": 0.7114, "step": 12528 }, { "epoch": 0.3657995386996, "grad_norm": 0.6133398445776714, "learning_rate": 3.5234387672343875e-05, "loss": 0.7168, "step": 12529 }, { "epoch": 0.3658287349274474, "grad_norm": 0.5729348081520783, "learning_rate": 3.5232765612327656e-05, "loss": 0.6877, "step": 12530 }, { "epoch": 0.36585793115529475, "grad_norm": 0.519123542181147, "learning_rate": 3.523114355231144e-05, "loss": 0.6196, "step": 12531 }, { "epoch": 0.3658871273831421, "grad_norm": 0.5159386210408595, "learning_rate": 3.522952149229521e-05, "loss": 0.626, "step": 12532 }, { "epoch": 0.36591632361098947, "grad_norm": 0.578239627833554, "learning_rate": 3.5227899432278995e-05, "loss": 0.7311, "step": 12533 }, { "epoch": 0.36594551983883683, "grad_norm": 0.5295847785317952, "learning_rate": 3.5226277372262777e-05, "loss": 0.61, "step": 12534 }, { "epoch": 0.3659747160666842, "grad_norm": 0.5559959166959684, "learning_rate": 3.522465531224656e-05, "loss": 0.6185, "step": 12535 }, { "epoch": 0.36600391229453155, "grad_norm": 0.5732431756415656, "learning_rate": 3.522303325223033e-05, "loss": 0.728, "step": 12536 }, { "epoch": 0.3660331085223789, "grad_norm": 0.5101770981889682, "learning_rate": 3.5221411192214115e-05, "loss": 0.6083, "step": 12537 }, { "epoch": 0.3660623047502263, "grad_norm": 0.5635967130442938, "learning_rate": 3.52197891321979e-05, "loss": 0.7068, "step": 12538 }, { "epoch": 0.36609150097807364, "grad_norm": 0.6097019571775703, "learning_rate": 3.521816707218167e-05, "loss": 0.7383, "step": 12539 }, { "epoch": 0.366120697205921, "grad_norm": 0.5734383339852405, "learning_rate": 3.5216545012165454e-05, "loss": 0.7347, "step": 12540 }, { "epoch": 0.36614989343376836, "grad_norm": 0.5028974339232758, "learning_rate": 3.521492295214923e-05, "loss": 0.5702, "step": 12541 }, { "epoch": 0.3661790896616157, "grad_norm": 0.5301383750677806, "learning_rate": 3.521330089213301e-05, "loss": 0.6227, "step": 12542 }, { "epoch": 0.3662082858894631, "grad_norm": 0.543192230882528, "learning_rate": 3.521167883211679e-05, "loss": 0.6936, "step": 12543 }, { "epoch": 0.36623748211731044, "grad_norm": 0.5199751896224382, "learning_rate": 3.521005677210057e-05, "loss": 0.608, "step": 12544 }, { "epoch": 0.3662666783451578, "grad_norm": 0.5573680233566177, "learning_rate": 3.520843471208435e-05, "loss": 0.5716, "step": 12545 }, { "epoch": 0.36629587457300516, "grad_norm": 0.4983157093793521, "learning_rate": 3.520681265206813e-05, "loss": 0.5859, "step": 12546 }, { "epoch": 0.3663250708008525, "grad_norm": 0.5055921853524089, "learning_rate": 3.5205190592051906e-05, "loss": 0.5509, "step": 12547 }, { "epoch": 0.3663542670286999, "grad_norm": 0.5230171351626437, "learning_rate": 3.520356853203569e-05, "loss": 0.6243, "step": 12548 }, { "epoch": 0.36638346325654725, "grad_norm": 0.5606696106059299, "learning_rate": 3.520194647201946e-05, "loss": 0.6842, "step": 12549 }, { "epoch": 0.3664126594843946, "grad_norm": 0.5453527658836301, "learning_rate": 3.5200324412003244e-05, "loss": 0.6328, "step": 12550 }, { "epoch": 0.36644185571224197, "grad_norm": 0.4843779810118276, "learning_rate": 3.5198702351987026e-05, "loss": 0.5487, "step": 12551 }, { "epoch": 0.36647105194008933, "grad_norm": 0.5496002380126336, "learning_rate": 3.51970802919708e-05, "loss": 0.6711, "step": 12552 }, { "epoch": 0.3665002481679367, "grad_norm": 0.5285068061989286, "learning_rate": 3.519545823195459e-05, "loss": 0.5974, "step": 12553 }, { "epoch": 0.36652944439578405, "grad_norm": 0.5387373056634688, "learning_rate": 3.5193836171938364e-05, "loss": 0.6531, "step": 12554 }, { "epoch": 0.3665586406236314, "grad_norm": 0.5647865913678741, "learning_rate": 3.5192214111922146e-05, "loss": 0.6055, "step": 12555 }, { "epoch": 0.3665878368514788, "grad_norm": 0.5913726901149952, "learning_rate": 3.519059205190592e-05, "loss": 0.6699, "step": 12556 }, { "epoch": 0.36661703307932614, "grad_norm": 0.519912435956568, "learning_rate": 3.51889699918897e-05, "loss": 0.588, "step": 12557 }, { "epoch": 0.3666462293071735, "grad_norm": 0.4927487350097766, "learning_rate": 3.5187347931873485e-05, "loss": 0.5576, "step": 12558 }, { "epoch": 0.36667542553502086, "grad_norm": 0.5351842787948513, "learning_rate": 3.518572587185726e-05, "loss": 0.6384, "step": 12559 }, { "epoch": 0.3667046217628682, "grad_norm": 0.5414371222872589, "learning_rate": 3.518410381184104e-05, "loss": 0.6515, "step": 12560 }, { "epoch": 0.3667338179907156, "grad_norm": 0.5773524948361434, "learning_rate": 3.5182481751824816e-05, "loss": 0.6718, "step": 12561 }, { "epoch": 0.36676301421856294, "grad_norm": 0.5318947382788697, "learning_rate": 3.51808596918086e-05, "loss": 0.6246, "step": 12562 }, { "epoch": 0.3667922104464103, "grad_norm": 0.5806687847757865, "learning_rate": 3.517923763179238e-05, "loss": 0.6851, "step": 12563 }, { "epoch": 0.36682140667425767, "grad_norm": 0.48653999992850355, "learning_rate": 3.5177615571776155e-05, "loss": 0.5025, "step": 12564 }, { "epoch": 0.366850602902105, "grad_norm": 0.5759397542479456, "learning_rate": 3.517599351175994e-05, "loss": 0.637, "step": 12565 }, { "epoch": 0.3668797991299524, "grad_norm": 0.5397068027916861, "learning_rate": 3.517437145174372e-05, "loss": 0.6566, "step": 12566 }, { "epoch": 0.36690899535779975, "grad_norm": 0.5772623007220496, "learning_rate": 3.517274939172749e-05, "loss": 0.691, "step": 12567 }, { "epoch": 0.3669381915856471, "grad_norm": 0.5774375814512065, "learning_rate": 3.5171127331711275e-05, "loss": 0.656, "step": 12568 }, { "epoch": 0.36696738781349447, "grad_norm": 0.5286328563051665, "learning_rate": 3.516950527169505e-05, "loss": 0.5902, "step": 12569 }, { "epoch": 0.36699658404134183, "grad_norm": 0.5316482241711694, "learning_rate": 3.516788321167883e-05, "loss": 0.5704, "step": 12570 }, { "epoch": 0.3670257802691892, "grad_norm": 0.5103901980150689, "learning_rate": 3.5166261151662614e-05, "loss": 0.5718, "step": 12571 }, { "epoch": 0.36705497649703656, "grad_norm": 0.5241091838942225, "learning_rate": 3.5164639091646395e-05, "loss": 0.6012, "step": 12572 }, { "epoch": 0.367084172724884, "grad_norm": 0.53967069166617, "learning_rate": 3.516301703163018e-05, "loss": 0.6167, "step": 12573 }, { "epoch": 0.36711336895273133, "grad_norm": 0.5568907999333933, "learning_rate": 3.516139497161395e-05, "loss": 0.6511, "step": 12574 }, { "epoch": 0.3671425651805787, "grad_norm": 0.5668208032175084, "learning_rate": 3.5159772911597734e-05, "loss": 0.6862, "step": 12575 }, { "epoch": 0.36717176140842606, "grad_norm": 0.509550355214578, "learning_rate": 3.515815085158151e-05, "loss": 0.6152, "step": 12576 }, { "epoch": 0.3672009576362734, "grad_norm": 0.5882935872612904, "learning_rate": 3.515652879156529e-05, "loss": 0.6789, "step": 12577 }, { "epoch": 0.3672301538641208, "grad_norm": 0.5309917983623481, "learning_rate": 3.515490673154907e-05, "loss": 0.5981, "step": 12578 }, { "epoch": 0.36725935009196814, "grad_norm": 0.5200425706284642, "learning_rate": 3.515328467153285e-05, "loss": 0.6194, "step": 12579 }, { "epoch": 0.3672885463198155, "grad_norm": 0.577017602033402, "learning_rate": 3.515166261151663e-05, "loss": 0.7264, "step": 12580 }, { "epoch": 0.36731774254766286, "grad_norm": 0.5254309662701918, "learning_rate": 3.5150040551500404e-05, "loss": 0.5906, "step": 12581 }, { "epoch": 0.3673469387755102, "grad_norm": 0.5194780151602971, "learning_rate": 3.5148418491484186e-05, "loss": 0.6089, "step": 12582 }, { "epoch": 0.3673761350033576, "grad_norm": 0.5600268881190983, "learning_rate": 3.514679643146797e-05, "loss": 0.6501, "step": 12583 }, { "epoch": 0.36740533123120495, "grad_norm": 0.5399472814336758, "learning_rate": 3.514517437145174e-05, "loss": 0.6679, "step": 12584 }, { "epoch": 0.3674345274590523, "grad_norm": 0.5994461137107134, "learning_rate": 3.5143552311435524e-05, "loss": 0.7298, "step": 12585 }, { "epoch": 0.36746372368689967, "grad_norm": 0.5560650949308409, "learning_rate": 3.51419302514193e-05, "loss": 0.6994, "step": 12586 }, { "epoch": 0.36749291991474703, "grad_norm": 0.5373787585250426, "learning_rate": 3.514030819140308e-05, "loss": 0.6395, "step": 12587 }, { "epoch": 0.3675221161425944, "grad_norm": 0.5233542620958394, "learning_rate": 3.513868613138686e-05, "loss": 0.5912, "step": 12588 }, { "epoch": 0.36755131237044175, "grad_norm": 0.535308654855245, "learning_rate": 3.513706407137064e-05, "loss": 0.6213, "step": 12589 }, { "epoch": 0.3675805085982891, "grad_norm": 0.5516041749863546, "learning_rate": 3.513544201135442e-05, "loss": 0.6605, "step": 12590 }, { "epoch": 0.3676097048261365, "grad_norm": 0.5689777679443923, "learning_rate": 3.51338199513382e-05, "loss": 0.692, "step": 12591 }, { "epoch": 0.36763890105398384, "grad_norm": 0.5924679893232703, "learning_rate": 3.513219789132198e-05, "loss": 0.6941, "step": 12592 }, { "epoch": 0.3676680972818312, "grad_norm": 0.5366294190639052, "learning_rate": 3.5130575831305765e-05, "loss": 0.6112, "step": 12593 }, { "epoch": 0.36769729350967856, "grad_norm": 0.5238466962009746, "learning_rate": 3.512895377128954e-05, "loss": 0.6102, "step": 12594 }, { "epoch": 0.3677264897375259, "grad_norm": 0.5520282517788272, "learning_rate": 3.512733171127332e-05, "loss": 0.6867, "step": 12595 }, { "epoch": 0.3677556859653733, "grad_norm": 0.5888116747975617, "learning_rate": 3.51257096512571e-05, "loss": 0.7233, "step": 12596 }, { "epoch": 0.36778488219322064, "grad_norm": 0.5709635294880858, "learning_rate": 3.512408759124088e-05, "loss": 0.6921, "step": 12597 }, { "epoch": 0.367814078421068, "grad_norm": 0.5519208728508739, "learning_rate": 3.512246553122466e-05, "loss": 0.6468, "step": 12598 }, { "epoch": 0.36784327464891536, "grad_norm": 0.5464106616107239, "learning_rate": 3.5120843471208435e-05, "loss": 0.6906, "step": 12599 }, { "epoch": 0.3678724708767627, "grad_norm": 0.5705298069260685, "learning_rate": 3.511922141119222e-05, "loss": 0.7214, "step": 12600 }, { "epoch": 0.3679016671046101, "grad_norm": 0.5213776046602084, "learning_rate": 3.511759935117599e-05, "loss": 0.617, "step": 12601 }, { "epoch": 0.36793086333245745, "grad_norm": 0.5305639905181234, "learning_rate": 3.5115977291159774e-05, "loss": 0.6478, "step": 12602 }, { "epoch": 0.3679600595603048, "grad_norm": 0.5317157827410993, "learning_rate": 3.5114355231143556e-05, "loss": 0.5841, "step": 12603 }, { "epoch": 0.36798925578815217, "grad_norm": 0.5274072230978746, "learning_rate": 3.511273317112733e-05, "loss": 0.6041, "step": 12604 }, { "epoch": 0.36801845201599953, "grad_norm": 0.6326149081832395, "learning_rate": 3.511111111111111e-05, "loss": 0.7124, "step": 12605 }, { "epoch": 0.3680476482438469, "grad_norm": 0.5809696816168469, "learning_rate": 3.510948905109489e-05, "loss": 0.6556, "step": 12606 }, { "epoch": 0.36807684447169425, "grad_norm": 0.5578809837186991, "learning_rate": 3.510786699107867e-05, "loss": 0.6112, "step": 12607 }, { "epoch": 0.3681060406995416, "grad_norm": 0.5494859981472929, "learning_rate": 3.510624493106245e-05, "loss": 0.6658, "step": 12608 }, { "epoch": 0.368135236927389, "grad_norm": 0.5066572364751872, "learning_rate": 3.5104622871046226e-05, "loss": 0.5146, "step": 12609 }, { "epoch": 0.36816443315523634, "grad_norm": 0.5611998411139822, "learning_rate": 3.5103000811030014e-05, "loss": 0.6988, "step": 12610 }, { "epoch": 0.3681936293830837, "grad_norm": 0.5494172815799953, "learning_rate": 3.510137875101379e-05, "loss": 0.6202, "step": 12611 }, { "epoch": 0.36822282561093106, "grad_norm": 0.5640799788196746, "learning_rate": 3.509975669099757e-05, "loss": 0.7007, "step": 12612 }, { "epoch": 0.3682520218387784, "grad_norm": 0.5368820626785057, "learning_rate": 3.509813463098135e-05, "loss": 0.6343, "step": 12613 }, { "epoch": 0.3682812180666258, "grad_norm": 0.5128284171192669, "learning_rate": 3.509651257096513e-05, "loss": 0.5871, "step": 12614 }, { "epoch": 0.36831041429447314, "grad_norm": 0.5473728137785939, "learning_rate": 3.509489051094891e-05, "loss": 0.6397, "step": 12615 }, { "epoch": 0.3683396105223205, "grad_norm": 0.5786088568064046, "learning_rate": 3.5093268450932685e-05, "loss": 0.6968, "step": 12616 }, { "epoch": 0.36836880675016787, "grad_norm": 0.4881237632176866, "learning_rate": 3.5091646390916466e-05, "loss": 0.5177, "step": 12617 }, { "epoch": 0.3683980029780152, "grad_norm": 0.5374154252304314, "learning_rate": 3.509002433090025e-05, "loss": 0.6593, "step": 12618 }, { "epoch": 0.3684271992058626, "grad_norm": 0.5587680983414419, "learning_rate": 3.508840227088402e-05, "loss": 0.6048, "step": 12619 }, { "epoch": 0.36845639543370995, "grad_norm": 0.5382645171357445, "learning_rate": 3.5086780210867805e-05, "loss": 0.6319, "step": 12620 }, { "epoch": 0.3684855916615573, "grad_norm": 0.5287203420264702, "learning_rate": 3.508515815085158e-05, "loss": 0.6417, "step": 12621 }, { "epoch": 0.36851478788940467, "grad_norm": 0.6375861552206142, "learning_rate": 3.508353609083536e-05, "loss": 0.8025, "step": 12622 }, { "epoch": 0.36854398411725203, "grad_norm": 0.5121526630617607, "learning_rate": 3.508191403081914e-05, "loss": 0.547, "step": 12623 }, { "epoch": 0.3685731803450994, "grad_norm": 0.529539726784545, "learning_rate": 3.508029197080292e-05, "loss": 0.6296, "step": 12624 }, { "epoch": 0.36860237657294675, "grad_norm": 0.5214136640947081, "learning_rate": 3.50786699107867e-05, "loss": 0.6029, "step": 12625 }, { "epoch": 0.3686315728007941, "grad_norm": 0.5412319585346416, "learning_rate": 3.5077047850770475e-05, "loss": 0.6515, "step": 12626 }, { "epoch": 0.3686607690286415, "grad_norm": 0.5298712639074622, "learning_rate": 3.507542579075426e-05, "loss": 0.6206, "step": 12627 }, { "epoch": 0.36868996525648884, "grad_norm": 0.5284716725828525, "learning_rate": 3.507380373073804e-05, "loss": 0.6173, "step": 12628 }, { "epoch": 0.3687191614843362, "grad_norm": 0.5485316722529567, "learning_rate": 3.507218167072182e-05, "loss": 0.6902, "step": 12629 }, { "epoch": 0.36874835771218356, "grad_norm": 0.5186546832278696, "learning_rate": 3.50705596107056e-05, "loss": 0.6149, "step": 12630 }, { "epoch": 0.3687775539400309, "grad_norm": 0.5078530064184977, "learning_rate": 3.506893755068938e-05, "loss": 0.5938, "step": 12631 }, { "epoch": 0.3688067501678783, "grad_norm": 0.49461975999746144, "learning_rate": 3.506731549067316e-05, "loss": 0.5666, "step": 12632 }, { "epoch": 0.36883594639572564, "grad_norm": 0.5461570568209067, "learning_rate": 3.506569343065694e-05, "loss": 0.6666, "step": 12633 }, { "epoch": 0.36886514262357306, "grad_norm": 0.5394311970360833, "learning_rate": 3.5064071370640716e-05, "loss": 0.6276, "step": 12634 }, { "epoch": 0.3688943388514204, "grad_norm": 0.5569610911831019, "learning_rate": 3.50624493106245e-05, "loss": 0.7121, "step": 12635 }, { "epoch": 0.3689235350792678, "grad_norm": 0.5160766491235491, "learning_rate": 3.506082725060827e-05, "loss": 0.5779, "step": 12636 }, { "epoch": 0.36895273130711514, "grad_norm": 0.5551230834750981, "learning_rate": 3.5059205190592054e-05, "loss": 0.6603, "step": 12637 }, { "epoch": 0.3689819275349625, "grad_norm": 0.5118081797233811, "learning_rate": 3.5057583130575836e-05, "loss": 0.5629, "step": 12638 }, { "epoch": 0.36901112376280987, "grad_norm": 0.5985113433285628, "learning_rate": 3.505596107055961e-05, "loss": 0.7512, "step": 12639 }, { "epoch": 0.36904031999065723, "grad_norm": 0.5675414593778924, "learning_rate": 3.505433901054339e-05, "loss": 0.6762, "step": 12640 }, { "epoch": 0.3690695162185046, "grad_norm": 0.6090184465475913, "learning_rate": 3.505271695052717e-05, "loss": 0.8084, "step": 12641 }, { "epoch": 0.36909871244635195, "grad_norm": 0.5645636798177962, "learning_rate": 3.505109489051095e-05, "loss": 0.6918, "step": 12642 }, { "epoch": 0.3691279086741993, "grad_norm": 0.5561354793034021, "learning_rate": 3.504947283049473e-05, "loss": 0.6723, "step": 12643 }, { "epoch": 0.3691571049020467, "grad_norm": 0.5989137211256114, "learning_rate": 3.5047850770478506e-05, "loss": 0.6855, "step": 12644 }, { "epoch": 0.36918630112989403, "grad_norm": 0.5648774684881466, "learning_rate": 3.504622871046229e-05, "loss": 0.6827, "step": 12645 }, { "epoch": 0.3692154973577414, "grad_norm": 0.5147868023581517, "learning_rate": 3.504460665044606e-05, "loss": 0.5793, "step": 12646 }, { "epoch": 0.36924469358558876, "grad_norm": 0.5797714913013099, "learning_rate": 3.5042984590429845e-05, "loss": 0.6929, "step": 12647 }, { "epoch": 0.3692738898134361, "grad_norm": 0.5659155264005754, "learning_rate": 3.5041362530413626e-05, "loss": 0.6095, "step": 12648 }, { "epoch": 0.3693030860412835, "grad_norm": 0.5378860133413923, "learning_rate": 3.503974047039741e-05, "loss": 0.6427, "step": 12649 }, { "epoch": 0.36933228226913084, "grad_norm": 0.5344559854255618, "learning_rate": 3.503811841038119e-05, "loss": 0.6103, "step": 12650 }, { "epoch": 0.3693614784969782, "grad_norm": 0.5537370391355122, "learning_rate": 3.5036496350364965e-05, "loss": 0.6419, "step": 12651 }, { "epoch": 0.36939067472482556, "grad_norm": 0.5238238421915383, "learning_rate": 3.503487429034875e-05, "loss": 0.5836, "step": 12652 }, { "epoch": 0.3694198709526729, "grad_norm": 0.5770058190560212, "learning_rate": 3.503325223033252e-05, "loss": 0.6527, "step": 12653 }, { "epoch": 0.3694490671805203, "grad_norm": 0.4745070821882977, "learning_rate": 3.5031630170316303e-05, "loss": 0.484, "step": 12654 }, { "epoch": 0.36947826340836765, "grad_norm": 0.5386918171241687, "learning_rate": 3.5030008110300085e-05, "loss": 0.6772, "step": 12655 }, { "epoch": 0.369507459636215, "grad_norm": 0.5777622225030818, "learning_rate": 3.502838605028386e-05, "loss": 0.6613, "step": 12656 }, { "epoch": 0.36953665586406237, "grad_norm": 0.6121885427260048, "learning_rate": 3.502676399026764e-05, "loss": 0.7743, "step": 12657 }, { "epoch": 0.36956585209190973, "grad_norm": 0.5078037654653301, "learning_rate": 3.5025141930251424e-05, "loss": 0.5795, "step": 12658 }, { "epoch": 0.3695950483197571, "grad_norm": 0.5795681514681595, "learning_rate": 3.50235198702352e-05, "loss": 0.7145, "step": 12659 }, { "epoch": 0.36962424454760445, "grad_norm": 0.5986489022669708, "learning_rate": 3.502189781021898e-05, "loss": 0.7842, "step": 12660 }, { "epoch": 0.3696534407754518, "grad_norm": 0.4945508996231232, "learning_rate": 3.5020275750202755e-05, "loss": 0.5848, "step": 12661 }, { "epoch": 0.3696826370032992, "grad_norm": 0.49556543612681714, "learning_rate": 3.501865369018654e-05, "loss": 0.5687, "step": 12662 }, { "epoch": 0.36971183323114654, "grad_norm": 0.5145962850854553, "learning_rate": 3.501703163017032e-05, "loss": 0.57, "step": 12663 }, { "epoch": 0.3697410294589939, "grad_norm": 0.5705682012389578, "learning_rate": 3.5015409570154094e-05, "loss": 0.6683, "step": 12664 }, { "epoch": 0.36977022568684126, "grad_norm": 0.5246990092765214, "learning_rate": 3.5013787510137876e-05, "loss": 0.6056, "step": 12665 }, { "epoch": 0.3697994219146886, "grad_norm": 0.5574240251056943, "learning_rate": 3.501216545012166e-05, "loss": 0.5731, "step": 12666 }, { "epoch": 0.369828618142536, "grad_norm": 0.5433213261115953, "learning_rate": 3.501054339010544e-05, "loss": 0.6079, "step": 12667 }, { "epoch": 0.36985781437038334, "grad_norm": 0.5460863656844578, "learning_rate": 3.5008921330089214e-05, "loss": 0.6352, "step": 12668 }, { "epoch": 0.3698870105982307, "grad_norm": 0.547405795279606, "learning_rate": 3.5007299270072996e-05, "loss": 0.6601, "step": 12669 }, { "epoch": 0.36991620682607806, "grad_norm": 0.5564053687238327, "learning_rate": 3.500567721005678e-05, "loss": 0.6147, "step": 12670 }, { "epoch": 0.3699454030539254, "grad_norm": 0.5297811934237141, "learning_rate": 3.500405515004055e-05, "loss": 0.6067, "step": 12671 }, { "epoch": 0.3699745992817728, "grad_norm": 0.5268064015284032, "learning_rate": 3.5002433090024334e-05, "loss": 0.6371, "step": 12672 }, { "epoch": 0.37000379550962015, "grad_norm": 0.5056723723324118, "learning_rate": 3.500081103000811e-05, "loss": 0.5183, "step": 12673 }, { "epoch": 0.3700329917374675, "grad_norm": 0.4995283549337225, "learning_rate": 3.499918896999189e-05, "loss": 0.5929, "step": 12674 }, { "epoch": 0.37006218796531487, "grad_norm": 0.5377186252430273, "learning_rate": 3.499756690997567e-05, "loss": 0.6567, "step": 12675 }, { "epoch": 0.37009138419316223, "grad_norm": 0.5805740913057252, "learning_rate": 3.499594484995945e-05, "loss": 0.6962, "step": 12676 }, { "epoch": 0.3701205804210096, "grad_norm": 0.5040942952476274, "learning_rate": 3.499432278994323e-05, "loss": 0.5666, "step": 12677 }, { "epoch": 0.37014977664885695, "grad_norm": 0.5593219958238698, "learning_rate": 3.499270072992701e-05, "loss": 0.6299, "step": 12678 }, { "epoch": 0.3701789728767043, "grad_norm": 0.5387998628660852, "learning_rate": 3.4991078669910786e-05, "loss": 0.6237, "step": 12679 }, { "epoch": 0.3702081691045517, "grad_norm": 0.49832339287150496, "learning_rate": 3.498945660989457e-05, "loss": 0.5728, "step": 12680 }, { "epoch": 0.37023736533239904, "grad_norm": 0.5739841298229237, "learning_rate": 3.498783454987834e-05, "loss": 0.6409, "step": 12681 }, { "epoch": 0.3702665615602464, "grad_norm": 0.5476134703531985, "learning_rate": 3.4986212489862125e-05, "loss": 0.6306, "step": 12682 }, { "epoch": 0.37029575778809376, "grad_norm": 0.5677981033027135, "learning_rate": 3.498459042984591e-05, "loss": 0.6545, "step": 12683 }, { "epoch": 0.3703249540159411, "grad_norm": 0.5513228346437443, "learning_rate": 3.498296836982968e-05, "loss": 0.6787, "step": 12684 }, { "epoch": 0.3703541502437885, "grad_norm": 0.5334544935904675, "learning_rate": 3.498134630981347e-05, "loss": 0.6268, "step": 12685 }, { "epoch": 0.37038334647163584, "grad_norm": 0.5473814311332917, "learning_rate": 3.4979724249797245e-05, "loss": 0.6664, "step": 12686 }, { "epoch": 0.3704125426994832, "grad_norm": 0.5697418456030824, "learning_rate": 3.497810218978103e-05, "loss": 0.7586, "step": 12687 }, { "epoch": 0.37044173892733057, "grad_norm": 0.5138131358654812, "learning_rate": 3.49764801297648e-05, "loss": 0.5887, "step": 12688 }, { "epoch": 0.3704709351551779, "grad_norm": 0.522873276126607, "learning_rate": 3.4974858069748584e-05, "loss": 0.6468, "step": 12689 }, { "epoch": 0.3705001313830253, "grad_norm": 0.5559184952028033, "learning_rate": 3.4973236009732366e-05, "loss": 0.6949, "step": 12690 }, { "epoch": 0.37052932761087265, "grad_norm": 0.532058890921934, "learning_rate": 3.497161394971614e-05, "loss": 0.6545, "step": 12691 }, { "epoch": 0.37055852383872, "grad_norm": 0.5519622783673519, "learning_rate": 3.496999188969992e-05, "loss": 0.6981, "step": 12692 }, { "epoch": 0.37058772006656737, "grad_norm": 0.5554081772597627, "learning_rate": 3.49683698296837e-05, "loss": 0.6638, "step": 12693 }, { "epoch": 0.3706169162944148, "grad_norm": 0.5432352351110569, "learning_rate": 3.496674776966748e-05, "loss": 0.6348, "step": 12694 }, { "epoch": 0.37064611252226215, "grad_norm": 0.5051525680402384, "learning_rate": 3.496512570965126e-05, "loss": 0.5527, "step": 12695 }, { "epoch": 0.3706753087501095, "grad_norm": 0.5438463764386015, "learning_rate": 3.4963503649635036e-05, "loss": 0.6148, "step": 12696 }, { "epoch": 0.3707045049779569, "grad_norm": 0.5473951571647935, "learning_rate": 3.496188158961882e-05, "loss": 0.6499, "step": 12697 }, { "epoch": 0.37073370120580423, "grad_norm": 0.5481384945408236, "learning_rate": 3.496025952960259e-05, "loss": 0.6835, "step": 12698 }, { "epoch": 0.3707628974336516, "grad_norm": 0.5023487487598747, "learning_rate": 3.4958637469586374e-05, "loss": 0.5979, "step": 12699 }, { "epoch": 0.37079209366149896, "grad_norm": 0.5532589319083555, "learning_rate": 3.4957015409570156e-05, "loss": 0.6833, "step": 12700 }, { "epoch": 0.3708212898893463, "grad_norm": 0.5269391922870968, "learning_rate": 3.495539334955393e-05, "loss": 0.6121, "step": 12701 }, { "epoch": 0.3708504861171937, "grad_norm": 0.5871577803193456, "learning_rate": 3.495377128953771e-05, "loss": 0.6717, "step": 12702 }, { "epoch": 0.37087968234504104, "grad_norm": 0.5109798952657488, "learning_rate": 3.4952149229521495e-05, "loss": 0.5848, "step": 12703 }, { "epoch": 0.3709088785728884, "grad_norm": 0.5586161278050573, "learning_rate": 3.4950527169505276e-05, "loss": 0.6598, "step": 12704 }, { "epoch": 0.37093807480073576, "grad_norm": 0.5380853070042836, "learning_rate": 3.494890510948906e-05, "loss": 0.6212, "step": 12705 }, { "epoch": 0.3709672710285831, "grad_norm": 0.5365647689922601, "learning_rate": 3.494728304947283e-05, "loss": 0.6487, "step": 12706 }, { "epoch": 0.3709964672564305, "grad_norm": 0.47669973428633544, "learning_rate": 3.4945660989456615e-05, "loss": 0.5418, "step": 12707 }, { "epoch": 0.37102566348427785, "grad_norm": 0.558881620615268, "learning_rate": 3.494403892944039e-05, "loss": 0.6764, "step": 12708 }, { "epoch": 0.3710548597121252, "grad_norm": 0.527105581368418, "learning_rate": 3.494241686942417e-05, "loss": 0.6323, "step": 12709 }, { "epoch": 0.37108405593997257, "grad_norm": 0.5319109991675315, "learning_rate": 3.494079480940795e-05, "loss": 0.6745, "step": 12710 }, { "epoch": 0.37111325216781993, "grad_norm": 0.5695449690507657, "learning_rate": 3.493917274939173e-05, "loss": 0.6674, "step": 12711 }, { "epoch": 0.3711424483956673, "grad_norm": 0.5955578862511548, "learning_rate": 3.493755068937551e-05, "loss": 0.6842, "step": 12712 }, { "epoch": 0.37117164462351465, "grad_norm": 0.5567813876093345, "learning_rate": 3.4935928629359285e-05, "loss": 0.7206, "step": 12713 }, { "epoch": 0.371200840851362, "grad_norm": 0.5777641588288185, "learning_rate": 3.493430656934307e-05, "loss": 0.6797, "step": 12714 }, { "epoch": 0.3712300370792094, "grad_norm": 0.5138584089466587, "learning_rate": 3.493268450932685e-05, "loss": 0.622, "step": 12715 }, { "epoch": 0.37125923330705674, "grad_norm": 0.5069803755353605, "learning_rate": 3.4931062449310624e-05, "loss": 0.5832, "step": 12716 }, { "epoch": 0.3712884295349041, "grad_norm": 0.5100571211503738, "learning_rate": 3.4929440389294405e-05, "loss": 0.5993, "step": 12717 }, { "epoch": 0.37131762576275146, "grad_norm": 0.5615868749553203, "learning_rate": 3.492781832927818e-05, "loss": 0.6685, "step": 12718 }, { "epoch": 0.3713468219905988, "grad_norm": 0.5489795056213125, "learning_rate": 3.492619626926196e-05, "loss": 0.6906, "step": 12719 }, { "epoch": 0.3713760182184462, "grad_norm": 0.5814995172329694, "learning_rate": 3.4924574209245744e-05, "loss": 0.7605, "step": 12720 }, { "epoch": 0.37140521444629354, "grad_norm": 0.5119575340305912, "learning_rate": 3.492295214922952e-05, "loss": 0.552, "step": 12721 }, { "epoch": 0.3714344106741409, "grad_norm": 0.5755790774761684, "learning_rate": 3.49213300892133e-05, "loss": 0.7053, "step": 12722 }, { "epoch": 0.37146360690198826, "grad_norm": 0.5003813407644719, "learning_rate": 3.491970802919708e-05, "loss": 0.5889, "step": 12723 }, { "epoch": 0.3714928031298356, "grad_norm": 0.5250159672693332, "learning_rate": 3.4918085969180864e-05, "loss": 0.6061, "step": 12724 }, { "epoch": 0.371521999357683, "grad_norm": 0.5275793730901756, "learning_rate": 3.4916463909164646e-05, "loss": 0.6316, "step": 12725 }, { "epoch": 0.37155119558553035, "grad_norm": 0.5280371095593995, "learning_rate": 3.491484184914842e-05, "loss": 0.6569, "step": 12726 }, { "epoch": 0.3715803918133777, "grad_norm": 0.5070344837610863, "learning_rate": 3.49132197891322e-05, "loss": 0.5572, "step": 12727 }, { "epoch": 0.37160958804122507, "grad_norm": 0.5216281876799483, "learning_rate": 3.491159772911598e-05, "loss": 0.6022, "step": 12728 }, { "epoch": 0.37163878426907243, "grad_norm": 0.49257314477750924, "learning_rate": 3.490997566909976e-05, "loss": 0.4689, "step": 12729 }, { "epoch": 0.3716679804969198, "grad_norm": 0.5066633518962537, "learning_rate": 3.490835360908354e-05, "loss": 0.5633, "step": 12730 }, { "epoch": 0.37169717672476715, "grad_norm": 0.6281027437378786, "learning_rate": 3.4906731549067316e-05, "loss": 0.7072, "step": 12731 }, { "epoch": 0.3717263729526145, "grad_norm": 0.8918784350048146, "learning_rate": 3.49051094890511e-05, "loss": 0.7485, "step": 12732 }, { "epoch": 0.3717555691804619, "grad_norm": 0.5149007363211292, "learning_rate": 3.490348742903487e-05, "loss": 0.5801, "step": 12733 }, { "epoch": 0.37178476540830924, "grad_norm": 0.5926335477892396, "learning_rate": 3.4901865369018655e-05, "loss": 0.75, "step": 12734 }, { "epoch": 0.3718139616361566, "grad_norm": 0.5017771835353455, "learning_rate": 3.4900243309002436e-05, "loss": 0.6124, "step": 12735 }, { "epoch": 0.37184315786400396, "grad_norm": 0.5154707993720608, "learning_rate": 3.489862124898621e-05, "loss": 0.5818, "step": 12736 }, { "epoch": 0.3718723540918513, "grad_norm": 0.5287416161953076, "learning_rate": 3.489699918896999e-05, "loss": 0.6197, "step": 12737 }, { "epoch": 0.3719015503196987, "grad_norm": 0.5597925997957539, "learning_rate": 3.489537712895377e-05, "loss": 0.6923, "step": 12738 }, { "epoch": 0.37193074654754604, "grad_norm": 0.5271872022894435, "learning_rate": 3.489375506893755e-05, "loss": 0.598, "step": 12739 }, { "epoch": 0.3719599427753934, "grad_norm": 0.5154237200202135, "learning_rate": 3.489213300892133e-05, "loss": 0.597, "step": 12740 }, { "epoch": 0.37198913900324077, "grad_norm": 0.4978856456168339, "learning_rate": 3.489051094890511e-05, "loss": 0.5421, "step": 12741 }, { "epoch": 0.3720183352310881, "grad_norm": 0.5636358990778796, "learning_rate": 3.4888888888888895e-05, "loss": 0.6915, "step": 12742 }, { "epoch": 0.3720475314589355, "grad_norm": 0.5289149963177916, "learning_rate": 3.488726682887267e-05, "loss": 0.6254, "step": 12743 }, { "epoch": 0.37207672768678285, "grad_norm": 0.604502005614754, "learning_rate": 3.488564476885645e-05, "loss": 0.7811, "step": 12744 }, { "epoch": 0.3721059239146302, "grad_norm": 0.5667395893619631, "learning_rate": 3.4884022708840234e-05, "loss": 0.6493, "step": 12745 }, { "epoch": 0.37213512014247757, "grad_norm": 0.5416022974593125, "learning_rate": 3.488240064882401e-05, "loss": 0.6479, "step": 12746 }, { "epoch": 0.37216431637032493, "grad_norm": 0.5584833382876907, "learning_rate": 3.488077858880779e-05, "loss": 0.6636, "step": 12747 }, { "epoch": 0.3721935125981723, "grad_norm": 0.5421663889893966, "learning_rate": 3.4879156528791565e-05, "loss": 0.6665, "step": 12748 }, { "epoch": 0.37222270882601965, "grad_norm": 0.5353989966200813, "learning_rate": 3.487753446877535e-05, "loss": 0.6158, "step": 12749 }, { "epoch": 0.372251905053867, "grad_norm": 0.5766541909009691, "learning_rate": 3.487591240875913e-05, "loss": 0.7183, "step": 12750 }, { "epoch": 0.3722811012817144, "grad_norm": 0.5262591351769189, "learning_rate": 3.4874290348742904e-05, "loss": 0.6408, "step": 12751 }, { "epoch": 0.37231029750956174, "grad_norm": 0.5804523445191728, "learning_rate": 3.4872668288726686e-05, "loss": 0.69, "step": 12752 }, { "epoch": 0.3723394937374091, "grad_norm": 0.579741224813224, "learning_rate": 3.487104622871046e-05, "loss": 0.6914, "step": 12753 }, { "epoch": 0.3723686899652565, "grad_norm": 0.5016716709305006, "learning_rate": 3.486942416869424e-05, "loss": 0.5592, "step": 12754 }, { "epoch": 0.3723978861931039, "grad_norm": 0.5192472152831307, "learning_rate": 3.4867802108678024e-05, "loss": 0.5875, "step": 12755 }, { "epoch": 0.37242708242095124, "grad_norm": 0.5131721846072408, "learning_rate": 3.48661800486618e-05, "loss": 0.5681, "step": 12756 }, { "epoch": 0.3724562786487986, "grad_norm": 0.5701294961540505, "learning_rate": 3.486455798864558e-05, "loss": 0.6776, "step": 12757 }, { "epoch": 0.37248547487664596, "grad_norm": 0.4793551274143255, "learning_rate": 3.4862935928629356e-05, "loss": 0.5023, "step": 12758 }, { "epoch": 0.3725146711044933, "grad_norm": 0.4919671726814417, "learning_rate": 3.486131386861314e-05, "loss": 0.5691, "step": 12759 }, { "epoch": 0.3725438673323407, "grad_norm": 0.5601468872915943, "learning_rate": 3.485969180859692e-05, "loss": 0.6612, "step": 12760 }, { "epoch": 0.37257306356018804, "grad_norm": 0.5047947035673817, "learning_rate": 3.48580697485807e-05, "loss": 0.5893, "step": 12761 }, { "epoch": 0.3726022597880354, "grad_norm": 0.5372383733829851, "learning_rate": 3.485644768856448e-05, "loss": 0.6424, "step": 12762 }, { "epoch": 0.37263145601588277, "grad_norm": 0.5525916412228628, "learning_rate": 3.485482562854826e-05, "loss": 0.6799, "step": 12763 }, { "epoch": 0.37266065224373013, "grad_norm": 0.5233676911700007, "learning_rate": 3.485320356853204e-05, "loss": 0.6139, "step": 12764 }, { "epoch": 0.3726898484715775, "grad_norm": 0.5400874985244697, "learning_rate": 3.485158150851582e-05, "loss": 0.6475, "step": 12765 }, { "epoch": 0.37271904469942485, "grad_norm": 0.5316824995455316, "learning_rate": 3.4849959448499596e-05, "loss": 0.6674, "step": 12766 }, { "epoch": 0.3727482409272722, "grad_norm": 0.47301584190949925, "learning_rate": 3.484833738848338e-05, "loss": 0.5655, "step": 12767 }, { "epoch": 0.3727774371551196, "grad_norm": 0.5414621220394525, "learning_rate": 3.484671532846715e-05, "loss": 0.6698, "step": 12768 }, { "epoch": 0.37280663338296693, "grad_norm": 0.5726992455594915, "learning_rate": 3.4845093268450935e-05, "loss": 0.6912, "step": 12769 }, { "epoch": 0.3728358296108143, "grad_norm": 0.5609647693440706, "learning_rate": 3.484347120843472e-05, "loss": 0.6406, "step": 12770 }, { "epoch": 0.37286502583866166, "grad_norm": 0.5406524948565283, "learning_rate": 3.484184914841849e-05, "loss": 0.6679, "step": 12771 }, { "epoch": 0.372894222066509, "grad_norm": 0.5390258465179258, "learning_rate": 3.4840227088402273e-05, "loss": 0.6456, "step": 12772 }, { "epoch": 0.3729234182943564, "grad_norm": 0.5292829755980367, "learning_rate": 3.483860502838605e-05, "loss": 0.6001, "step": 12773 }, { "epoch": 0.37295261452220374, "grad_norm": 0.619649929155431, "learning_rate": 3.483698296836983e-05, "loss": 0.774, "step": 12774 }, { "epoch": 0.3729818107500511, "grad_norm": 0.5250779728660605, "learning_rate": 3.483536090835361e-05, "loss": 0.6018, "step": 12775 }, { "epoch": 0.37301100697789846, "grad_norm": 0.5629380846163864, "learning_rate": 3.483373884833739e-05, "loss": 0.6722, "step": 12776 }, { "epoch": 0.3730402032057458, "grad_norm": 0.5660213819394393, "learning_rate": 3.483211678832117e-05, "loss": 0.6182, "step": 12777 }, { "epoch": 0.3730693994335932, "grad_norm": 0.48211720136844405, "learning_rate": 3.4830494728304944e-05, "loss": 0.5034, "step": 12778 }, { "epoch": 0.37309859566144055, "grad_norm": 0.5457832992788608, "learning_rate": 3.4828872668288725e-05, "loss": 0.6934, "step": 12779 }, { "epoch": 0.3731277918892879, "grad_norm": 0.5346726841609282, "learning_rate": 3.482725060827251e-05, "loss": 0.6741, "step": 12780 }, { "epoch": 0.37315698811713527, "grad_norm": 0.49482944735960954, "learning_rate": 3.482562854825629e-05, "loss": 0.5453, "step": 12781 }, { "epoch": 0.37318618434498263, "grad_norm": 0.5925861385027912, "learning_rate": 3.482400648824007e-05, "loss": 0.7078, "step": 12782 }, { "epoch": 0.37321538057283, "grad_norm": 0.5196385770407879, "learning_rate": 3.4822384428223846e-05, "loss": 0.6479, "step": 12783 }, { "epoch": 0.37324457680067735, "grad_norm": 0.5510519059091424, "learning_rate": 3.482076236820763e-05, "loss": 0.7029, "step": 12784 }, { "epoch": 0.3732737730285247, "grad_norm": 0.5330653020378555, "learning_rate": 3.48191403081914e-05, "loss": 0.618, "step": 12785 }, { "epoch": 0.3733029692563721, "grad_norm": 0.47956380982833235, "learning_rate": 3.4817518248175184e-05, "loss": 0.5389, "step": 12786 }, { "epoch": 0.37333216548421944, "grad_norm": 0.5359427139154358, "learning_rate": 3.4815896188158966e-05, "loss": 0.605, "step": 12787 }, { "epoch": 0.3733613617120668, "grad_norm": 0.47472151372864146, "learning_rate": 3.481427412814274e-05, "loss": 0.4915, "step": 12788 }, { "epoch": 0.37339055793991416, "grad_norm": 0.5369816481223908, "learning_rate": 3.481265206812652e-05, "loss": 0.6325, "step": 12789 }, { "epoch": 0.3734197541677615, "grad_norm": 0.5520431549315131, "learning_rate": 3.4811030008110305e-05, "loss": 0.6848, "step": 12790 }, { "epoch": 0.3734489503956089, "grad_norm": 0.5529779131889722, "learning_rate": 3.480940794809408e-05, "loss": 0.66, "step": 12791 }, { "epoch": 0.37347814662345624, "grad_norm": 0.4709608598195363, "learning_rate": 3.480778588807786e-05, "loss": 0.5219, "step": 12792 }, { "epoch": 0.3735073428513036, "grad_norm": 0.5671765168238526, "learning_rate": 3.4806163828061636e-05, "loss": 0.6881, "step": 12793 }, { "epoch": 0.37353653907915096, "grad_norm": 0.5463966191462057, "learning_rate": 3.480454176804542e-05, "loss": 0.6136, "step": 12794 }, { "epoch": 0.3735657353069983, "grad_norm": 0.5583243208838556, "learning_rate": 3.48029197080292e-05, "loss": 0.6399, "step": 12795 }, { "epoch": 0.3735949315348457, "grad_norm": 0.5516291998532112, "learning_rate": 3.4801297648012975e-05, "loss": 0.6498, "step": 12796 }, { "epoch": 0.37362412776269305, "grad_norm": 0.5969510097758276, "learning_rate": 3.4799675587996757e-05, "loss": 0.7658, "step": 12797 }, { "epoch": 0.3736533239905404, "grad_norm": 0.549955634398383, "learning_rate": 3.479805352798053e-05, "loss": 0.7039, "step": 12798 }, { "epoch": 0.37368252021838777, "grad_norm": 0.4965803745750114, "learning_rate": 3.479643146796432e-05, "loss": 0.6021, "step": 12799 }, { "epoch": 0.37371171644623513, "grad_norm": 0.5180047719817363, "learning_rate": 3.4794809407948095e-05, "loss": 0.6031, "step": 12800 }, { "epoch": 0.3737409126740825, "grad_norm": 0.5030359249006686, "learning_rate": 3.479318734793188e-05, "loss": 0.5784, "step": 12801 }, { "epoch": 0.37377010890192985, "grad_norm": 0.5762828991196701, "learning_rate": 3.479156528791566e-05, "loss": 0.7114, "step": 12802 }, { "epoch": 0.3737993051297772, "grad_norm": 0.5354561015322346, "learning_rate": 3.4789943227899434e-05, "loss": 0.6214, "step": 12803 }, { "epoch": 0.3738285013576246, "grad_norm": 0.5130132932883579, "learning_rate": 3.4788321167883215e-05, "loss": 0.6071, "step": 12804 }, { "epoch": 0.37385769758547194, "grad_norm": 0.6384718905739782, "learning_rate": 3.478669910786699e-05, "loss": 0.5964, "step": 12805 }, { "epoch": 0.3738868938133193, "grad_norm": 0.5085806487032178, "learning_rate": 3.478507704785077e-05, "loss": 0.5916, "step": 12806 }, { "epoch": 0.37391609004116666, "grad_norm": 0.5483599901483942, "learning_rate": 3.4783454987834554e-05, "loss": 0.6242, "step": 12807 }, { "epoch": 0.373945286269014, "grad_norm": 0.5849632751300627, "learning_rate": 3.478183292781833e-05, "loss": 0.7122, "step": 12808 }, { "epoch": 0.3739744824968614, "grad_norm": 0.5167536469338533, "learning_rate": 3.478021086780211e-05, "loss": 0.6091, "step": 12809 }, { "epoch": 0.37400367872470874, "grad_norm": 0.5250950683427181, "learning_rate": 3.477858880778589e-05, "loss": 0.6055, "step": 12810 }, { "epoch": 0.3740328749525561, "grad_norm": 0.5371747924179575, "learning_rate": 3.477696674776967e-05, "loss": 0.6369, "step": 12811 }, { "epoch": 0.37406207118040347, "grad_norm": 0.5338024699771845, "learning_rate": 3.477534468775345e-05, "loss": 0.6013, "step": 12812 }, { "epoch": 0.3740912674082508, "grad_norm": 0.5635046629715975, "learning_rate": 3.4773722627737224e-05, "loss": 0.7272, "step": 12813 }, { "epoch": 0.37412046363609824, "grad_norm": 0.6102843428724825, "learning_rate": 3.4772100567721006e-05, "loss": 0.7836, "step": 12814 }, { "epoch": 0.3741496598639456, "grad_norm": 0.5222324678286779, "learning_rate": 3.477047850770479e-05, "loss": 0.5971, "step": 12815 }, { "epoch": 0.37417885609179297, "grad_norm": 0.5229140667024315, "learning_rate": 3.476885644768856e-05, "loss": 0.6265, "step": 12816 }, { "epoch": 0.3742080523196403, "grad_norm": 0.5289912979661848, "learning_rate": 3.476723438767235e-05, "loss": 0.5746, "step": 12817 }, { "epoch": 0.3742372485474877, "grad_norm": 0.5780130310080746, "learning_rate": 3.4765612327656126e-05, "loss": 0.6655, "step": 12818 }, { "epoch": 0.37426644477533505, "grad_norm": 0.5598851395454308, "learning_rate": 3.476399026763991e-05, "loss": 0.6782, "step": 12819 }, { "epoch": 0.3742956410031824, "grad_norm": 0.5406440223363567, "learning_rate": 3.476236820762368e-05, "loss": 0.6382, "step": 12820 }, { "epoch": 0.37432483723102977, "grad_norm": 0.5526551733665064, "learning_rate": 3.4760746147607465e-05, "loss": 0.637, "step": 12821 }, { "epoch": 0.37435403345887713, "grad_norm": 0.5419691478629544, "learning_rate": 3.4759124087591246e-05, "loss": 0.6219, "step": 12822 }, { "epoch": 0.3743832296867245, "grad_norm": 0.5492411679959337, "learning_rate": 3.475750202757502e-05, "loss": 0.6404, "step": 12823 }, { "epoch": 0.37441242591457186, "grad_norm": 0.5275572855063128, "learning_rate": 3.47558799675588e-05, "loss": 0.5767, "step": 12824 }, { "epoch": 0.3744416221424192, "grad_norm": 0.49893281241261955, "learning_rate": 3.475425790754258e-05, "loss": 0.5663, "step": 12825 }, { "epoch": 0.3744708183702666, "grad_norm": 0.5569692115021141, "learning_rate": 3.475263584752636e-05, "loss": 0.6546, "step": 12826 }, { "epoch": 0.37450001459811394, "grad_norm": 0.5892307343710951, "learning_rate": 3.475101378751014e-05, "loss": 0.7577, "step": 12827 }, { "epoch": 0.3745292108259613, "grad_norm": 0.5131338219108389, "learning_rate": 3.474939172749392e-05, "loss": 0.5653, "step": 12828 }, { "epoch": 0.37455840705380866, "grad_norm": 0.5148094834739254, "learning_rate": 3.47477696674777e-05, "loss": 0.5936, "step": 12829 }, { "epoch": 0.374587603281656, "grad_norm": 0.533423526012744, "learning_rate": 3.4746147607461473e-05, "loss": 0.6443, "step": 12830 }, { "epoch": 0.3746167995095034, "grad_norm": 0.49941770896402854, "learning_rate": 3.4744525547445255e-05, "loss": 0.569, "step": 12831 }, { "epoch": 0.37464599573735075, "grad_norm": 0.565671570721385, "learning_rate": 3.474290348742904e-05, "loss": 0.652, "step": 12832 }, { "epoch": 0.3746751919651981, "grad_norm": 0.5580213914449019, "learning_rate": 3.474128142741281e-05, "loss": 0.5881, "step": 12833 }, { "epoch": 0.37470438819304547, "grad_norm": 0.5291329083953513, "learning_rate": 3.4739659367396594e-05, "loss": 0.6206, "step": 12834 }, { "epoch": 0.37473358442089283, "grad_norm": 0.5243566447260468, "learning_rate": 3.4738037307380375e-05, "loss": 0.6023, "step": 12835 }, { "epoch": 0.3747627806487402, "grad_norm": 0.5086754673205393, "learning_rate": 3.473641524736416e-05, "loss": 0.605, "step": 12836 }, { "epoch": 0.37479197687658755, "grad_norm": 0.5253659833161783, "learning_rate": 3.473479318734794e-05, "loss": 0.664, "step": 12837 }, { "epoch": 0.3748211731044349, "grad_norm": 0.5663272213597794, "learning_rate": 3.4733171127331714e-05, "loss": 0.6446, "step": 12838 }, { "epoch": 0.3748503693322823, "grad_norm": 0.5954037968473798, "learning_rate": 3.4731549067315496e-05, "loss": 0.7104, "step": 12839 }, { "epoch": 0.37487956556012964, "grad_norm": 0.559120113628805, "learning_rate": 3.472992700729927e-05, "loss": 0.6545, "step": 12840 }, { "epoch": 0.374908761787977, "grad_norm": 0.5457427376728458, "learning_rate": 3.472830494728305e-05, "loss": 0.6658, "step": 12841 }, { "epoch": 0.37493795801582436, "grad_norm": 0.5757160574385196, "learning_rate": 3.4726682887266834e-05, "loss": 0.6677, "step": 12842 }, { "epoch": 0.3749671542436717, "grad_norm": 0.5481499878052845, "learning_rate": 3.472506082725061e-05, "loss": 0.6379, "step": 12843 }, { "epoch": 0.3749963504715191, "grad_norm": 0.5398423977372026, "learning_rate": 3.472343876723439e-05, "loss": 0.672, "step": 12844 }, { "epoch": 0.37502554669936644, "grad_norm": 0.5248558382970406, "learning_rate": 3.4721816707218166e-05, "loss": 0.6273, "step": 12845 }, { "epoch": 0.3750547429272138, "grad_norm": 0.5235133539446649, "learning_rate": 3.472019464720195e-05, "loss": 0.6019, "step": 12846 }, { "epoch": 0.37508393915506116, "grad_norm": 0.4684488320161675, "learning_rate": 3.471857258718573e-05, "loss": 0.53, "step": 12847 }, { "epoch": 0.3751131353829085, "grad_norm": 0.5096118383407164, "learning_rate": 3.4716950527169504e-05, "loss": 0.593, "step": 12848 }, { "epoch": 0.3751423316107559, "grad_norm": 0.5780288106538289, "learning_rate": 3.4715328467153286e-05, "loss": 0.6988, "step": 12849 }, { "epoch": 0.37517152783860325, "grad_norm": 0.566419643859426, "learning_rate": 3.471370640713706e-05, "loss": 0.6903, "step": 12850 }, { "epoch": 0.3752007240664506, "grad_norm": 0.518555583918929, "learning_rate": 3.471208434712084e-05, "loss": 0.6329, "step": 12851 }, { "epoch": 0.37522992029429797, "grad_norm": 0.5367392456149558, "learning_rate": 3.4710462287104625e-05, "loss": 0.66, "step": 12852 }, { "epoch": 0.37525911652214533, "grad_norm": 0.5312771381796602, "learning_rate": 3.47088402270884e-05, "loss": 0.6034, "step": 12853 }, { "epoch": 0.3752883127499927, "grad_norm": 0.5391862240519878, "learning_rate": 3.470721816707218e-05, "loss": 0.62, "step": 12854 }, { "epoch": 0.37531750897784005, "grad_norm": 0.5484787185253065, "learning_rate": 3.470559610705596e-05, "loss": 0.6112, "step": 12855 }, { "epoch": 0.3753467052056874, "grad_norm": 0.5599899385713568, "learning_rate": 3.4703974047039745e-05, "loss": 0.6654, "step": 12856 }, { "epoch": 0.3753759014335348, "grad_norm": 0.518104237299244, "learning_rate": 3.470235198702353e-05, "loss": 0.599, "step": 12857 }, { "epoch": 0.37540509766138214, "grad_norm": 0.5652163571016889, "learning_rate": 3.47007299270073e-05, "loss": 0.7155, "step": 12858 }, { "epoch": 0.3754342938892295, "grad_norm": 0.5585406320722222, "learning_rate": 3.4699107866991084e-05, "loss": 0.708, "step": 12859 }, { "epoch": 0.37546349011707686, "grad_norm": 0.5979966138427091, "learning_rate": 3.469748580697486e-05, "loss": 0.731, "step": 12860 }, { "epoch": 0.3754926863449242, "grad_norm": 0.510835397985598, "learning_rate": 3.469586374695864e-05, "loss": 0.6533, "step": 12861 }, { "epoch": 0.3755218825727716, "grad_norm": 0.5436552047039646, "learning_rate": 3.469424168694242e-05, "loss": 0.6726, "step": 12862 }, { "epoch": 0.37555107880061894, "grad_norm": 0.5332019410132958, "learning_rate": 3.46926196269262e-05, "loss": 0.6449, "step": 12863 }, { "epoch": 0.3755802750284663, "grad_norm": 0.4707946931021288, "learning_rate": 3.469099756690998e-05, "loss": 0.4857, "step": 12864 }, { "epoch": 0.37560947125631367, "grad_norm": 0.507842431647149, "learning_rate": 3.4689375506893754e-05, "loss": 0.5992, "step": 12865 }, { "epoch": 0.375638667484161, "grad_norm": 0.5369010203283149, "learning_rate": 3.4687753446877536e-05, "loss": 0.6541, "step": 12866 }, { "epoch": 0.3756678637120084, "grad_norm": 0.5284613148225538, "learning_rate": 3.468613138686132e-05, "loss": 0.634, "step": 12867 }, { "epoch": 0.37569705993985575, "grad_norm": 0.5267205868283447, "learning_rate": 3.468450932684509e-05, "loss": 0.59, "step": 12868 }, { "epoch": 0.3757262561677031, "grad_norm": 0.5147898975181825, "learning_rate": 3.4682887266828874e-05, "loss": 0.6257, "step": 12869 }, { "epoch": 0.37575545239555047, "grad_norm": 0.5202249812811887, "learning_rate": 3.468126520681265e-05, "loss": 0.6202, "step": 12870 }, { "epoch": 0.37578464862339783, "grad_norm": 0.49695760484217855, "learning_rate": 3.467964314679643e-05, "loss": 0.5872, "step": 12871 }, { "epoch": 0.3758138448512452, "grad_norm": 0.5517946070274486, "learning_rate": 3.467802108678021e-05, "loss": 0.6827, "step": 12872 }, { "epoch": 0.37584304107909255, "grad_norm": 0.542743239239378, "learning_rate": 3.467639902676399e-05, "loss": 0.6486, "step": 12873 }, { "epoch": 0.3758722373069399, "grad_norm": 0.5358512814872874, "learning_rate": 3.4674776966747776e-05, "loss": 0.6389, "step": 12874 }, { "epoch": 0.37590143353478733, "grad_norm": 0.5083374624629693, "learning_rate": 3.467315490673155e-05, "loss": 0.5934, "step": 12875 }, { "epoch": 0.3759306297626347, "grad_norm": 0.5187194724676399, "learning_rate": 3.467153284671533e-05, "loss": 0.6701, "step": 12876 }, { "epoch": 0.37595982599048205, "grad_norm": 0.5468791166141919, "learning_rate": 3.4669910786699115e-05, "loss": 0.7098, "step": 12877 }, { "epoch": 0.3759890222183294, "grad_norm": 0.5297380007711757, "learning_rate": 3.466828872668289e-05, "loss": 0.6516, "step": 12878 }, { "epoch": 0.3760182184461768, "grad_norm": 0.5355245458444001, "learning_rate": 3.466666666666667e-05, "loss": 0.6516, "step": 12879 }, { "epoch": 0.37604741467402414, "grad_norm": 0.5996241922933638, "learning_rate": 3.4665044606650446e-05, "loss": 0.7018, "step": 12880 }, { "epoch": 0.3760766109018715, "grad_norm": 0.5481047009523535, "learning_rate": 3.466342254663423e-05, "loss": 0.6748, "step": 12881 }, { "epoch": 0.37610580712971886, "grad_norm": 0.5468507949240703, "learning_rate": 3.466180048661801e-05, "loss": 0.6425, "step": 12882 }, { "epoch": 0.3761350033575662, "grad_norm": 0.493088509300945, "learning_rate": 3.4660178426601785e-05, "loss": 0.5153, "step": 12883 }, { "epoch": 0.3761641995854136, "grad_norm": 0.5477250783742914, "learning_rate": 3.4658556366585567e-05, "loss": 0.6845, "step": 12884 }, { "epoch": 0.37619339581326094, "grad_norm": 0.5145782129409681, "learning_rate": 3.465693430656934e-05, "loss": 0.6087, "step": 12885 }, { "epoch": 0.3762225920411083, "grad_norm": 0.6173884603222528, "learning_rate": 3.465531224655312e-05, "loss": 0.7233, "step": 12886 }, { "epoch": 0.37625178826895567, "grad_norm": 0.48775678399369193, "learning_rate": 3.4653690186536905e-05, "loss": 0.5467, "step": 12887 }, { "epoch": 0.37628098449680303, "grad_norm": 0.5736515078360588, "learning_rate": 3.465206812652068e-05, "loss": 0.6624, "step": 12888 }, { "epoch": 0.3763101807246504, "grad_norm": 0.5360541827749202, "learning_rate": 3.465044606650446e-05, "loss": 0.6325, "step": 12889 }, { "epoch": 0.37633937695249775, "grad_norm": 0.517325906882814, "learning_rate": 3.464882400648824e-05, "loss": 0.5391, "step": 12890 }, { "epoch": 0.3763685731803451, "grad_norm": 0.5345379757143965, "learning_rate": 3.464720194647202e-05, "loss": 0.6448, "step": 12891 }, { "epoch": 0.3763977694081925, "grad_norm": 0.5175116661195435, "learning_rate": 3.46455798864558e-05, "loss": 0.5755, "step": 12892 }, { "epoch": 0.37642696563603983, "grad_norm": 0.537080861170731, "learning_rate": 3.464395782643958e-05, "loss": 0.6569, "step": 12893 }, { "epoch": 0.3764561618638872, "grad_norm": 0.551057137789123, "learning_rate": 3.4642335766423364e-05, "loss": 0.6315, "step": 12894 }, { "epoch": 0.37648535809173456, "grad_norm": 0.5450528017925848, "learning_rate": 3.464071370640714e-05, "loss": 0.6676, "step": 12895 }, { "epoch": 0.3765145543195819, "grad_norm": 0.5534242646226871, "learning_rate": 3.463909164639092e-05, "loss": 0.6473, "step": 12896 }, { "epoch": 0.3765437505474293, "grad_norm": 0.571708055974592, "learning_rate": 3.4637469586374696e-05, "loss": 0.6349, "step": 12897 }, { "epoch": 0.37657294677527664, "grad_norm": 0.5583998610176114, "learning_rate": 3.463584752635848e-05, "loss": 0.6874, "step": 12898 }, { "epoch": 0.376602143003124, "grad_norm": 0.564307031675225, "learning_rate": 3.463422546634226e-05, "loss": 0.6629, "step": 12899 }, { "epoch": 0.37663133923097136, "grad_norm": 0.5177524635338453, "learning_rate": 3.4632603406326034e-05, "loss": 0.5637, "step": 12900 }, { "epoch": 0.3766605354588187, "grad_norm": 0.5155435120121653, "learning_rate": 3.4630981346309816e-05, "loss": 0.6428, "step": 12901 }, { "epoch": 0.3766897316866661, "grad_norm": 0.564476211010864, "learning_rate": 3.46293592862936e-05, "loss": 0.6307, "step": 12902 }, { "epoch": 0.37671892791451345, "grad_norm": 0.5224509528854047, "learning_rate": 3.462773722627737e-05, "loss": 0.605, "step": 12903 }, { "epoch": 0.3767481241423608, "grad_norm": 0.4981007622288226, "learning_rate": 3.4626115166261154e-05, "loss": 0.5754, "step": 12904 }, { "epoch": 0.37677732037020817, "grad_norm": 0.5237353320309581, "learning_rate": 3.462449310624493e-05, "loss": 0.6193, "step": 12905 }, { "epoch": 0.37680651659805553, "grad_norm": 0.6014349355127504, "learning_rate": 3.462287104622871e-05, "loss": 0.6866, "step": 12906 }, { "epoch": 0.3768357128259029, "grad_norm": 0.5168487276387429, "learning_rate": 3.462124898621249e-05, "loss": 0.6107, "step": 12907 }, { "epoch": 0.37686490905375025, "grad_norm": 0.49818963346283107, "learning_rate": 3.461962692619627e-05, "loss": 0.5922, "step": 12908 }, { "epoch": 0.3768941052815976, "grad_norm": 0.5406533866973362, "learning_rate": 3.461800486618005e-05, "loss": 0.6497, "step": 12909 }, { "epoch": 0.376923301509445, "grad_norm": 0.5016601805026393, "learning_rate": 3.4616382806163825e-05, "loss": 0.5421, "step": 12910 }, { "epoch": 0.37695249773729234, "grad_norm": 0.5884189848667327, "learning_rate": 3.4614760746147606e-05, "loss": 0.7745, "step": 12911 }, { "epoch": 0.3769816939651397, "grad_norm": 0.5172683605396917, "learning_rate": 3.461313868613139e-05, "loss": 0.6134, "step": 12912 }, { "epoch": 0.37701089019298706, "grad_norm": 0.5819615603884315, "learning_rate": 3.461151662611517e-05, "loss": 0.7187, "step": 12913 }, { "epoch": 0.3770400864208344, "grad_norm": 0.5252440835864045, "learning_rate": 3.460989456609895e-05, "loss": 0.6332, "step": 12914 }, { "epoch": 0.3770692826486818, "grad_norm": 0.6195290734182517, "learning_rate": 3.460827250608273e-05, "loss": 0.6838, "step": 12915 }, { "epoch": 0.37709847887652914, "grad_norm": 0.5343378874409808, "learning_rate": 3.460665044606651e-05, "loss": 0.6333, "step": 12916 }, { "epoch": 0.3771276751043765, "grad_norm": 0.48504374394126354, "learning_rate": 3.4605028386050283e-05, "loss": 0.5656, "step": 12917 }, { "epoch": 0.37715687133222386, "grad_norm": 0.49826513201886646, "learning_rate": 3.4603406326034065e-05, "loss": 0.59, "step": 12918 }, { "epoch": 0.3771860675600712, "grad_norm": 0.5383457429297736, "learning_rate": 3.460178426601785e-05, "loss": 0.6597, "step": 12919 }, { "epoch": 0.3772152637879186, "grad_norm": 0.5774922918436993, "learning_rate": 3.460016220600162e-05, "loss": 0.7334, "step": 12920 }, { "epoch": 0.37724446001576595, "grad_norm": 0.50818230925049, "learning_rate": 3.4598540145985404e-05, "loss": 0.583, "step": 12921 }, { "epoch": 0.3772736562436133, "grad_norm": 0.5066185154182765, "learning_rate": 3.4596918085969185e-05, "loss": 0.5887, "step": 12922 }, { "epoch": 0.37730285247146067, "grad_norm": 0.5190373586128982, "learning_rate": 3.459529602595296e-05, "loss": 0.6034, "step": 12923 }, { "epoch": 0.37733204869930803, "grad_norm": 0.583298501280664, "learning_rate": 3.459367396593674e-05, "loss": 0.7006, "step": 12924 }, { "epoch": 0.3773612449271554, "grad_norm": 0.5101088340125454, "learning_rate": 3.459205190592052e-05, "loss": 0.5952, "step": 12925 }, { "epoch": 0.37739044115500275, "grad_norm": 0.513902733958708, "learning_rate": 3.45904298459043e-05, "loss": 0.5901, "step": 12926 }, { "epoch": 0.3774196373828501, "grad_norm": 0.7245150039770756, "learning_rate": 3.458880778588808e-05, "loss": 0.756, "step": 12927 }, { "epoch": 0.3774488336106975, "grad_norm": 0.537566188056228, "learning_rate": 3.4587185725871856e-05, "loss": 0.6365, "step": 12928 }, { "epoch": 0.37747802983854484, "grad_norm": 0.5524480425522428, "learning_rate": 3.458556366585564e-05, "loss": 0.621, "step": 12929 }, { "epoch": 0.3775072260663922, "grad_norm": 0.5115493711410098, "learning_rate": 3.458394160583941e-05, "loss": 0.6178, "step": 12930 }, { "epoch": 0.37753642229423956, "grad_norm": 0.5670271256974451, "learning_rate": 3.45823195458232e-05, "loss": 0.7033, "step": 12931 }, { "epoch": 0.3775656185220869, "grad_norm": 0.5177537649472278, "learning_rate": 3.4580697485806976e-05, "loss": 0.5761, "step": 12932 }, { "epoch": 0.3775948147499343, "grad_norm": 0.529349124454366, "learning_rate": 3.457907542579076e-05, "loss": 0.6407, "step": 12933 }, { "epoch": 0.37762401097778164, "grad_norm": 0.49466215938829283, "learning_rate": 3.457745336577454e-05, "loss": 0.6004, "step": 12934 }, { "epoch": 0.37765320720562906, "grad_norm": 0.488188763805222, "learning_rate": 3.4575831305758314e-05, "loss": 0.5368, "step": 12935 }, { "epoch": 0.3776824034334764, "grad_norm": 0.5250629526198737, "learning_rate": 3.4574209245742096e-05, "loss": 0.5972, "step": 12936 }, { "epoch": 0.3777115996613238, "grad_norm": 0.488756784626598, "learning_rate": 3.457258718572587e-05, "loss": 0.5476, "step": 12937 }, { "epoch": 0.37774079588917114, "grad_norm": 0.49697545791210157, "learning_rate": 3.457096512570965e-05, "loss": 0.529, "step": 12938 }, { "epoch": 0.3777699921170185, "grad_norm": 0.5365172266694623, "learning_rate": 3.4569343065693435e-05, "loss": 0.6367, "step": 12939 }, { "epoch": 0.37779918834486587, "grad_norm": 0.563582815583772, "learning_rate": 3.456772100567721e-05, "loss": 0.6996, "step": 12940 }, { "epoch": 0.3778283845727132, "grad_norm": 0.5584136237450048, "learning_rate": 3.456609894566099e-05, "loss": 0.6418, "step": 12941 }, { "epoch": 0.3778575808005606, "grad_norm": 0.5327644268294753, "learning_rate": 3.4564476885644766e-05, "loss": 0.6114, "step": 12942 }, { "epoch": 0.37788677702840795, "grad_norm": 0.5685738011618714, "learning_rate": 3.456285482562855e-05, "loss": 0.6997, "step": 12943 }, { "epoch": 0.3779159732562553, "grad_norm": 0.5430634910043456, "learning_rate": 3.456123276561233e-05, "loss": 0.6175, "step": 12944 }, { "epoch": 0.37794516948410267, "grad_norm": 0.5178267956480443, "learning_rate": 3.4559610705596105e-05, "loss": 0.6362, "step": 12945 }, { "epoch": 0.37797436571195003, "grad_norm": 0.6351361988376681, "learning_rate": 3.455798864557989e-05, "loss": 0.7409, "step": 12946 }, { "epoch": 0.3780035619397974, "grad_norm": 0.5659652075710447, "learning_rate": 3.455636658556367e-05, "loss": 0.6384, "step": 12947 }, { "epoch": 0.37803275816764476, "grad_norm": 0.5104174319668787, "learning_rate": 3.4554744525547443e-05, "loss": 0.5579, "step": 12948 }, { "epoch": 0.3780619543954921, "grad_norm": 0.5639130095634753, "learning_rate": 3.4553122465531225e-05, "loss": 0.648, "step": 12949 }, { "epoch": 0.3780911506233395, "grad_norm": 0.5778354734970579, "learning_rate": 3.455150040551501e-05, "loss": 0.7375, "step": 12950 }, { "epoch": 0.37812034685118684, "grad_norm": 0.6093140451720519, "learning_rate": 3.454987834549879e-05, "loss": 0.7362, "step": 12951 }, { "epoch": 0.3781495430790342, "grad_norm": 0.533606230047255, "learning_rate": 3.4548256285482564e-05, "loss": 0.6323, "step": 12952 }, { "epoch": 0.37817873930688156, "grad_norm": 0.5044138885453554, "learning_rate": 3.4546634225466346e-05, "loss": 0.6079, "step": 12953 }, { "epoch": 0.3782079355347289, "grad_norm": 0.570113896969396, "learning_rate": 3.454501216545013e-05, "loss": 0.6656, "step": 12954 }, { "epoch": 0.3782371317625763, "grad_norm": 0.5493813880234526, "learning_rate": 3.45433901054339e-05, "loss": 0.6777, "step": 12955 }, { "epoch": 0.37826632799042365, "grad_norm": 0.5005160434391518, "learning_rate": 3.4541768045417684e-05, "loss": 0.58, "step": 12956 }, { "epoch": 0.378295524218271, "grad_norm": 0.6083891834335772, "learning_rate": 3.454014598540146e-05, "loss": 0.7439, "step": 12957 }, { "epoch": 0.37832472044611837, "grad_norm": 0.5693965834385565, "learning_rate": 3.453852392538524e-05, "loss": 0.6734, "step": 12958 }, { "epoch": 0.37835391667396573, "grad_norm": 0.5484982860990494, "learning_rate": 3.453690186536902e-05, "loss": 0.6734, "step": 12959 }, { "epoch": 0.3783831129018131, "grad_norm": 0.5859019713831638, "learning_rate": 3.45352798053528e-05, "loss": 0.6809, "step": 12960 }, { "epoch": 0.37841230912966045, "grad_norm": 0.5589001202660414, "learning_rate": 3.453365774533658e-05, "loss": 0.6109, "step": 12961 }, { "epoch": 0.3784415053575078, "grad_norm": 0.5423919415002871, "learning_rate": 3.4532035685320354e-05, "loss": 0.6979, "step": 12962 }, { "epoch": 0.3784707015853552, "grad_norm": 0.5415995988369809, "learning_rate": 3.4530413625304136e-05, "loss": 0.6483, "step": 12963 }, { "epoch": 0.37849989781320253, "grad_norm": 0.5205826368753779, "learning_rate": 3.452879156528792e-05, "loss": 0.5788, "step": 12964 }, { "epoch": 0.3785290940410499, "grad_norm": 0.5373352593298035, "learning_rate": 3.452716950527169e-05, "loss": 0.6571, "step": 12965 }, { "epoch": 0.37855829026889726, "grad_norm": 0.4966435021031169, "learning_rate": 3.4525547445255475e-05, "loss": 0.5582, "step": 12966 }, { "epoch": 0.3785874864967446, "grad_norm": 0.5830248034433853, "learning_rate": 3.4523925385239256e-05, "loss": 0.6715, "step": 12967 }, { "epoch": 0.378616682724592, "grad_norm": 0.5803663774170783, "learning_rate": 3.452230332522304e-05, "loss": 0.6434, "step": 12968 }, { "epoch": 0.37864587895243934, "grad_norm": 0.5156855205409144, "learning_rate": 3.452068126520682e-05, "loss": 0.6075, "step": 12969 }, { "epoch": 0.3786750751802867, "grad_norm": 0.5177424510277605, "learning_rate": 3.4519059205190595e-05, "loss": 0.634, "step": 12970 }, { "epoch": 0.37870427140813406, "grad_norm": 0.5422844063655892, "learning_rate": 3.4517437145174377e-05, "loss": 0.6324, "step": 12971 }, { "epoch": 0.3787334676359814, "grad_norm": 0.5642763821462189, "learning_rate": 3.451581508515815e-05, "loss": 0.7009, "step": 12972 }, { "epoch": 0.3787626638638288, "grad_norm": 0.5384548759967058, "learning_rate": 3.451419302514193e-05, "loss": 0.6173, "step": 12973 }, { "epoch": 0.37879186009167615, "grad_norm": 0.5189518108192854, "learning_rate": 3.4512570965125715e-05, "loss": 0.6043, "step": 12974 }, { "epoch": 0.3788210563195235, "grad_norm": 0.5428033737288763, "learning_rate": 3.451094890510949e-05, "loss": 0.6654, "step": 12975 }, { "epoch": 0.37885025254737087, "grad_norm": 0.51776633071544, "learning_rate": 3.450932684509327e-05, "loss": 0.618, "step": 12976 }, { "epoch": 0.37887944877521823, "grad_norm": 0.5232091972325789, "learning_rate": 3.450770478507705e-05, "loss": 0.6102, "step": 12977 }, { "epoch": 0.3789086450030656, "grad_norm": 0.5304101214468538, "learning_rate": 3.450608272506083e-05, "loss": 0.6599, "step": 12978 }, { "epoch": 0.37893784123091295, "grad_norm": 0.5291689197002274, "learning_rate": 3.450446066504461e-05, "loss": 0.6221, "step": 12979 }, { "epoch": 0.3789670374587603, "grad_norm": 0.5517326632347175, "learning_rate": 3.4502838605028385e-05, "loss": 0.6448, "step": 12980 }, { "epoch": 0.3789962336866077, "grad_norm": 0.5980029629698156, "learning_rate": 3.450121654501217e-05, "loss": 0.6644, "step": 12981 }, { "epoch": 0.37902542991445504, "grad_norm": 0.5849371127202706, "learning_rate": 3.449959448499594e-05, "loss": 0.7303, "step": 12982 }, { "epoch": 0.3790546261423024, "grad_norm": 0.51799368783089, "learning_rate": 3.4497972424979724e-05, "loss": 0.6031, "step": 12983 }, { "epoch": 0.37908382237014976, "grad_norm": 0.5400717260244575, "learning_rate": 3.4496350364963506e-05, "loss": 0.6612, "step": 12984 }, { "epoch": 0.3791130185979971, "grad_norm": 0.6092845142819481, "learning_rate": 3.449472830494728e-05, "loss": 0.7101, "step": 12985 }, { "epoch": 0.3791422148258445, "grad_norm": 0.5622686473479663, "learning_rate": 3.449310624493106e-05, "loss": 0.6671, "step": 12986 }, { "epoch": 0.37917141105369184, "grad_norm": 0.5719901887839149, "learning_rate": 3.4491484184914844e-05, "loss": 0.7258, "step": 12987 }, { "epoch": 0.3792006072815392, "grad_norm": 0.5958504629515982, "learning_rate": 3.4489862124898626e-05, "loss": 0.7024, "step": 12988 }, { "epoch": 0.37922980350938656, "grad_norm": 0.5401330768931849, "learning_rate": 3.448824006488241e-05, "loss": 0.6007, "step": 12989 }, { "epoch": 0.3792589997372339, "grad_norm": 0.4955049728168219, "learning_rate": 3.448661800486618e-05, "loss": 0.5854, "step": 12990 }, { "epoch": 0.3792881959650813, "grad_norm": 0.5702320928717393, "learning_rate": 3.4484995944849964e-05, "loss": 0.6713, "step": 12991 }, { "epoch": 0.37931739219292865, "grad_norm": 0.5038849974218641, "learning_rate": 3.448337388483374e-05, "loss": 0.5569, "step": 12992 }, { "epoch": 0.379346588420776, "grad_norm": 0.5396931598352571, "learning_rate": 3.448175182481752e-05, "loss": 0.6421, "step": 12993 }, { "epoch": 0.37937578464862337, "grad_norm": 0.5637971220026751, "learning_rate": 3.44801297648013e-05, "loss": 0.6743, "step": 12994 }, { "epoch": 0.3794049808764708, "grad_norm": 0.5243419790223603, "learning_rate": 3.447850770478508e-05, "loss": 0.6573, "step": 12995 }, { "epoch": 0.37943417710431815, "grad_norm": 0.560169679089299, "learning_rate": 3.447688564476886e-05, "loss": 0.6564, "step": 12996 }, { "epoch": 0.3794633733321655, "grad_norm": 0.48421923936654265, "learning_rate": 3.4475263584752635e-05, "loss": 0.5354, "step": 12997 }, { "epoch": 0.37949256956001287, "grad_norm": 0.5461735895160329, "learning_rate": 3.4473641524736416e-05, "loss": 0.6155, "step": 12998 }, { "epoch": 0.37952176578786023, "grad_norm": 0.5709076426214748, "learning_rate": 3.44720194647202e-05, "loss": 0.7332, "step": 12999 }, { "epoch": 0.3795509620157076, "grad_norm": 0.5670783186768905, "learning_rate": 3.447039740470397e-05, "loss": 0.7018, "step": 13000 }, { "epoch": 0.37958015824355495, "grad_norm": 0.510092462799295, "learning_rate": 3.4468775344687755e-05, "loss": 0.5811, "step": 13001 }, { "epoch": 0.3796093544714023, "grad_norm": 0.5963605252021746, "learning_rate": 3.446715328467153e-05, "loss": 0.6716, "step": 13002 }, { "epoch": 0.3796385506992497, "grad_norm": 0.5709539310131706, "learning_rate": 3.446553122465531e-05, "loss": 0.6822, "step": 13003 }, { "epoch": 0.37966774692709704, "grad_norm": 0.526243986036564, "learning_rate": 3.4463909164639093e-05, "loss": 0.5876, "step": 13004 }, { "epoch": 0.3796969431549444, "grad_norm": 0.5745502132454762, "learning_rate": 3.446228710462287e-05, "loss": 0.6819, "step": 13005 }, { "epoch": 0.37972613938279176, "grad_norm": 0.5502153016822826, "learning_rate": 3.446066504460666e-05, "loss": 0.663, "step": 13006 }, { "epoch": 0.3797553356106391, "grad_norm": 0.5628383013015041, "learning_rate": 3.445904298459043e-05, "loss": 0.7184, "step": 13007 }, { "epoch": 0.3797845318384865, "grad_norm": 0.5596769628760537, "learning_rate": 3.4457420924574214e-05, "loss": 0.6651, "step": 13008 }, { "epoch": 0.37981372806633384, "grad_norm": 0.4954865582928817, "learning_rate": 3.4455798864557995e-05, "loss": 0.5487, "step": 13009 }, { "epoch": 0.3798429242941812, "grad_norm": 0.49138435897597943, "learning_rate": 3.445417680454177e-05, "loss": 0.5567, "step": 13010 }, { "epoch": 0.37987212052202857, "grad_norm": 0.5084974522623897, "learning_rate": 3.445255474452555e-05, "loss": 0.5816, "step": 13011 }, { "epoch": 0.37990131674987593, "grad_norm": 0.5328334865852699, "learning_rate": 3.445093268450933e-05, "loss": 0.633, "step": 13012 }, { "epoch": 0.3799305129777233, "grad_norm": 0.5665415688786175, "learning_rate": 3.444931062449311e-05, "loss": 0.6384, "step": 13013 }, { "epoch": 0.37995970920557065, "grad_norm": 0.697791464093431, "learning_rate": 3.444768856447689e-05, "loss": 0.7598, "step": 13014 }, { "epoch": 0.379988905433418, "grad_norm": 0.545352028761334, "learning_rate": 3.4446066504460666e-05, "loss": 0.6141, "step": 13015 }, { "epoch": 0.3800181016612654, "grad_norm": 0.5163711521064321, "learning_rate": 3.444444444444445e-05, "loss": 0.606, "step": 13016 }, { "epoch": 0.38004729788911273, "grad_norm": 0.5264811100220861, "learning_rate": 3.444282238442822e-05, "loss": 0.5859, "step": 13017 }, { "epoch": 0.3800764941169601, "grad_norm": 0.5431485189346675, "learning_rate": 3.4441200324412004e-05, "loss": 0.6561, "step": 13018 }, { "epoch": 0.38010569034480746, "grad_norm": 0.582820328389859, "learning_rate": 3.4439578264395786e-05, "loss": 0.6953, "step": 13019 }, { "epoch": 0.3801348865726548, "grad_norm": 0.5655898902338681, "learning_rate": 3.443795620437956e-05, "loss": 0.6481, "step": 13020 }, { "epoch": 0.3801640828005022, "grad_norm": 0.5085452045234581, "learning_rate": 3.443633414436334e-05, "loss": 0.623, "step": 13021 }, { "epoch": 0.38019327902834954, "grad_norm": 0.5839572159258921, "learning_rate": 3.443471208434712e-05, "loss": 0.7173, "step": 13022 }, { "epoch": 0.3802224752561969, "grad_norm": 0.56332576518173, "learning_rate": 3.44330900243309e-05, "loss": 0.7094, "step": 13023 }, { "epoch": 0.38025167148404426, "grad_norm": 0.5028930699271122, "learning_rate": 3.443146796431468e-05, "loss": 0.5939, "step": 13024 }, { "epoch": 0.3802808677118916, "grad_norm": 0.5377704915485797, "learning_rate": 3.442984590429846e-05, "loss": 0.656, "step": 13025 }, { "epoch": 0.380310063939739, "grad_norm": 0.5285789849156233, "learning_rate": 3.4428223844282245e-05, "loss": 0.6665, "step": 13026 }, { "epoch": 0.38033926016758635, "grad_norm": 0.5310145599162215, "learning_rate": 3.442660178426602e-05, "loss": 0.6431, "step": 13027 }, { "epoch": 0.3803684563954337, "grad_norm": 0.5528350803120986, "learning_rate": 3.44249797242498e-05, "loss": 0.6423, "step": 13028 }, { "epoch": 0.38039765262328107, "grad_norm": 0.5476357524999387, "learning_rate": 3.4423357664233576e-05, "loss": 0.6327, "step": 13029 }, { "epoch": 0.38042684885112843, "grad_norm": 0.5152328031458212, "learning_rate": 3.442173560421736e-05, "loss": 0.5673, "step": 13030 }, { "epoch": 0.3804560450789758, "grad_norm": 0.5132602581757908, "learning_rate": 3.442011354420114e-05, "loss": 0.6374, "step": 13031 }, { "epoch": 0.38048524130682315, "grad_norm": 0.5236433471957608, "learning_rate": 3.4418491484184915e-05, "loss": 0.6074, "step": 13032 }, { "epoch": 0.3805144375346705, "grad_norm": 0.5486159913480413, "learning_rate": 3.44168694241687e-05, "loss": 0.6028, "step": 13033 }, { "epoch": 0.3805436337625179, "grad_norm": 0.6025507822919056, "learning_rate": 3.441524736415248e-05, "loss": 0.7226, "step": 13034 }, { "epoch": 0.38057282999036524, "grad_norm": 0.5635004467608804, "learning_rate": 3.4413625304136253e-05, "loss": 0.631, "step": 13035 }, { "epoch": 0.3806020262182126, "grad_norm": 0.5204075406735692, "learning_rate": 3.4412003244120035e-05, "loss": 0.6283, "step": 13036 }, { "epoch": 0.38063122244605996, "grad_norm": 0.5462400050488404, "learning_rate": 3.441038118410381e-05, "loss": 0.6379, "step": 13037 }, { "epoch": 0.3806604186739073, "grad_norm": 0.5480713870321987, "learning_rate": 3.440875912408759e-05, "loss": 0.6722, "step": 13038 }, { "epoch": 0.3806896149017547, "grad_norm": 0.5110819838244697, "learning_rate": 3.4407137064071374e-05, "loss": 0.5487, "step": 13039 }, { "epoch": 0.38071881112960204, "grad_norm": 0.5188989648777083, "learning_rate": 3.440551500405515e-05, "loss": 0.6121, "step": 13040 }, { "epoch": 0.3807480073574494, "grad_norm": 0.5257103019854945, "learning_rate": 3.440389294403893e-05, "loss": 0.6159, "step": 13041 }, { "epoch": 0.38077720358529676, "grad_norm": 0.646312423926654, "learning_rate": 3.4402270884022706e-05, "loss": 0.663, "step": 13042 }, { "epoch": 0.3808063998131441, "grad_norm": 0.5868656205326772, "learning_rate": 3.440064882400649e-05, "loss": 0.6843, "step": 13043 }, { "epoch": 0.3808355960409915, "grad_norm": 0.6006525847810196, "learning_rate": 3.439902676399027e-05, "loss": 0.7203, "step": 13044 }, { "epoch": 0.38086479226883885, "grad_norm": 0.5603647738865065, "learning_rate": 3.439740470397405e-05, "loss": 0.7003, "step": 13045 }, { "epoch": 0.3808939884966862, "grad_norm": 0.5510889025298719, "learning_rate": 3.439578264395783e-05, "loss": 0.6644, "step": 13046 }, { "epoch": 0.38092318472453357, "grad_norm": 0.5435442386189969, "learning_rate": 3.439416058394161e-05, "loss": 0.671, "step": 13047 }, { "epoch": 0.38095238095238093, "grad_norm": 0.5368259251503914, "learning_rate": 3.439253852392539e-05, "loss": 0.6278, "step": 13048 }, { "epoch": 0.3809815771802283, "grad_norm": 0.4996704703025442, "learning_rate": 3.4390916463909164e-05, "loss": 0.552, "step": 13049 }, { "epoch": 0.38101077340807565, "grad_norm": 0.5506007273825831, "learning_rate": 3.4389294403892946e-05, "loss": 0.6826, "step": 13050 }, { "epoch": 0.381039969635923, "grad_norm": 0.49729924725215563, "learning_rate": 3.438767234387673e-05, "loss": 0.547, "step": 13051 }, { "epoch": 0.3810691658637704, "grad_norm": 0.5223133510354002, "learning_rate": 3.43860502838605e-05, "loss": 0.615, "step": 13052 }, { "epoch": 0.38109836209161774, "grad_norm": 0.5677331364597255, "learning_rate": 3.4384428223844285e-05, "loss": 0.6785, "step": 13053 }, { "epoch": 0.3811275583194651, "grad_norm": 0.522974734696195, "learning_rate": 3.4382806163828066e-05, "loss": 0.6691, "step": 13054 }, { "epoch": 0.38115675454731246, "grad_norm": 0.549848456134347, "learning_rate": 3.438118410381184e-05, "loss": 0.6208, "step": 13055 }, { "epoch": 0.3811859507751599, "grad_norm": 0.576228431263146, "learning_rate": 3.437956204379562e-05, "loss": 0.6324, "step": 13056 }, { "epoch": 0.38121514700300724, "grad_norm": 0.513073568764926, "learning_rate": 3.43779399837794e-05, "loss": 0.5861, "step": 13057 }, { "epoch": 0.3812443432308546, "grad_norm": 0.5402432072207433, "learning_rate": 3.437631792376318e-05, "loss": 0.6213, "step": 13058 }, { "epoch": 0.38127353945870196, "grad_norm": 0.5480842397736304, "learning_rate": 3.437469586374696e-05, "loss": 0.6466, "step": 13059 }, { "epoch": 0.3813027356865493, "grad_norm": 0.5880697134289775, "learning_rate": 3.4373073803730737e-05, "loss": 0.6903, "step": 13060 }, { "epoch": 0.3813319319143967, "grad_norm": 0.5820099086302122, "learning_rate": 3.437145174371452e-05, "loss": 0.6219, "step": 13061 }, { "epoch": 0.38136112814224404, "grad_norm": 0.5580164327978047, "learning_rate": 3.436982968369829e-05, "loss": 0.6612, "step": 13062 }, { "epoch": 0.3813903243700914, "grad_norm": 0.49995709767929614, "learning_rate": 3.436820762368208e-05, "loss": 0.5526, "step": 13063 }, { "epoch": 0.38141952059793877, "grad_norm": 0.5651699898113979, "learning_rate": 3.436658556366586e-05, "loss": 0.6957, "step": 13064 }, { "epoch": 0.3814487168257861, "grad_norm": 0.5612803640497973, "learning_rate": 3.436496350364964e-05, "loss": 0.6737, "step": 13065 }, { "epoch": 0.3814779130536335, "grad_norm": 0.5389862449394407, "learning_rate": 3.436334144363342e-05, "loss": 0.5962, "step": 13066 }, { "epoch": 0.38150710928148085, "grad_norm": 0.5641844371425149, "learning_rate": 3.4361719383617195e-05, "loss": 0.6344, "step": 13067 }, { "epoch": 0.3815363055093282, "grad_norm": 0.5424655673491341, "learning_rate": 3.436009732360098e-05, "loss": 0.6533, "step": 13068 }, { "epoch": 0.38156550173717557, "grad_norm": 0.5301067082526594, "learning_rate": 3.435847526358475e-05, "loss": 0.6394, "step": 13069 }, { "epoch": 0.38159469796502293, "grad_norm": 0.5417749151058957, "learning_rate": 3.4356853203568534e-05, "loss": 0.6242, "step": 13070 }, { "epoch": 0.3816238941928703, "grad_norm": 0.5147853256444004, "learning_rate": 3.4355231143552316e-05, "loss": 0.5743, "step": 13071 }, { "epoch": 0.38165309042071766, "grad_norm": 0.5710804039634981, "learning_rate": 3.435360908353609e-05, "loss": 0.6704, "step": 13072 }, { "epoch": 0.381682286648565, "grad_norm": 0.551108493526681, "learning_rate": 3.435198702351987e-05, "loss": 0.6657, "step": 13073 }, { "epoch": 0.3817114828764124, "grad_norm": 0.4931171377217673, "learning_rate": 3.435036496350365e-05, "loss": 0.5842, "step": 13074 }, { "epoch": 0.38174067910425974, "grad_norm": 0.5082107032515107, "learning_rate": 3.434874290348743e-05, "loss": 0.5689, "step": 13075 }, { "epoch": 0.3817698753321071, "grad_norm": 0.532502072604113, "learning_rate": 3.434712084347121e-05, "loss": 0.6529, "step": 13076 }, { "epoch": 0.38179907155995446, "grad_norm": 0.521436832781169, "learning_rate": 3.4345498783454986e-05, "loss": 0.6105, "step": 13077 }, { "epoch": 0.3818282677878018, "grad_norm": 0.5507692041922693, "learning_rate": 3.434387672343877e-05, "loss": 0.6502, "step": 13078 }, { "epoch": 0.3818574640156492, "grad_norm": 0.5701568308881608, "learning_rate": 3.434225466342255e-05, "loss": 0.6328, "step": 13079 }, { "epoch": 0.38188666024349655, "grad_norm": 0.5750180229660656, "learning_rate": 3.4340632603406324e-05, "loss": 0.7365, "step": 13080 }, { "epoch": 0.3819158564713439, "grad_norm": 0.4663593934119921, "learning_rate": 3.4339010543390106e-05, "loss": 0.4994, "step": 13081 }, { "epoch": 0.38194505269919127, "grad_norm": 0.5113681354360561, "learning_rate": 3.433738848337389e-05, "loss": 0.5789, "step": 13082 }, { "epoch": 0.38197424892703863, "grad_norm": 0.5062159154624046, "learning_rate": 3.433576642335767e-05, "loss": 0.6044, "step": 13083 }, { "epoch": 0.382003445154886, "grad_norm": 0.49837569566500933, "learning_rate": 3.4334144363341445e-05, "loss": 0.5756, "step": 13084 }, { "epoch": 0.38203264138273335, "grad_norm": 0.5243406558337738, "learning_rate": 3.4332522303325226e-05, "loss": 0.6113, "step": 13085 }, { "epoch": 0.3820618376105807, "grad_norm": 0.5799356755899869, "learning_rate": 3.433090024330901e-05, "loss": 0.7393, "step": 13086 }, { "epoch": 0.3820910338384281, "grad_norm": 0.5346091889447789, "learning_rate": 3.432927818329278e-05, "loss": 0.59, "step": 13087 }, { "epoch": 0.38212023006627543, "grad_norm": 0.5600205027056331, "learning_rate": 3.4327656123276565e-05, "loss": 0.6549, "step": 13088 }, { "epoch": 0.3821494262941228, "grad_norm": 0.5596531360789875, "learning_rate": 3.432603406326034e-05, "loss": 0.6393, "step": 13089 }, { "epoch": 0.38217862252197016, "grad_norm": 0.5393758754355203, "learning_rate": 3.432441200324412e-05, "loss": 0.62, "step": 13090 }, { "epoch": 0.3822078187498175, "grad_norm": 0.5254775129423328, "learning_rate": 3.4322789943227903e-05, "loss": 0.6194, "step": 13091 }, { "epoch": 0.3822370149776649, "grad_norm": 0.5630083388510858, "learning_rate": 3.432116788321168e-05, "loss": 0.6866, "step": 13092 }, { "epoch": 0.38226621120551224, "grad_norm": 0.5188971902894018, "learning_rate": 3.431954582319546e-05, "loss": 0.6629, "step": 13093 }, { "epoch": 0.3822954074333596, "grad_norm": 0.609879268840745, "learning_rate": 3.4317923763179235e-05, "loss": 0.7316, "step": 13094 }, { "epoch": 0.38232460366120696, "grad_norm": 0.528155916209063, "learning_rate": 3.431630170316302e-05, "loss": 0.6518, "step": 13095 }, { "epoch": 0.3823537998890543, "grad_norm": 0.5207389383037032, "learning_rate": 3.43146796431468e-05, "loss": 0.5967, "step": 13096 }, { "epoch": 0.3823829961169017, "grad_norm": 0.5310472212290243, "learning_rate": 3.4313057583130574e-05, "loss": 0.6287, "step": 13097 }, { "epoch": 0.38241219234474905, "grad_norm": 0.47645762465324665, "learning_rate": 3.4311435523114355e-05, "loss": 0.5574, "step": 13098 }, { "epoch": 0.3824413885725964, "grad_norm": 0.544344865760685, "learning_rate": 3.430981346309814e-05, "loss": 0.6774, "step": 13099 }, { "epoch": 0.38247058480044377, "grad_norm": 0.4951012082197805, "learning_rate": 3.430819140308191e-05, "loss": 0.5498, "step": 13100 }, { "epoch": 0.38249978102829113, "grad_norm": 0.46813789493541125, "learning_rate": 3.43065693430657e-05, "loss": 0.5008, "step": 13101 }, { "epoch": 0.3825289772561385, "grad_norm": 0.5256186234328674, "learning_rate": 3.4304947283049476e-05, "loss": 0.5852, "step": 13102 }, { "epoch": 0.38255817348398585, "grad_norm": 0.5245784351142372, "learning_rate": 3.430332522303326e-05, "loss": 0.5675, "step": 13103 }, { "epoch": 0.3825873697118332, "grad_norm": 0.5746598125955851, "learning_rate": 3.430170316301703e-05, "loss": 0.7085, "step": 13104 }, { "epoch": 0.3826165659396806, "grad_norm": 0.5281805520581556, "learning_rate": 3.4300081103000814e-05, "loss": 0.6651, "step": 13105 }, { "epoch": 0.38264576216752794, "grad_norm": 0.5026325846615739, "learning_rate": 3.4298459042984596e-05, "loss": 0.5514, "step": 13106 }, { "epoch": 0.3826749583953753, "grad_norm": 0.5476634469349824, "learning_rate": 3.429683698296837e-05, "loss": 0.7025, "step": 13107 }, { "epoch": 0.38270415462322266, "grad_norm": 0.5183357526250894, "learning_rate": 3.429521492295215e-05, "loss": 0.6422, "step": 13108 }, { "epoch": 0.38273335085107, "grad_norm": 0.536486673361118, "learning_rate": 3.429359286293593e-05, "loss": 0.6447, "step": 13109 }, { "epoch": 0.3827625470789174, "grad_norm": 0.4843046343434164, "learning_rate": 3.429197080291971e-05, "loss": 0.565, "step": 13110 }, { "epoch": 0.38279174330676474, "grad_norm": 0.43699662443146603, "learning_rate": 3.429034874290349e-05, "loss": 0.4502, "step": 13111 }, { "epoch": 0.3828209395346121, "grad_norm": 0.5358905243510296, "learning_rate": 3.4288726682887266e-05, "loss": 0.6468, "step": 13112 }, { "epoch": 0.38285013576245946, "grad_norm": 0.5442934071507545, "learning_rate": 3.428710462287105e-05, "loss": 0.6289, "step": 13113 }, { "epoch": 0.3828793319903068, "grad_norm": 0.5384015606985798, "learning_rate": 3.428548256285482e-05, "loss": 0.6485, "step": 13114 }, { "epoch": 0.3829085282181542, "grad_norm": 0.6027800336232734, "learning_rate": 3.4283860502838605e-05, "loss": 0.7729, "step": 13115 }, { "epoch": 0.3829377244460016, "grad_norm": 0.5337791795596557, "learning_rate": 3.4282238442822386e-05, "loss": 0.6602, "step": 13116 }, { "epoch": 0.38296692067384897, "grad_norm": 0.5124266955547918, "learning_rate": 3.428061638280616e-05, "loss": 0.6082, "step": 13117 }, { "epoch": 0.3829961169016963, "grad_norm": 0.5180351837656224, "learning_rate": 3.427899432278994e-05, "loss": 0.6228, "step": 13118 }, { "epoch": 0.3830253131295437, "grad_norm": 0.48298415982921183, "learning_rate": 3.4277372262773725e-05, "loss": 0.5375, "step": 13119 }, { "epoch": 0.38305450935739105, "grad_norm": 0.5277958539850599, "learning_rate": 3.427575020275751e-05, "loss": 0.5702, "step": 13120 }, { "epoch": 0.3830837055852384, "grad_norm": 0.5428139262871151, "learning_rate": 3.427412814274129e-05, "loss": 0.6376, "step": 13121 }, { "epoch": 0.38311290181308577, "grad_norm": 0.5569546880450427, "learning_rate": 3.4272506082725064e-05, "loss": 0.6589, "step": 13122 }, { "epoch": 0.38314209804093313, "grad_norm": 0.5556676450945512, "learning_rate": 3.4270884022708845e-05, "loss": 0.6846, "step": 13123 }, { "epoch": 0.3831712942687805, "grad_norm": 0.5688950065659549, "learning_rate": 3.426926196269262e-05, "loss": 0.6787, "step": 13124 }, { "epoch": 0.38320049049662785, "grad_norm": 0.5487776154554467, "learning_rate": 3.42676399026764e-05, "loss": 0.669, "step": 13125 }, { "epoch": 0.3832296867244752, "grad_norm": 0.5609019735218854, "learning_rate": 3.4266017842660184e-05, "loss": 0.6698, "step": 13126 }, { "epoch": 0.3832588829523226, "grad_norm": 0.5182807865195941, "learning_rate": 3.426439578264396e-05, "loss": 0.6199, "step": 13127 }, { "epoch": 0.38328807918016994, "grad_norm": 0.512860253085893, "learning_rate": 3.426277372262774e-05, "loss": 0.5804, "step": 13128 }, { "epoch": 0.3833172754080173, "grad_norm": 0.576555251693127, "learning_rate": 3.4261151662611516e-05, "loss": 0.7208, "step": 13129 }, { "epoch": 0.38334647163586466, "grad_norm": 0.5407547197105629, "learning_rate": 3.42595296025953e-05, "loss": 0.6439, "step": 13130 }, { "epoch": 0.383375667863712, "grad_norm": 0.5006903574001766, "learning_rate": 3.425790754257908e-05, "loss": 0.5797, "step": 13131 }, { "epoch": 0.3834048640915594, "grad_norm": 0.5072309641148824, "learning_rate": 3.4256285482562854e-05, "loss": 0.6061, "step": 13132 }, { "epoch": 0.38343406031940674, "grad_norm": 0.5541493298256955, "learning_rate": 3.4254663422546636e-05, "loss": 0.5785, "step": 13133 }, { "epoch": 0.3834632565472541, "grad_norm": 0.49450797013516623, "learning_rate": 3.425304136253041e-05, "loss": 0.578, "step": 13134 }, { "epoch": 0.38349245277510147, "grad_norm": 0.5724737739195649, "learning_rate": 3.425141930251419e-05, "loss": 0.716, "step": 13135 }, { "epoch": 0.38352164900294883, "grad_norm": 0.5026613107775769, "learning_rate": 3.4249797242497974e-05, "loss": 0.5608, "step": 13136 }, { "epoch": 0.3835508452307962, "grad_norm": 0.5404654883610426, "learning_rate": 3.424817518248175e-05, "loss": 0.6567, "step": 13137 }, { "epoch": 0.38358004145864355, "grad_norm": 0.5419088426947408, "learning_rate": 3.424655312246554e-05, "loss": 0.6611, "step": 13138 }, { "epoch": 0.3836092376864909, "grad_norm": 0.510612723048774, "learning_rate": 3.424493106244931e-05, "loss": 0.5865, "step": 13139 }, { "epoch": 0.3836384339143383, "grad_norm": 0.5327879365706545, "learning_rate": 3.4243309002433095e-05, "loss": 0.6342, "step": 13140 }, { "epoch": 0.38366763014218563, "grad_norm": 0.511127818677626, "learning_rate": 3.424168694241687e-05, "loss": 0.5894, "step": 13141 }, { "epoch": 0.383696826370033, "grad_norm": 0.5306233677385055, "learning_rate": 3.424006488240065e-05, "loss": 0.6099, "step": 13142 }, { "epoch": 0.38372602259788036, "grad_norm": 0.6061029139618211, "learning_rate": 3.423844282238443e-05, "loss": 0.7199, "step": 13143 }, { "epoch": 0.3837552188257277, "grad_norm": 0.4669321780690304, "learning_rate": 3.423682076236821e-05, "loss": 0.4928, "step": 13144 }, { "epoch": 0.3837844150535751, "grad_norm": 0.5388814971473681, "learning_rate": 3.423519870235199e-05, "loss": 0.6651, "step": 13145 }, { "epoch": 0.38381361128142244, "grad_norm": 0.5260851446997541, "learning_rate": 3.423357664233577e-05, "loss": 0.6175, "step": 13146 }, { "epoch": 0.3838428075092698, "grad_norm": 0.5772353003371267, "learning_rate": 3.4231954582319547e-05, "loss": 0.6179, "step": 13147 }, { "epoch": 0.38387200373711716, "grad_norm": 0.5308799967360857, "learning_rate": 3.423033252230333e-05, "loss": 0.5885, "step": 13148 }, { "epoch": 0.3839011999649645, "grad_norm": 0.538356038360135, "learning_rate": 3.42287104622871e-05, "loss": 0.6341, "step": 13149 }, { "epoch": 0.3839303961928119, "grad_norm": 0.5205038019759153, "learning_rate": 3.4227088402270885e-05, "loss": 0.5989, "step": 13150 }, { "epoch": 0.38395959242065925, "grad_norm": 0.5231574503871947, "learning_rate": 3.422546634225467e-05, "loss": 0.6191, "step": 13151 }, { "epoch": 0.3839887886485066, "grad_norm": 0.5392188299793242, "learning_rate": 3.422384428223844e-05, "loss": 0.6955, "step": 13152 }, { "epoch": 0.38401798487635397, "grad_norm": 0.6521645222459765, "learning_rate": 3.4222222222222224e-05, "loss": 0.7348, "step": 13153 }, { "epoch": 0.38404718110420133, "grad_norm": 0.5028665329724735, "learning_rate": 3.4220600162206e-05, "loss": 0.5866, "step": 13154 }, { "epoch": 0.3840763773320487, "grad_norm": 0.5154143706412458, "learning_rate": 3.421897810218978e-05, "loss": 0.572, "step": 13155 }, { "epoch": 0.38410557355989605, "grad_norm": 0.5504843364475532, "learning_rate": 3.421735604217356e-05, "loss": 0.6167, "step": 13156 }, { "epoch": 0.3841347697877434, "grad_norm": 0.5426535069444326, "learning_rate": 3.4215733982157344e-05, "loss": 0.639, "step": 13157 }, { "epoch": 0.3841639660155908, "grad_norm": 0.5368367744011963, "learning_rate": 3.4214111922141126e-05, "loss": 0.6123, "step": 13158 }, { "epoch": 0.38419316224343814, "grad_norm": 0.5163306580706343, "learning_rate": 3.42124898621249e-05, "loss": 0.5841, "step": 13159 }, { "epoch": 0.3842223584712855, "grad_norm": 0.5281678725727269, "learning_rate": 3.421086780210868e-05, "loss": 0.6377, "step": 13160 }, { "epoch": 0.38425155469913286, "grad_norm": 0.5623894991345164, "learning_rate": 3.420924574209246e-05, "loss": 0.7147, "step": 13161 }, { "epoch": 0.3842807509269802, "grad_norm": 0.5415854490225631, "learning_rate": 3.420762368207624e-05, "loss": 0.6131, "step": 13162 }, { "epoch": 0.3843099471548276, "grad_norm": 0.5627979374501237, "learning_rate": 3.420600162206002e-05, "loss": 0.633, "step": 13163 }, { "epoch": 0.38433914338267494, "grad_norm": 0.5471079989968805, "learning_rate": 3.4204379562043796e-05, "loss": 0.6714, "step": 13164 }, { "epoch": 0.3843683396105223, "grad_norm": 0.5401560376246347, "learning_rate": 3.420275750202758e-05, "loss": 0.6366, "step": 13165 }, { "epoch": 0.38439753583836966, "grad_norm": 0.55625572750918, "learning_rate": 3.420113544201136e-05, "loss": 0.6427, "step": 13166 }, { "epoch": 0.384426732066217, "grad_norm": 0.5501885626024031, "learning_rate": 3.4199513381995134e-05, "loss": 0.6755, "step": 13167 }, { "epoch": 0.3844559282940644, "grad_norm": 0.5297042146626242, "learning_rate": 3.4197891321978916e-05, "loss": 0.639, "step": 13168 }, { "epoch": 0.38448512452191175, "grad_norm": 0.7970887261954953, "learning_rate": 3.419626926196269e-05, "loss": 0.6387, "step": 13169 }, { "epoch": 0.3845143207497591, "grad_norm": 0.5452320899968908, "learning_rate": 3.419464720194647e-05, "loss": 0.6565, "step": 13170 }, { "epoch": 0.38454351697760647, "grad_norm": 0.514027156189009, "learning_rate": 3.4193025141930255e-05, "loss": 0.5801, "step": 13171 }, { "epoch": 0.38457271320545383, "grad_norm": 0.5645126207447507, "learning_rate": 3.419140308191403e-05, "loss": 0.657, "step": 13172 }, { "epoch": 0.3846019094333012, "grad_norm": 0.5233834573625973, "learning_rate": 3.418978102189781e-05, "loss": 0.5887, "step": 13173 }, { "epoch": 0.38463110566114855, "grad_norm": 0.5133032511388105, "learning_rate": 3.4188158961881586e-05, "loss": 0.6, "step": 13174 }, { "epoch": 0.3846603018889959, "grad_norm": 0.5061119532173708, "learning_rate": 3.418653690186537e-05, "loss": 0.5948, "step": 13175 }, { "epoch": 0.38468949811684333, "grad_norm": 0.49206252199196376, "learning_rate": 3.418491484184915e-05, "loss": 0.5371, "step": 13176 }, { "epoch": 0.3847186943446907, "grad_norm": 0.5507166610402704, "learning_rate": 3.418329278183293e-05, "loss": 0.6235, "step": 13177 }, { "epoch": 0.38474789057253805, "grad_norm": 0.5026903525597809, "learning_rate": 3.4181670721816713e-05, "loss": 0.5452, "step": 13178 }, { "epoch": 0.3847770868003854, "grad_norm": 0.5527353892233136, "learning_rate": 3.418004866180049e-05, "loss": 0.6496, "step": 13179 }, { "epoch": 0.3848062830282328, "grad_norm": 0.513200392749967, "learning_rate": 3.417842660178427e-05, "loss": 0.6069, "step": 13180 }, { "epoch": 0.38483547925608014, "grad_norm": 0.5457633077262029, "learning_rate": 3.4176804541768045e-05, "loss": 0.6457, "step": 13181 }, { "epoch": 0.3848646754839275, "grad_norm": 0.5088640481889365, "learning_rate": 3.417518248175183e-05, "loss": 0.5851, "step": 13182 }, { "epoch": 0.38489387171177486, "grad_norm": 0.4845208004036246, "learning_rate": 3.417356042173561e-05, "loss": 0.541, "step": 13183 }, { "epoch": 0.3849230679396222, "grad_norm": 0.6067890019700243, "learning_rate": 3.4171938361719384e-05, "loss": 0.7091, "step": 13184 }, { "epoch": 0.3849522641674696, "grad_norm": 0.5082554739849293, "learning_rate": 3.4170316301703165e-05, "loss": 0.5542, "step": 13185 }, { "epoch": 0.38498146039531694, "grad_norm": 0.5254892200016531, "learning_rate": 3.416869424168694e-05, "loss": 0.6011, "step": 13186 }, { "epoch": 0.3850106566231643, "grad_norm": 0.7763454328752712, "learning_rate": 3.416707218167072e-05, "loss": 0.658, "step": 13187 }, { "epoch": 0.38503985285101167, "grad_norm": 0.5179273776593941, "learning_rate": 3.4165450121654504e-05, "loss": 0.5932, "step": 13188 }, { "epoch": 0.385069049078859, "grad_norm": 0.612733145281652, "learning_rate": 3.416382806163828e-05, "loss": 0.6452, "step": 13189 }, { "epoch": 0.3850982453067064, "grad_norm": 0.49200789162445363, "learning_rate": 3.416220600162206e-05, "loss": 0.5713, "step": 13190 }, { "epoch": 0.38512744153455375, "grad_norm": 0.5372379781108261, "learning_rate": 3.416058394160584e-05, "loss": 0.6313, "step": 13191 }, { "epoch": 0.3851566377624011, "grad_norm": 0.532866098784523, "learning_rate": 3.415896188158962e-05, "loss": 0.6719, "step": 13192 }, { "epoch": 0.38518583399024847, "grad_norm": 0.5467714920024963, "learning_rate": 3.41573398215734e-05, "loss": 0.6866, "step": 13193 }, { "epoch": 0.38521503021809583, "grad_norm": 0.5848285693287484, "learning_rate": 3.4155717761557174e-05, "loss": 0.6441, "step": 13194 }, { "epoch": 0.3852442264459432, "grad_norm": 0.510867956783063, "learning_rate": 3.415409570154096e-05, "loss": 0.6091, "step": 13195 }, { "epoch": 0.38527342267379056, "grad_norm": 0.5592387571013355, "learning_rate": 3.415247364152474e-05, "loss": 0.6858, "step": 13196 }, { "epoch": 0.3853026189016379, "grad_norm": 0.49111712762602916, "learning_rate": 3.415085158150852e-05, "loss": 0.5456, "step": 13197 }, { "epoch": 0.3853318151294853, "grad_norm": 0.5282734604142946, "learning_rate": 3.41492295214923e-05, "loss": 0.6076, "step": 13198 }, { "epoch": 0.38536101135733264, "grad_norm": 0.6003983257603338, "learning_rate": 3.4147607461476076e-05, "loss": 0.7316, "step": 13199 }, { "epoch": 0.38539020758518, "grad_norm": 0.5288789093331308, "learning_rate": 3.414598540145986e-05, "loss": 0.6016, "step": 13200 }, { "epoch": 0.38541940381302736, "grad_norm": 0.5440602785324615, "learning_rate": 3.414436334144363e-05, "loss": 0.6581, "step": 13201 }, { "epoch": 0.3854486000408747, "grad_norm": 0.5399932641173499, "learning_rate": 3.4142741281427415e-05, "loss": 0.6495, "step": 13202 }, { "epoch": 0.3854777962687221, "grad_norm": 0.5191051537061607, "learning_rate": 3.4141119221411196e-05, "loss": 0.5933, "step": 13203 }, { "epoch": 0.38550699249656945, "grad_norm": 0.5111238734178333, "learning_rate": 3.413949716139497e-05, "loss": 0.6343, "step": 13204 }, { "epoch": 0.3855361887244168, "grad_norm": 0.5284762613143265, "learning_rate": 3.413787510137875e-05, "loss": 0.6202, "step": 13205 }, { "epoch": 0.38556538495226417, "grad_norm": 0.4988247393498885, "learning_rate": 3.413625304136253e-05, "loss": 0.5648, "step": 13206 }, { "epoch": 0.38559458118011153, "grad_norm": 0.5157229746551605, "learning_rate": 3.413463098134631e-05, "loss": 0.6083, "step": 13207 }, { "epoch": 0.3856237774079589, "grad_norm": 0.5720588440443256, "learning_rate": 3.413300892133009e-05, "loss": 0.5671, "step": 13208 }, { "epoch": 0.38565297363580625, "grad_norm": 0.5153385470703836, "learning_rate": 3.413138686131387e-05, "loss": 0.5853, "step": 13209 }, { "epoch": 0.3856821698636536, "grad_norm": 0.532691246137109, "learning_rate": 3.412976480129765e-05, "loss": 0.6154, "step": 13210 }, { "epoch": 0.385711366091501, "grad_norm": 0.5238104081443733, "learning_rate": 3.412814274128143e-05, "loss": 0.6398, "step": 13211 }, { "epoch": 0.38574056231934833, "grad_norm": 0.5490021304959604, "learning_rate": 3.4126520681265205e-05, "loss": 0.6764, "step": 13212 }, { "epoch": 0.3857697585471957, "grad_norm": 0.5688339320068704, "learning_rate": 3.412489862124899e-05, "loss": 0.7019, "step": 13213 }, { "epoch": 0.38579895477504306, "grad_norm": 0.6800634148236608, "learning_rate": 3.412327656123277e-05, "loss": 0.7248, "step": 13214 }, { "epoch": 0.3858281510028904, "grad_norm": 0.5362859061217601, "learning_rate": 3.412165450121655e-05, "loss": 0.6442, "step": 13215 }, { "epoch": 0.3858573472307378, "grad_norm": 0.5252497168925453, "learning_rate": 3.4120032441200326e-05, "loss": 0.6443, "step": 13216 }, { "epoch": 0.38588654345858514, "grad_norm": 0.5044856943088087, "learning_rate": 3.411841038118411e-05, "loss": 0.548, "step": 13217 }, { "epoch": 0.3859157396864325, "grad_norm": 0.5222100943531747, "learning_rate": 3.411678832116789e-05, "loss": 0.5992, "step": 13218 }, { "epoch": 0.38594493591427986, "grad_norm": 0.5793179927341467, "learning_rate": 3.4115166261151664e-05, "loss": 0.7215, "step": 13219 }, { "epoch": 0.3859741321421272, "grad_norm": 0.5333087923755859, "learning_rate": 3.4113544201135446e-05, "loss": 0.6402, "step": 13220 }, { "epoch": 0.3860033283699746, "grad_norm": 0.4853538945745611, "learning_rate": 3.411192214111922e-05, "loss": 0.5163, "step": 13221 }, { "epoch": 0.38603252459782195, "grad_norm": 0.5468354147175265, "learning_rate": 3.4110300081103e-05, "loss": 0.7172, "step": 13222 }, { "epoch": 0.3860617208256693, "grad_norm": 0.5509631977926003, "learning_rate": 3.4108678021086784e-05, "loss": 0.6684, "step": 13223 }, { "epoch": 0.38609091705351667, "grad_norm": 0.5787068851421286, "learning_rate": 3.410705596107056e-05, "loss": 0.683, "step": 13224 }, { "epoch": 0.38612011328136403, "grad_norm": 0.4986755879321201, "learning_rate": 3.410543390105434e-05, "loss": 0.5727, "step": 13225 }, { "epoch": 0.3861493095092114, "grad_norm": 0.5324501354560679, "learning_rate": 3.4103811841038116e-05, "loss": 0.682, "step": 13226 }, { "epoch": 0.38617850573705875, "grad_norm": 0.5431793909363696, "learning_rate": 3.41021897810219e-05, "loss": 0.622, "step": 13227 }, { "epoch": 0.3862077019649061, "grad_norm": 0.5106421466641211, "learning_rate": 3.410056772100568e-05, "loss": 0.5775, "step": 13228 }, { "epoch": 0.3862368981927535, "grad_norm": 0.533138135974333, "learning_rate": 3.4098945660989455e-05, "loss": 0.6641, "step": 13229 }, { "epoch": 0.38626609442060084, "grad_norm": 0.5139399648668808, "learning_rate": 3.4097323600973236e-05, "loss": 0.5669, "step": 13230 }, { "epoch": 0.3862952906484482, "grad_norm": 0.5017838491657318, "learning_rate": 3.409570154095701e-05, "loss": 0.5912, "step": 13231 }, { "epoch": 0.38632448687629556, "grad_norm": 0.5564291798838703, "learning_rate": 3.409407948094079e-05, "loss": 0.6197, "step": 13232 }, { "epoch": 0.3863536831041429, "grad_norm": 0.54677360165996, "learning_rate": 3.409245742092458e-05, "loss": 0.6519, "step": 13233 }, { "epoch": 0.3863828793319903, "grad_norm": 0.5704154152894981, "learning_rate": 3.4090835360908357e-05, "loss": 0.6456, "step": 13234 }, { "epoch": 0.38641207555983764, "grad_norm": 0.511370156758807, "learning_rate": 3.408921330089214e-05, "loss": 0.6307, "step": 13235 }, { "epoch": 0.386441271787685, "grad_norm": 0.5247298947923279, "learning_rate": 3.408759124087591e-05, "loss": 0.5698, "step": 13236 }, { "epoch": 0.3864704680155324, "grad_norm": 0.5100349469562595, "learning_rate": 3.4085969180859695e-05, "loss": 0.5692, "step": 13237 }, { "epoch": 0.3864996642433798, "grad_norm": 0.5441341102860875, "learning_rate": 3.408434712084348e-05, "loss": 0.6418, "step": 13238 }, { "epoch": 0.38652886047122714, "grad_norm": 0.48461504270597583, "learning_rate": 3.408272506082725e-05, "loss": 0.5476, "step": 13239 }, { "epoch": 0.3865580566990745, "grad_norm": 0.5530738963589046, "learning_rate": 3.4081103000811034e-05, "loss": 0.6651, "step": 13240 }, { "epoch": 0.38658725292692186, "grad_norm": 0.561816553824431, "learning_rate": 3.407948094079481e-05, "loss": 0.6368, "step": 13241 }, { "epoch": 0.3866164491547692, "grad_norm": 0.5893122342657826, "learning_rate": 3.407785888077859e-05, "loss": 0.691, "step": 13242 }, { "epoch": 0.3866456453826166, "grad_norm": 0.5703501936827038, "learning_rate": 3.407623682076237e-05, "loss": 0.6843, "step": 13243 }, { "epoch": 0.38667484161046395, "grad_norm": 0.535818051249795, "learning_rate": 3.407461476074615e-05, "loss": 0.6122, "step": 13244 }, { "epoch": 0.3867040378383113, "grad_norm": 0.542159393246046, "learning_rate": 3.407299270072993e-05, "loss": 0.6265, "step": 13245 }, { "epoch": 0.38673323406615867, "grad_norm": 0.5236918932493552, "learning_rate": 3.4071370640713704e-05, "loss": 0.5979, "step": 13246 }, { "epoch": 0.38676243029400603, "grad_norm": 0.5833751283716022, "learning_rate": 3.4069748580697486e-05, "loss": 0.7694, "step": 13247 }, { "epoch": 0.3867916265218534, "grad_norm": 0.5634549583657327, "learning_rate": 3.406812652068127e-05, "loss": 0.6693, "step": 13248 }, { "epoch": 0.38682082274970075, "grad_norm": 0.5473526205307729, "learning_rate": 3.406650446066504e-05, "loss": 0.5689, "step": 13249 }, { "epoch": 0.3868500189775481, "grad_norm": 0.5129646866243122, "learning_rate": 3.4064882400648824e-05, "loss": 0.5889, "step": 13250 }, { "epoch": 0.3868792152053955, "grad_norm": 0.5307477729659041, "learning_rate": 3.40632603406326e-05, "loss": 0.6486, "step": 13251 }, { "epoch": 0.38690841143324284, "grad_norm": 0.5643832805309136, "learning_rate": 3.406163828061639e-05, "loss": 0.6662, "step": 13252 }, { "epoch": 0.3869376076610902, "grad_norm": 0.5606654005868023, "learning_rate": 3.406001622060017e-05, "loss": 0.6249, "step": 13253 }, { "epoch": 0.38696680388893756, "grad_norm": 0.5100896899103778, "learning_rate": 3.4058394160583944e-05, "loss": 0.6086, "step": 13254 }, { "epoch": 0.3869960001167849, "grad_norm": 0.5139270913503017, "learning_rate": 3.4056772100567726e-05, "loss": 0.5929, "step": 13255 }, { "epoch": 0.3870251963446323, "grad_norm": 0.532838913977726, "learning_rate": 3.40551500405515e-05, "loss": 0.5995, "step": 13256 }, { "epoch": 0.38705439257247964, "grad_norm": 0.6026319076116524, "learning_rate": 3.405352798053528e-05, "loss": 0.7245, "step": 13257 }, { "epoch": 0.387083588800327, "grad_norm": 0.5372572832538972, "learning_rate": 3.4051905920519065e-05, "loss": 0.6425, "step": 13258 }, { "epoch": 0.38711278502817437, "grad_norm": 0.529537279564923, "learning_rate": 3.405028386050284e-05, "loss": 0.6212, "step": 13259 }, { "epoch": 0.3871419812560217, "grad_norm": 0.5708623376338854, "learning_rate": 3.404866180048662e-05, "loss": 0.6751, "step": 13260 }, { "epoch": 0.3871711774838691, "grad_norm": 0.5207412325858722, "learning_rate": 3.4047039740470396e-05, "loss": 0.6194, "step": 13261 }, { "epoch": 0.38720037371171645, "grad_norm": 0.5656943585251057, "learning_rate": 3.404541768045418e-05, "loss": 0.6275, "step": 13262 }, { "epoch": 0.3872295699395638, "grad_norm": 0.6342782024042416, "learning_rate": 3.404379562043796e-05, "loss": 0.713, "step": 13263 }, { "epoch": 0.3872587661674112, "grad_norm": 0.55294595995194, "learning_rate": 3.4042173560421735e-05, "loss": 0.5842, "step": 13264 }, { "epoch": 0.38728796239525853, "grad_norm": 0.5581887598904559, "learning_rate": 3.404055150040552e-05, "loss": 0.6671, "step": 13265 }, { "epoch": 0.3873171586231059, "grad_norm": 0.5333882285124877, "learning_rate": 3.403892944038929e-05, "loss": 0.614, "step": 13266 }, { "epoch": 0.38734635485095326, "grad_norm": 0.4888485492827698, "learning_rate": 3.4037307380373073e-05, "loss": 0.5327, "step": 13267 }, { "epoch": 0.3873755510788006, "grad_norm": 0.5582876244020476, "learning_rate": 3.4035685320356855e-05, "loss": 0.6885, "step": 13268 }, { "epoch": 0.387404747306648, "grad_norm": 0.5211762427168861, "learning_rate": 3.403406326034063e-05, "loss": 0.5994, "step": 13269 }, { "epoch": 0.38743394353449534, "grad_norm": 0.5218219999358839, "learning_rate": 3.403244120032442e-05, "loss": 0.5991, "step": 13270 }, { "epoch": 0.3874631397623427, "grad_norm": 0.5763012125136701, "learning_rate": 3.4030819140308194e-05, "loss": 0.7226, "step": 13271 }, { "epoch": 0.38749233599019006, "grad_norm": 0.5143297682461497, "learning_rate": 3.4029197080291975e-05, "loss": 0.5712, "step": 13272 }, { "epoch": 0.3875215322180374, "grad_norm": 0.48902573607239486, "learning_rate": 3.402757502027575e-05, "loss": 0.56, "step": 13273 }, { "epoch": 0.3875507284458848, "grad_norm": 0.5667021525986832, "learning_rate": 3.402595296025953e-05, "loss": 0.6401, "step": 13274 }, { "epoch": 0.38757992467373215, "grad_norm": 0.5912745375346533, "learning_rate": 3.4024330900243314e-05, "loss": 0.6954, "step": 13275 }, { "epoch": 0.3876091209015795, "grad_norm": 0.5153831323419967, "learning_rate": 3.402270884022709e-05, "loss": 0.561, "step": 13276 }, { "epoch": 0.38763831712942687, "grad_norm": 0.5201778694261706, "learning_rate": 3.402108678021087e-05, "loss": 0.5778, "step": 13277 }, { "epoch": 0.38766751335727423, "grad_norm": 0.5248290799143236, "learning_rate": 3.401946472019465e-05, "loss": 0.6368, "step": 13278 }, { "epoch": 0.3876967095851216, "grad_norm": 0.602124548829867, "learning_rate": 3.401784266017843e-05, "loss": 0.7367, "step": 13279 }, { "epoch": 0.38772590581296895, "grad_norm": 0.6384527611803698, "learning_rate": 3.401622060016221e-05, "loss": 0.7353, "step": 13280 }, { "epoch": 0.3877551020408163, "grad_norm": 0.5761099439234155, "learning_rate": 3.4014598540145984e-05, "loss": 0.7228, "step": 13281 }, { "epoch": 0.3877842982686637, "grad_norm": 0.49440694043320327, "learning_rate": 3.4012976480129766e-05, "loss": 0.5638, "step": 13282 }, { "epoch": 0.38781349449651104, "grad_norm": 0.5547795726304481, "learning_rate": 3.401135442011355e-05, "loss": 0.6845, "step": 13283 }, { "epoch": 0.3878426907243584, "grad_norm": 0.5621546363681766, "learning_rate": 3.400973236009732e-05, "loss": 0.7035, "step": 13284 }, { "epoch": 0.38787188695220576, "grad_norm": 0.5134550548293891, "learning_rate": 3.4008110300081104e-05, "loss": 0.578, "step": 13285 }, { "epoch": 0.3879010831800531, "grad_norm": 0.49177343583394534, "learning_rate": 3.400648824006488e-05, "loss": 0.5563, "step": 13286 }, { "epoch": 0.3879302794079005, "grad_norm": 0.5325196321509544, "learning_rate": 3.400486618004866e-05, "loss": 0.632, "step": 13287 }, { "epoch": 0.38795947563574784, "grad_norm": 0.5178588997613397, "learning_rate": 3.400324412003244e-05, "loss": 0.5874, "step": 13288 }, { "epoch": 0.3879886718635952, "grad_norm": 0.5350299005414579, "learning_rate": 3.4001622060016225e-05, "loss": 0.6244, "step": 13289 }, { "epoch": 0.38801786809144256, "grad_norm": 0.4891564175605552, "learning_rate": 3.4000000000000007e-05, "loss": 0.5165, "step": 13290 }, { "epoch": 0.3880470643192899, "grad_norm": 0.5630641201994554, "learning_rate": 3.399837793998378e-05, "loss": 0.6952, "step": 13291 }, { "epoch": 0.3880762605471373, "grad_norm": 0.5533712514534416, "learning_rate": 3.399675587996756e-05, "loss": 0.629, "step": 13292 }, { "epoch": 0.38810545677498465, "grad_norm": 0.5362985693342555, "learning_rate": 3.399513381995134e-05, "loss": 0.6466, "step": 13293 }, { "epoch": 0.388134653002832, "grad_norm": 0.5467641173071404, "learning_rate": 3.399351175993512e-05, "loss": 0.6385, "step": 13294 }, { "epoch": 0.38816384923067937, "grad_norm": 0.5692182254134187, "learning_rate": 3.39918896999189e-05, "loss": 0.7064, "step": 13295 }, { "epoch": 0.38819304545852673, "grad_norm": 0.5485502789523975, "learning_rate": 3.399026763990268e-05, "loss": 0.6481, "step": 13296 }, { "epoch": 0.38822224168637415, "grad_norm": 0.5402616341667185, "learning_rate": 3.398864557988646e-05, "loss": 0.6315, "step": 13297 }, { "epoch": 0.3882514379142215, "grad_norm": 0.5176471946058949, "learning_rate": 3.398702351987024e-05, "loss": 0.6045, "step": 13298 }, { "epoch": 0.38828063414206887, "grad_norm": 0.5142902990386321, "learning_rate": 3.3985401459854015e-05, "loss": 0.5639, "step": 13299 }, { "epoch": 0.38830983036991623, "grad_norm": 0.6288092340922996, "learning_rate": 3.39837793998378e-05, "loss": 0.6217, "step": 13300 }, { "epoch": 0.3883390265977636, "grad_norm": 0.528762668631425, "learning_rate": 3.398215733982157e-05, "loss": 0.6, "step": 13301 }, { "epoch": 0.38836822282561095, "grad_norm": 0.5129051778383712, "learning_rate": 3.3980535279805354e-05, "loss": 0.5507, "step": 13302 }, { "epoch": 0.3883974190534583, "grad_norm": 0.520938966356661, "learning_rate": 3.3978913219789136e-05, "loss": 0.5458, "step": 13303 }, { "epoch": 0.3884266152813057, "grad_norm": 0.5341175678920067, "learning_rate": 3.397729115977291e-05, "loss": 0.6364, "step": 13304 }, { "epoch": 0.38845581150915304, "grad_norm": 0.5112499262214265, "learning_rate": 3.397566909975669e-05, "loss": 0.524, "step": 13305 }, { "epoch": 0.3884850077370004, "grad_norm": 0.5607696094351929, "learning_rate": 3.397404703974047e-05, "loss": 0.641, "step": 13306 }, { "epoch": 0.38851420396484776, "grad_norm": 0.6314231651106562, "learning_rate": 3.397242497972425e-05, "loss": 0.7113, "step": 13307 }, { "epoch": 0.3885434001926951, "grad_norm": 0.5080048144822225, "learning_rate": 3.397080291970803e-05, "loss": 0.5997, "step": 13308 }, { "epoch": 0.3885725964205425, "grad_norm": 0.5288516977310521, "learning_rate": 3.396918085969181e-05, "loss": 0.6006, "step": 13309 }, { "epoch": 0.38860179264838984, "grad_norm": 0.5220024732691773, "learning_rate": 3.3967558799675594e-05, "loss": 0.6533, "step": 13310 }, { "epoch": 0.3886309888762372, "grad_norm": 0.49020879042889787, "learning_rate": 3.396593673965937e-05, "loss": 0.5403, "step": 13311 }, { "epoch": 0.38866018510408457, "grad_norm": 0.5609916220683552, "learning_rate": 3.396431467964315e-05, "loss": 0.7048, "step": 13312 }, { "epoch": 0.3886893813319319, "grad_norm": 0.5574929295698523, "learning_rate": 3.3962692619626926e-05, "loss": 0.6964, "step": 13313 }, { "epoch": 0.3887185775597793, "grad_norm": 0.5047355117764768, "learning_rate": 3.396107055961071e-05, "loss": 0.5544, "step": 13314 }, { "epoch": 0.38874777378762665, "grad_norm": 0.544587010216972, "learning_rate": 3.395944849959449e-05, "loss": 0.6939, "step": 13315 }, { "epoch": 0.388776970015474, "grad_norm": 0.5462609859827564, "learning_rate": 3.3957826439578265e-05, "loss": 0.645, "step": 13316 }, { "epoch": 0.38880616624332137, "grad_norm": 0.534720600671827, "learning_rate": 3.3956204379562046e-05, "loss": 0.6674, "step": 13317 }, { "epoch": 0.38883536247116873, "grad_norm": 0.5449321003881717, "learning_rate": 3.395458231954582e-05, "loss": 0.6545, "step": 13318 }, { "epoch": 0.3888645586990161, "grad_norm": 0.5300651649235163, "learning_rate": 3.39529602595296e-05, "loss": 0.654, "step": 13319 }, { "epoch": 0.38889375492686346, "grad_norm": 0.5315339148768071, "learning_rate": 3.3951338199513385e-05, "loss": 0.6296, "step": 13320 }, { "epoch": 0.3889229511547108, "grad_norm": 0.5360787780666484, "learning_rate": 3.394971613949716e-05, "loss": 0.6224, "step": 13321 }, { "epoch": 0.3889521473825582, "grad_norm": 0.5331018329681547, "learning_rate": 3.394809407948094e-05, "loss": 0.6168, "step": 13322 }, { "epoch": 0.38898134361040554, "grad_norm": 0.5406379524121463, "learning_rate": 3.394647201946472e-05, "loss": 0.6481, "step": 13323 }, { "epoch": 0.3890105398382529, "grad_norm": 0.5624204344682213, "learning_rate": 3.39448499594485e-05, "loss": 0.6755, "step": 13324 }, { "epoch": 0.38903973606610026, "grad_norm": 0.5643842319419247, "learning_rate": 3.394322789943228e-05, "loss": 0.6852, "step": 13325 }, { "epoch": 0.3890689322939476, "grad_norm": 0.5002661368727953, "learning_rate": 3.3941605839416055e-05, "loss": 0.5952, "step": 13326 }, { "epoch": 0.389098128521795, "grad_norm": 0.5258074095553477, "learning_rate": 3.3939983779399844e-05, "loss": 0.5626, "step": 13327 }, { "epoch": 0.38912732474964234, "grad_norm": 0.5589500513584776, "learning_rate": 3.393836171938362e-05, "loss": 0.6004, "step": 13328 }, { "epoch": 0.3891565209774897, "grad_norm": 0.5316858896691029, "learning_rate": 3.39367396593674e-05, "loss": 0.6543, "step": 13329 }, { "epoch": 0.38918571720533707, "grad_norm": 0.5902987341606025, "learning_rate": 3.393511759935118e-05, "loss": 0.735, "step": 13330 }, { "epoch": 0.38921491343318443, "grad_norm": 0.5465301466786073, "learning_rate": 3.393349553933496e-05, "loss": 0.6273, "step": 13331 }, { "epoch": 0.3892441096610318, "grad_norm": 0.5262937696437083, "learning_rate": 3.393187347931874e-05, "loss": 0.6751, "step": 13332 }, { "epoch": 0.38927330588887915, "grad_norm": 0.5281960822322989, "learning_rate": 3.3930251419302514e-05, "loss": 0.6614, "step": 13333 }, { "epoch": 0.3893025021167265, "grad_norm": 0.5099099375630322, "learning_rate": 3.3928629359286296e-05, "loss": 0.6036, "step": 13334 }, { "epoch": 0.3893316983445739, "grad_norm": 0.5198168023460212, "learning_rate": 3.392700729927008e-05, "loss": 0.6297, "step": 13335 }, { "epoch": 0.38936089457242123, "grad_norm": 0.5780336216426212, "learning_rate": 3.392538523925385e-05, "loss": 0.7356, "step": 13336 }, { "epoch": 0.3893900908002686, "grad_norm": 0.5452738930187543, "learning_rate": 3.3923763179237634e-05, "loss": 0.6503, "step": 13337 }, { "epoch": 0.38941928702811596, "grad_norm": 0.5443710406834797, "learning_rate": 3.392214111922141e-05, "loss": 0.6502, "step": 13338 }, { "epoch": 0.3894484832559633, "grad_norm": 0.5848143771640024, "learning_rate": 3.392051905920519e-05, "loss": 0.7101, "step": 13339 }, { "epoch": 0.3894776794838107, "grad_norm": 0.5514977754731752, "learning_rate": 3.391889699918897e-05, "loss": 0.6682, "step": 13340 }, { "epoch": 0.38950687571165804, "grad_norm": 0.5130360039661597, "learning_rate": 3.391727493917275e-05, "loss": 0.6274, "step": 13341 }, { "epoch": 0.3895360719395054, "grad_norm": 0.4850861762505791, "learning_rate": 3.391565287915653e-05, "loss": 0.5542, "step": 13342 }, { "epoch": 0.38956526816735276, "grad_norm": 0.5293173871266671, "learning_rate": 3.391403081914031e-05, "loss": 0.6452, "step": 13343 }, { "epoch": 0.3895944643952001, "grad_norm": 0.5139875388305193, "learning_rate": 3.3912408759124086e-05, "loss": 0.6509, "step": 13344 }, { "epoch": 0.3896236606230475, "grad_norm": 0.5143534870261088, "learning_rate": 3.391078669910787e-05, "loss": 0.601, "step": 13345 }, { "epoch": 0.38965285685089485, "grad_norm": 0.5177878006964083, "learning_rate": 3.390916463909165e-05, "loss": 0.632, "step": 13346 }, { "epoch": 0.3896820530787422, "grad_norm": 0.5451337649589827, "learning_rate": 3.390754257907543e-05, "loss": 0.6073, "step": 13347 }, { "epoch": 0.38971124930658957, "grad_norm": 0.5370868004119486, "learning_rate": 3.3905920519059206e-05, "loss": 0.632, "step": 13348 }, { "epoch": 0.38974044553443693, "grad_norm": 0.500054308053791, "learning_rate": 3.390429845904299e-05, "loss": 0.6026, "step": 13349 }, { "epoch": 0.3897696417622843, "grad_norm": 0.5347178191558689, "learning_rate": 3.390267639902677e-05, "loss": 0.6164, "step": 13350 }, { "epoch": 0.38979883799013165, "grad_norm": 0.5257454305437234, "learning_rate": 3.3901054339010545e-05, "loss": 0.6312, "step": 13351 }, { "epoch": 0.389828034217979, "grad_norm": 0.5112219031866604, "learning_rate": 3.389943227899433e-05, "loss": 0.6182, "step": 13352 }, { "epoch": 0.3898572304458264, "grad_norm": 0.5156661422576583, "learning_rate": 3.38978102189781e-05, "loss": 0.5853, "step": 13353 }, { "epoch": 0.38988642667367374, "grad_norm": 0.5049207584095144, "learning_rate": 3.3896188158961883e-05, "loss": 0.5595, "step": 13354 }, { "epoch": 0.3899156229015211, "grad_norm": 0.5181160271588476, "learning_rate": 3.3894566098945665e-05, "loss": 0.631, "step": 13355 }, { "epoch": 0.38994481912936846, "grad_norm": 0.5325793239934127, "learning_rate": 3.389294403892944e-05, "loss": 0.652, "step": 13356 }, { "epoch": 0.3899740153572159, "grad_norm": 0.5110746832465511, "learning_rate": 3.389132197891322e-05, "loss": 0.5842, "step": 13357 }, { "epoch": 0.39000321158506324, "grad_norm": 0.5159260940490046, "learning_rate": 3.3889699918897e-05, "loss": 0.5821, "step": 13358 }, { "epoch": 0.3900324078129106, "grad_norm": 0.5481943857233982, "learning_rate": 3.388807785888078e-05, "loss": 0.6351, "step": 13359 }, { "epoch": 0.39006160404075796, "grad_norm": 0.5598369204333393, "learning_rate": 3.388645579886456e-05, "loss": 0.6017, "step": 13360 }, { "epoch": 0.3900908002686053, "grad_norm": 0.593161022069191, "learning_rate": 3.3884833738848335e-05, "loss": 0.6658, "step": 13361 }, { "epoch": 0.3901199964964527, "grad_norm": 0.5261767483106871, "learning_rate": 3.388321167883212e-05, "loss": 0.6064, "step": 13362 }, { "epoch": 0.39014919272430004, "grad_norm": 0.5433256603662604, "learning_rate": 3.388158961881589e-05, "loss": 0.6228, "step": 13363 }, { "epoch": 0.3901783889521474, "grad_norm": 0.5185715429900055, "learning_rate": 3.3879967558799674e-05, "loss": 0.6094, "step": 13364 }, { "epoch": 0.39020758517999476, "grad_norm": 0.5794467598461857, "learning_rate": 3.387834549878346e-05, "loss": 0.7067, "step": 13365 }, { "epoch": 0.3902367814078421, "grad_norm": 0.5548202840747408, "learning_rate": 3.387672343876724e-05, "loss": 0.6103, "step": 13366 }, { "epoch": 0.3902659776356895, "grad_norm": 0.519484920883444, "learning_rate": 3.387510137875102e-05, "loss": 0.6214, "step": 13367 }, { "epoch": 0.39029517386353685, "grad_norm": 0.514110036489632, "learning_rate": 3.3873479318734794e-05, "loss": 0.6169, "step": 13368 }, { "epoch": 0.3903243700913842, "grad_norm": 0.5048213542989125, "learning_rate": 3.3871857258718576e-05, "loss": 0.5814, "step": 13369 }, { "epoch": 0.39035356631923157, "grad_norm": 0.5526725951064577, "learning_rate": 3.387023519870236e-05, "loss": 0.686, "step": 13370 }, { "epoch": 0.39038276254707893, "grad_norm": 0.5191844013336885, "learning_rate": 3.386861313868613e-05, "loss": 0.586, "step": 13371 }, { "epoch": 0.3904119587749263, "grad_norm": 0.5534833789522084, "learning_rate": 3.3866991078669914e-05, "loss": 0.6598, "step": 13372 }, { "epoch": 0.39044115500277365, "grad_norm": 0.5835616045215232, "learning_rate": 3.386536901865369e-05, "loss": 0.7294, "step": 13373 }, { "epoch": 0.390470351230621, "grad_norm": 0.5257970229823025, "learning_rate": 3.386374695863747e-05, "loss": 0.6186, "step": 13374 }, { "epoch": 0.3904995474584684, "grad_norm": 0.5230061654520658, "learning_rate": 3.386212489862125e-05, "loss": 0.6429, "step": 13375 }, { "epoch": 0.39052874368631574, "grad_norm": 0.5207630417583733, "learning_rate": 3.386050283860503e-05, "loss": 0.6173, "step": 13376 }, { "epoch": 0.3905579399141631, "grad_norm": 0.5679428884812598, "learning_rate": 3.385888077858881e-05, "loss": 0.6875, "step": 13377 }, { "epoch": 0.39058713614201046, "grad_norm": 0.5414470472492111, "learning_rate": 3.3857258718572585e-05, "loss": 0.6101, "step": 13378 }, { "epoch": 0.3906163323698578, "grad_norm": 0.5187407647694577, "learning_rate": 3.3855636658556366e-05, "loss": 0.5197, "step": 13379 }, { "epoch": 0.3906455285977052, "grad_norm": 0.5489262277086379, "learning_rate": 3.385401459854015e-05, "loss": 0.6703, "step": 13380 }, { "epoch": 0.39067472482555254, "grad_norm": 0.5667639285815438, "learning_rate": 3.385239253852392e-05, "loss": 0.6399, "step": 13381 }, { "epoch": 0.3907039210533999, "grad_norm": 0.5368132633697132, "learning_rate": 3.3850770478507705e-05, "loss": 0.6337, "step": 13382 }, { "epoch": 0.39073311728124727, "grad_norm": 0.5767221056822641, "learning_rate": 3.384914841849148e-05, "loss": 0.7171, "step": 13383 }, { "epoch": 0.3907623135090946, "grad_norm": 0.5397853922972761, "learning_rate": 3.384752635847527e-05, "loss": 0.6808, "step": 13384 }, { "epoch": 0.390791509736942, "grad_norm": 0.5234158495445458, "learning_rate": 3.3845904298459044e-05, "loss": 0.6019, "step": 13385 }, { "epoch": 0.39082070596478935, "grad_norm": 0.5416103453548067, "learning_rate": 3.3844282238442825e-05, "loss": 0.6742, "step": 13386 }, { "epoch": 0.3908499021926367, "grad_norm": 0.5527016897799607, "learning_rate": 3.384266017842661e-05, "loss": 0.6712, "step": 13387 }, { "epoch": 0.3908790984204841, "grad_norm": 0.5512456062081581, "learning_rate": 3.384103811841038e-05, "loss": 0.6845, "step": 13388 }, { "epoch": 0.39090829464833143, "grad_norm": 0.5547425538200292, "learning_rate": 3.3839416058394164e-05, "loss": 0.7018, "step": 13389 }, { "epoch": 0.3909374908761788, "grad_norm": 0.5189618948269209, "learning_rate": 3.3837793998377946e-05, "loss": 0.6203, "step": 13390 }, { "epoch": 0.39096668710402616, "grad_norm": 0.5365952726840116, "learning_rate": 3.383617193836172e-05, "loss": 0.6299, "step": 13391 }, { "epoch": 0.3909958833318735, "grad_norm": 0.5770794682149962, "learning_rate": 3.38345498783455e-05, "loss": 0.7516, "step": 13392 }, { "epoch": 0.3910250795597209, "grad_norm": 0.49061206959132303, "learning_rate": 3.383292781832928e-05, "loss": 0.546, "step": 13393 }, { "epoch": 0.39105427578756824, "grad_norm": 0.5193305602678714, "learning_rate": 3.383130575831306e-05, "loss": 0.61, "step": 13394 }, { "epoch": 0.3910834720154156, "grad_norm": 0.5108467570554313, "learning_rate": 3.382968369829684e-05, "loss": 0.607, "step": 13395 }, { "epoch": 0.39111266824326296, "grad_norm": 0.5288491579988864, "learning_rate": 3.3828061638280616e-05, "loss": 0.5436, "step": 13396 }, { "epoch": 0.3911418644711103, "grad_norm": 0.4971228557233951, "learning_rate": 3.38264395782644e-05, "loss": 0.5785, "step": 13397 }, { "epoch": 0.3911710606989577, "grad_norm": 0.46889149646680583, "learning_rate": 3.382481751824817e-05, "loss": 0.5355, "step": 13398 }, { "epoch": 0.39120025692680505, "grad_norm": 0.5248442746508828, "learning_rate": 3.3823195458231954e-05, "loss": 0.5997, "step": 13399 }, { "epoch": 0.3912294531546524, "grad_norm": 0.5216061981679081, "learning_rate": 3.3821573398215736e-05, "loss": 0.671, "step": 13400 }, { "epoch": 0.39125864938249977, "grad_norm": 0.5455448971100431, "learning_rate": 3.381995133819951e-05, "loss": 0.6434, "step": 13401 }, { "epoch": 0.39128784561034713, "grad_norm": 0.5327730491653174, "learning_rate": 3.381832927818329e-05, "loss": 0.6792, "step": 13402 }, { "epoch": 0.3913170418381945, "grad_norm": 0.5407669317243656, "learning_rate": 3.3816707218167075e-05, "loss": 0.6614, "step": 13403 }, { "epoch": 0.39134623806604185, "grad_norm": 0.5215650629434037, "learning_rate": 3.3815085158150856e-05, "loss": 0.5825, "step": 13404 }, { "epoch": 0.3913754342938892, "grad_norm": 0.5934694127936888, "learning_rate": 3.381346309813463e-05, "loss": 0.6631, "step": 13405 }, { "epoch": 0.3914046305217366, "grad_norm": 0.5103499283412173, "learning_rate": 3.381184103811841e-05, "loss": 0.5843, "step": 13406 }, { "epoch": 0.39143382674958394, "grad_norm": 0.5951444492802489, "learning_rate": 3.3810218978102195e-05, "loss": 0.7264, "step": 13407 }, { "epoch": 0.3914630229774313, "grad_norm": 0.5098361791219058, "learning_rate": 3.380859691808597e-05, "loss": 0.5448, "step": 13408 }, { "epoch": 0.39149221920527866, "grad_norm": 0.5087874692918853, "learning_rate": 3.380697485806975e-05, "loss": 0.5985, "step": 13409 }, { "epoch": 0.391521415433126, "grad_norm": 0.5226001596833753, "learning_rate": 3.380535279805353e-05, "loss": 0.6211, "step": 13410 }, { "epoch": 0.3915506116609734, "grad_norm": 0.5274397492960815, "learning_rate": 3.380373073803731e-05, "loss": 0.6524, "step": 13411 }, { "epoch": 0.39157980788882074, "grad_norm": 0.5746695451050324, "learning_rate": 3.380210867802109e-05, "loss": 0.7377, "step": 13412 }, { "epoch": 0.3916090041166681, "grad_norm": 0.5017561012134333, "learning_rate": 3.3800486618004865e-05, "loss": 0.5713, "step": 13413 }, { "epoch": 0.39163820034451546, "grad_norm": 0.5915556052356292, "learning_rate": 3.379886455798865e-05, "loss": 0.6841, "step": 13414 }, { "epoch": 0.3916673965723628, "grad_norm": 0.4767878770970746, "learning_rate": 3.379724249797243e-05, "loss": 0.5149, "step": 13415 }, { "epoch": 0.3916965928002102, "grad_norm": 0.5535671618040808, "learning_rate": 3.3795620437956204e-05, "loss": 0.6389, "step": 13416 }, { "epoch": 0.3917257890280576, "grad_norm": 0.5761819245725803, "learning_rate": 3.3793998377939985e-05, "loss": 0.735, "step": 13417 }, { "epoch": 0.39175498525590496, "grad_norm": 0.5175328108599379, "learning_rate": 3.379237631792376e-05, "loss": 0.62, "step": 13418 }, { "epoch": 0.3917841814837523, "grad_norm": 0.5427618334531683, "learning_rate": 3.379075425790754e-05, "loss": 0.6252, "step": 13419 }, { "epoch": 0.3918133777115997, "grad_norm": 0.7473390901819058, "learning_rate": 3.3789132197891324e-05, "loss": 0.6966, "step": 13420 }, { "epoch": 0.39184257393944705, "grad_norm": 0.5568149859525426, "learning_rate": 3.37875101378751e-05, "loss": 0.7076, "step": 13421 }, { "epoch": 0.3918717701672944, "grad_norm": 0.5245030842201859, "learning_rate": 3.378588807785889e-05, "loss": 0.6039, "step": 13422 }, { "epoch": 0.39190096639514177, "grad_norm": 0.5390935482441434, "learning_rate": 3.378426601784266e-05, "loss": 0.5969, "step": 13423 }, { "epoch": 0.39193016262298913, "grad_norm": 0.5081322747936404, "learning_rate": 3.3782643957826444e-05, "loss": 0.5625, "step": 13424 }, { "epoch": 0.3919593588508365, "grad_norm": 0.5424571821190545, "learning_rate": 3.378102189781022e-05, "loss": 0.6536, "step": 13425 }, { "epoch": 0.39198855507868385, "grad_norm": 0.5343203481320419, "learning_rate": 3.3779399837794e-05, "loss": 0.6391, "step": 13426 }, { "epoch": 0.3920177513065312, "grad_norm": 0.5299860865187666, "learning_rate": 3.377777777777778e-05, "loss": 0.6423, "step": 13427 }, { "epoch": 0.3920469475343786, "grad_norm": 0.5259600699781042, "learning_rate": 3.377615571776156e-05, "loss": 0.6041, "step": 13428 }, { "epoch": 0.39207614376222594, "grad_norm": 0.49092021344202164, "learning_rate": 3.377453365774534e-05, "loss": 0.5392, "step": 13429 }, { "epoch": 0.3921053399900733, "grad_norm": 0.5168522477846939, "learning_rate": 3.3772911597729114e-05, "loss": 0.5801, "step": 13430 }, { "epoch": 0.39213453621792066, "grad_norm": 0.5502152972861959, "learning_rate": 3.3771289537712896e-05, "loss": 0.6647, "step": 13431 }, { "epoch": 0.392163732445768, "grad_norm": 0.5238913068431661, "learning_rate": 3.376966747769668e-05, "loss": 0.6272, "step": 13432 }, { "epoch": 0.3921929286736154, "grad_norm": 0.5451468556275972, "learning_rate": 3.376804541768045e-05, "loss": 0.6328, "step": 13433 }, { "epoch": 0.39222212490146274, "grad_norm": 0.5737748461244984, "learning_rate": 3.3766423357664235e-05, "loss": 0.7278, "step": 13434 }, { "epoch": 0.3922513211293101, "grad_norm": 0.488749356454123, "learning_rate": 3.3764801297648016e-05, "loss": 0.5391, "step": 13435 }, { "epoch": 0.39228051735715747, "grad_norm": 0.5549027666145947, "learning_rate": 3.376317923763179e-05, "loss": 0.6819, "step": 13436 }, { "epoch": 0.3923097135850048, "grad_norm": 0.5777335855409345, "learning_rate": 3.376155717761557e-05, "loss": 0.69, "step": 13437 }, { "epoch": 0.3923389098128522, "grad_norm": 0.5609077993842052, "learning_rate": 3.375993511759935e-05, "loss": 0.6704, "step": 13438 }, { "epoch": 0.39236810604069955, "grad_norm": 0.514660451521545, "learning_rate": 3.375831305758313e-05, "loss": 0.6087, "step": 13439 }, { "epoch": 0.3923973022685469, "grad_norm": 0.561429816489423, "learning_rate": 3.375669099756691e-05, "loss": 0.6606, "step": 13440 }, { "epoch": 0.39242649849639427, "grad_norm": 0.519934659749961, "learning_rate": 3.3755068937550693e-05, "loss": 0.5981, "step": 13441 }, { "epoch": 0.39245569472424163, "grad_norm": 0.5491615262062062, "learning_rate": 3.3753446877534475e-05, "loss": 0.6204, "step": 13442 }, { "epoch": 0.392484890952089, "grad_norm": 0.5549434113641432, "learning_rate": 3.375182481751825e-05, "loss": 0.6704, "step": 13443 }, { "epoch": 0.39251408717993636, "grad_norm": 0.5431382812369778, "learning_rate": 3.375020275750203e-05, "loss": 0.6391, "step": 13444 }, { "epoch": 0.3925432834077837, "grad_norm": 0.5264230902855384, "learning_rate": 3.374858069748581e-05, "loss": 0.5496, "step": 13445 }, { "epoch": 0.3925724796356311, "grad_norm": 0.5239076029415907, "learning_rate": 3.374695863746959e-05, "loss": 0.6181, "step": 13446 }, { "epoch": 0.39260167586347844, "grad_norm": 0.5364855365634391, "learning_rate": 3.374533657745337e-05, "loss": 0.6053, "step": 13447 }, { "epoch": 0.3926308720913258, "grad_norm": 0.5513375758974192, "learning_rate": 3.3743714517437145e-05, "loss": 0.6812, "step": 13448 }, { "epoch": 0.39266006831917316, "grad_norm": 0.5559383261957552, "learning_rate": 3.374209245742093e-05, "loss": 0.6857, "step": 13449 }, { "epoch": 0.3926892645470205, "grad_norm": 0.47436752813570643, "learning_rate": 3.37404703974047e-05, "loss": 0.5172, "step": 13450 }, { "epoch": 0.3927184607748679, "grad_norm": 0.5338145944007227, "learning_rate": 3.3738848337388484e-05, "loss": 0.6345, "step": 13451 }, { "epoch": 0.39274765700271524, "grad_norm": 0.5632551910639513, "learning_rate": 3.3737226277372266e-05, "loss": 0.6786, "step": 13452 }, { "epoch": 0.3927768532305626, "grad_norm": 0.5444902429094233, "learning_rate": 3.373560421735604e-05, "loss": 0.5992, "step": 13453 }, { "epoch": 0.39280604945840997, "grad_norm": 0.5271303081187471, "learning_rate": 3.373398215733982e-05, "loss": 0.5869, "step": 13454 }, { "epoch": 0.39283524568625733, "grad_norm": 0.5859306227075417, "learning_rate": 3.3732360097323604e-05, "loss": 0.7253, "step": 13455 }, { "epoch": 0.3928644419141047, "grad_norm": 0.5473170560942701, "learning_rate": 3.373073803730738e-05, "loss": 0.611, "step": 13456 }, { "epoch": 0.39289363814195205, "grad_norm": 0.4687226675940902, "learning_rate": 3.372911597729116e-05, "loss": 0.4845, "step": 13457 }, { "epoch": 0.3929228343697994, "grad_norm": 0.5353033432231272, "learning_rate": 3.3727493917274936e-05, "loss": 0.5954, "step": 13458 }, { "epoch": 0.3929520305976468, "grad_norm": 0.5376381811584193, "learning_rate": 3.3725871857258724e-05, "loss": 0.6243, "step": 13459 }, { "epoch": 0.39298122682549413, "grad_norm": 0.5584450487648743, "learning_rate": 3.37242497972425e-05, "loss": 0.6644, "step": 13460 }, { "epoch": 0.3930104230533415, "grad_norm": 0.5105879508864704, "learning_rate": 3.372262773722628e-05, "loss": 0.5703, "step": 13461 }, { "epoch": 0.39303961928118886, "grad_norm": 0.5527622753392518, "learning_rate": 3.372100567721006e-05, "loss": 0.6066, "step": 13462 }, { "epoch": 0.3930688155090362, "grad_norm": 0.5979802823956135, "learning_rate": 3.371938361719384e-05, "loss": 0.7481, "step": 13463 }, { "epoch": 0.3930980117368836, "grad_norm": 0.49688628179516603, "learning_rate": 3.371776155717762e-05, "loss": 0.5913, "step": 13464 }, { "epoch": 0.39312720796473094, "grad_norm": 0.516624540037377, "learning_rate": 3.3716139497161395e-05, "loss": 0.6178, "step": 13465 }, { "epoch": 0.3931564041925783, "grad_norm": 0.5455569338422935, "learning_rate": 3.3714517437145177e-05, "loss": 0.6934, "step": 13466 }, { "epoch": 0.39318560042042566, "grad_norm": 0.5507987513591361, "learning_rate": 3.371289537712896e-05, "loss": 0.665, "step": 13467 }, { "epoch": 0.393214796648273, "grad_norm": 0.5388211004142844, "learning_rate": 3.371127331711273e-05, "loss": 0.6279, "step": 13468 }, { "epoch": 0.3932439928761204, "grad_norm": 0.5029039728887532, "learning_rate": 3.3709651257096515e-05, "loss": 0.6153, "step": 13469 }, { "epoch": 0.39327318910396775, "grad_norm": 0.4826987787443443, "learning_rate": 3.370802919708029e-05, "loss": 0.5309, "step": 13470 }, { "epoch": 0.3933023853318151, "grad_norm": 0.5411730748983378, "learning_rate": 3.370640713706407e-05, "loss": 0.6559, "step": 13471 }, { "epoch": 0.39333158155966247, "grad_norm": 0.5638207306893044, "learning_rate": 3.3704785077047854e-05, "loss": 0.7191, "step": 13472 }, { "epoch": 0.39336077778750983, "grad_norm": 0.5550442832021187, "learning_rate": 3.370316301703163e-05, "loss": 0.6689, "step": 13473 }, { "epoch": 0.3933899740153572, "grad_norm": 0.5091940528593213, "learning_rate": 3.370154095701541e-05, "loss": 0.5565, "step": 13474 }, { "epoch": 0.39341917024320455, "grad_norm": 0.580125034751306, "learning_rate": 3.3699918896999185e-05, "loss": 0.6591, "step": 13475 }, { "epoch": 0.3934483664710519, "grad_norm": 0.5318544920819116, "learning_rate": 3.369829683698297e-05, "loss": 0.6283, "step": 13476 }, { "epoch": 0.3934775626988993, "grad_norm": 0.5416609947134726, "learning_rate": 3.369667477696675e-05, "loss": 0.605, "step": 13477 }, { "epoch": 0.3935067589267467, "grad_norm": 0.5230587337843391, "learning_rate": 3.369505271695053e-05, "loss": 0.6324, "step": 13478 }, { "epoch": 0.39353595515459405, "grad_norm": 0.4997402424861951, "learning_rate": 3.369343065693431e-05, "loss": 0.5772, "step": 13479 }, { "epoch": 0.3935651513824414, "grad_norm": 0.5590768475242861, "learning_rate": 3.369180859691809e-05, "loss": 0.728, "step": 13480 }, { "epoch": 0.3935943476102888, "grad_norm": 0.5335305631891863, "learning_rate": 3.369018653690187e-05, "loss": 0.6324, "step": 13481 }, { "epoch": 0.39362354383813614, "grad_norm": 0.5198001495026475, "learning_rate": 3.368856447688565e-05, "loss": 0.5759, "step": 13482 }, { "epoch": 0.3936527400659835, "grad_norm": 0.5211933955073516, "learning_rate": 3.3686942416869426e-05, "loss": 0.5696, "step": 13483 }, { "epoch": 0.39368193629383086, "grad_norm": 0.5593047126928673, "learning_rate": 3.368532035685321e-05, "loss": 0.6847, "step": 13484 }, { "epoch": 0.3937111325216782, "grad_norm": 0.5155533855046867, "learning_rate": 3.368369829683698e-05, "loss": 0.5978, "step": 13485 }, { "epoch": 0.3937403287495256, "grad_norm": 0.5434020017806326, "learning_rate": 3.3682076236820764e-05, "loss": 0.6688, "step": 13486 }, { "epoch": 0.39376952497737294, "grad_norm": 0.5408475969720006, "learning_rate": 3.3680454176804546e-05, "loss": 0.6177, "step": 13487 }, { "epoch": 0.3937987212052203, "grad_norm": 0.5968093454192183, "learning_rate": 3.367883211678832e-05, "loss": 0.71, "step": 13488 }, { "epoch": 0.39382791743306766, "grad_norm": 0.5090272336560425, "learning_rate": 3.36772100567721e-05, "loss": 0.5788, "step": 13489 }, { "epoch": 0.393857113660915, "grad_norm": 0.5382948967574791, "learning_rate": 3.367558799675588e-05, "loss": 0.6773, "step": 13490 }, { "epoch": 0.3938863098887624, "grad_norm": 0.5164215130153083, "learning_rate": 3.367396593673966e-05, "loss": 0.6017, "step": 13491 }, { "epoch": 0.39391550611660975, "grad_norm": 0.5414976157072596, "learning_rate": 3.367234387672344e-05, "loss": 0.6335, "step": 13492 }, { "epoch": 0.3939447023444571, "grad_norm": 0.541891739808527, "learning_rate": 3.3670721816707216e-05, "loss": 0.5944, "step": 13493 }, { "epoch": 0.39397389857230447, "grad_norm": 0.5086786145943856, "learning_rate": 3.3669099756691e-05, "loss": 0.6063, "step": 13494 }, { "epoch": 0.39400309480015183, "grad_norm": 0.5021485471581344, "learning_rate": 3.366747769667477e-05, "loss": 0.5867, "step": 13495 }, { "epoch": 0.3940322910279992, "grad_norm": 0.5453407925183505, "learning_rate": 3.3665855636658555e-05, "loss": 0.6246, "step": 13496 }, { "epoch": 0.39406148725584655, "grad_norm": 0.5621032478314909, "learning_rate": 3.366423357664234e-05, "loss": 0.7167, "step": 13497 }, { "epoch": 0.3940906834836939, "grad_norm": 0.49807026249139286, "learning_rate": 3.366261151662612e-05, "loss": 0.5776, "step": 13498 }, { "epoch": 0.3941198797115413, "grad_norm": 0.4981003935359523, "learning_rate": 3.36609894566099e-05, "loss": 0.5559, "step": 13499 }, { "epoch": 0.39414907593938864, "grad_norm": 0.6088168860016939, "learning_rate": 3.3659367396593675e-05, "loss": 0.6334, "step": 13500 }, { "epoch": 0.394178272167236, "grad_norm": 0.4754002904544814, "learning_rate": 3.365774533657746e-05, "loss": 0.5221, "step": 13501 }, { "epoch": 0.39420746839508336, "grad_norm": 0.5669652919179782, "learning_rate": 3.365612327656124e-05, "loss": 0.7126, "step": 13502 }, { "epoch": 0.3942366646229307, "grad_norm": 0.5346483155750343, "learning_rate": 3.3654501216545014e-05, "loss": 0.6022, "step": 13503 }, { "epoch": 0.3942658608507781, "grad_norm": 0.5260036251830367, "learning_rate": 3.3652879156528795e-05, "loss": 0.6398, "step": 13504 }, { "epoch": 0.39429505707862544, "grad_norm": 0.5704179120366133, "learning_rate": 3.365125709651257e-05, "loss": 0.724, "step": 13505 }, { "epoch": 0.3943242533064728, "grad_norm": 0.5302856546920162, "learning_rate": 3.364963503649635e-05, "loss": 0.6389, "step": 13506 }, { "epoch": 0.39435344953432017, "grad_norm": 0.489040948403542, "learning_rate": 3.3648012976480134e-05, "loss": 0.5633, "step": 13507 }, { "epoch": 0.3943826457621675, "grad_norm": 0.4620004666174048, "learning_rate": 3.364639091646391e-05, "loss": 0.488, "step": 13508 }, { "epoch": 0.3944118419900149, "grad_norm": 0.5114174661127914, "learning_rate": 3.364476885644769e-05, "loss": 0.606, "step": 13509 }, { "epoch": 0.39444103821786225, "grad_norm": 0.5600332955433798, "learning_rate": 3.3643146796431466e-05, "loss": 0.664, "step": 13510 }, { "epoch": 0.3944702344457096, "grad_norm": 0.48926813103798655, "learning_rate": 3.364152473641525e-05, "loss": 0.5652, "step": 13511 }, { "epoch": 0.39449943067355697, "grad_norm": 0.504349371285412, "learning_rate": 3.363990267639903e-05, "loss": 0.557, "step": 13512 }, { "epoch": 0.39452862690140433, "grad_norm": 0.5069245908125354, "learning_rate": 3.3638280616382804e-05, "loss": 0.6401, "step": 13513 }, { "epoch": 0.3945578231292517, "grad_norm": 0.5731706352160779, "learning_rate": 3.3636658556366586e-05, "loss": 0.7216, "step": 13514 }, { "epoch": 0.39458701935709906, "grad_norm": 0.509041221866838, "learning_rate": 3.363503649635036e-05, "loss": 0.6097, "step": 13515 }, { "epoch": 0.3946162155849464, "grad_norm": 0.5157270106326638, "learning_rate": 3.363341443633415e-05, "loss": 0.5986, "step": 13516 }, { "epoch": 0.3946454118127938, "grad_norm": 0.55752231057908, "learning_rate": 3.3631792376317924e-05, "loss": 0.7034, "step": 13517 }, { "epoch": 0.39467460804064114, "grad_norm": 0.59487439919767, "learning_rate": 3.3630170316301706e-05, "loss": 0.7259, "step": 13518 }, { "epoch": 0.3947038042684885, "grad_norm": 0.5701656760715827, "learning_rate": 3.362854825628549e-05, "loss": 0.7143, "step": 13519 }, { "epoch": 0.39473300049633586, "grad_norm": 0.5400229870619037, "learning_rate": 3.362692619626926e-05, "loss": 0.5905, "step": 13520 }, { "epoch": 0.3947621967241832, "grad_norm": 0.508242900090459, "learning_rate": 3.3625304136253045e-05, "loss": 0.6178, "step": 13521 }, { "epoch": 0.3947913929520306, "grad_norm": 0.5660091168099922, "learning_rate": 3.3623682076236826e-05, "loss": 0.6886, "step": 13522 }, { "epoch": 0.39482058917987795, "grad_norm": 0.524260503442952, "learning_rate": 3.36220600162206e-05, "loss": 0.5909, "step": 13523 }, { "epoch": 0.3948497854077253, "grad_norm": 0.4917067007897709, "learning_rate": 3.362043795620438e-05, "loss": 0.5616, "step": 13524 }, { "epoch": 0.39487898163557267, "grad_norm": 0.5218473214546082, "learning_rate": 3.361881589618816e-05, "loss": 0.5894, "step": 13525 }, { "epoch": 0.39490817786342003, "grad_norm": 0.4992005064284743, "learning_rate": 3.361719383617194e-05, "loss": 0.5511, "step": 13526 }, { "epoch": 0.3949373740912674, "grad_norm": 0.558855138539563, "learning_rate": 3.361557177615572e-05, "loss": 0.6813, "step": 13527 }, { "epoch": 0.39496657031911475, "grad_norm": 0.5161552000853529, "learning_rate": 3.36139497161395e-05, "loss": 0.611, "step": 13528 }, { "epoch": 0.3949957665469621, "grad_norm": 0.5119582258128423, "learning_rate": 3.361232765612328e-05, "loss": 0.5559, "step": 13529 }, { "epoch": 0.3950249627748095, "grad_norm": 0.5468656518860296, "learning_rate": 3.3610705596107053e-05, "loss": 0.6903, "step": 13530 }, { "epoch": 0.39505415900265684, "grad_norm": 0.5494049344125124, "learning_rate": 3.3609083536090835e-05, "loss": 0.6411, "step": 13531 }, { "epoch": 0.3950833552305042, "grad_norm": 0.5136311489999545, "learning_rate": 3.360746147607462e-05, "loss": 0.5883, "step": 13532 }, { "epoch": 0.39511255145835156, "grad_norm": 0.4964572662525967, "learning_rate": 3.360583941605839e-05, "loss": 0.5712, "step": 13533 }, { "epoch": 0.3951417476861989, "grad_norm": 0.5616712215096648, "learning_rate": 3.3604217356042174e-05, "loss": 0.6647, "step": 13534 }, { "epoch": 0.3951709439140463, "grad_norm": 0.5027394735130819, "learning_rate": 3.3602595296025955e-05, "loss": 0.5491, "step": 13535 }, { "epoch": 0.39520014014189364, "grad_norm": 0.5328921107161427, "learning_rate": 3.360097323600974e-05, "loss": 0.64, "step": 13536 }, { "epoch": 0.395229336369741, "grad_norm": 0.5135340751465078, "learning_rate": 3.359935117599351e-05, "loss": 0.5969, "step": 13537 }, { "epoch": 0.3952585325975884, "grad_norm": 0.5525183316229267, "learning_rate": 3.3597729115977294e-05, "loss": 0.6709, "step": 13538 }, { "epoch": 0.3952877288254358, "grad_norm": 0.5375436071224604, "learning_rate": 3.3596107055961076e-05, "loss": 0.6493, "step": 13539 }, { "epoch": 0.39531692505328314, "grad_norm": 0.5311910915249416, "learning_rate": 3.359448499594485e-05, "loss": 0.6184, "step": 13540 }, { "epoch": 0.3953461212811305, "grad_norm": 0.5200745772991783, "learning_rate": 3.359286293592863e-05, "loss": 0.6149, "step": 13541 }, { "epoch": 0.39537531750897786, "grad_norm": 0.5630997277024244, "learning_rate": 3.3591240875912414e-05, "loss": 0.7213, "step": 13542 }, { "epoch": 0.3954045137368252, "grad_norm": 0.4914888244751994, "learning_rate": 3.358961881589619e-05, "loss": 0.5398, "step": 13543 }, { "epoch": 0.3954337099646726, "grad_norm": 0.5368465381407549, "learning_rate": 3.358799675587997e-05, "loss": 0.6689, "step": 13544 }, { "epoch": 0.39546290619251995, "grad_norm": 0.504372711473002, "learning_rate": 3.3586374695863746e-05, "loss": 0.6147, "step": 13545 }, { "epoch": 0.3954921024203673, "grad_norm": 0.5079094750500849, "learning_rate": 3.358475263584753e-05, "loss": 0.5991, "step": 13546 }, { "epoch": 0.39552129864821467, "grad_norm": 0.5295865334657486, "learning_rate": 3.358313057583131e-05, "loss": 0.6007, "step": 13547 }, { "epoch": 0.39555049487606203, "grad_norm": 0.679262249492475, "learning_rate": 3.3581508515815084e-05, "loss": 0.7428, "step": 13548 }, { "epoch": 0.3955796911039094, "grad_norm": 0.49167417315105605, "learning_rate": 3.3579886455798866e-05, "loss": 0.5537, "step": 13549 }, { "epoch": 0.39560888733175675, "grad_norm": 0.4765579606391866, "learning_rate": 3.357826439578264e-05, "loss": 0.5533, "step": 13550 }, { "epoch": 0.3956380835596041, "grad_norm": 0.4884158469336107, "learning_rate": 3.357664233576642e-05, "loss": 0.5418, "step": 13551 }, { "epoch": 0.3956672797874515, "grad_norm": 0.5123493696522943, "learning_rate": 3.3575020275750205e-05, "loss": 0.5701, "step": 13552 }, { "epoch": 0.39569647601529884, "grad_norm": 0.5854365608733061, "learning_rate": 3.357339821573398e-05, "loss": 0.747, "step": 13553 }, { "epoch": 0.3957256722431462, "grad_norm": 0.5258979752761803, "learning_rate": 3.357177615571777e-05, "loss": 0.5543, "step": 13554 }, { "epoch": 0.39575486847099356, "grad_norm": 0.5458115909602914, "learning_rate": 3.357015409570154e-05, "loss": 0.6543, "step": 13555 }, { "epoch": 0.3957840646988409, "grad_norm": 0.48281371735369916, "learning_rate": 3.3568532035685325e-05, "loss": 0.5495, "step": 13556 }, { "epoch": 0.3958132609266883, "grad_norm": 0.5567213552280484, "learning_rate": 3.35669099756691e-05, "loss": 0.6471, "step": 13557 }, { "epoch": 0.39584245715453564, "grad_norm": 0.5295572811122907, "learning_rate": 3.356528791565288e-05, "loss": 0.5968, "step": 13558 }, { "epoch": 0.395871653382383, "grad_norm": 0.5049669731954506, "learning_rate": 3.3563665855636664e-05, "loss": 0.5703, "step": 13559 }, { "epoch": 0.39590084961023037, "grad_norm": 0.5168461697618474, "learning_rate": 3.356204379562044e-05, "loss": 0.5869, "step": 13560 }, { "epoch": 0.3959300458380777, "grad_norm": 0.5224658658976656, "learning_rate": 3.356042173560422e-05, "loss": 0.595, "step": 13561 }, { "epoch": 0.3959592420659251, "grad_norm": 0.5133205816025402, "learning_rate": 3.3558799675587995e-05, "loss": 0.5435, "step": 13562 }, { "epoch": 0.39598843829377245, "grad_norm": 0.5580964669557726, "learning_rate": 3.355717761557178e-05, "loss": 0.6579, "step": 13563 }, { "epoch": 0.3960176345216198, "grad_norm": 0.5648591163023486, "learning_rate": 3.355555555555556e-05, "loss": 0.7185, "step": 13564 }, { "epoch": 0.39604683074946717, "grad_norm": 0.5377496220494193, "learning_rate": 3.3553933495539334e-05, "loss": 0.5727, "step": 13565 }, { "epoch": 0.39607602697731453, "grad_norm": 0.5304333782349261, "learning_rate": 3.3552311435523116e-05, "loss": 0.6425, "step": 13566 }, { "epoch": 0.3961052232051619, "grad_norm": 0.5054175490807636, "learning_rate": 3.35506893755069e-05, "loss": 0.5981, "step": 13567 }, { "epoch": 0.39613441943300925, "grad_norm": 0.5291989609712359, "learning_rate": 3.354906731549067e-05, "loss": 0.6543, "step": 13568 }, { "epoch": 0.3961636156608566, "grad_norm": 0.5475481108105023, "learning_rate": 3.3547445255474454e-05, "loss": 0.6416, "step": 13569 }, { "epoch": 0.396192811888704, "grad_norm": 0.5330931359866496, "learning_rate": 3.354582319545823e-05, "loss": 0.6232, "step": 13570 }, { "epoch": 0.39622200811655134, "grad_norm": 0.5166299314723374, "learning_rate": 3.354420113544201e-05, "loss": 0.613, "step": 13571 }, { "epoch": 0.3962512043443987, "grad_norm": 0.5746451808193528, "learning_rate": 3.354257907542579e-05, "loss": 0.7329, "step": 13572 }, { "epoch": 0.39628040057224606, "grad_norm": 0.4916186838343214, "learning_rate": 3.3540957015409574e-05, "loss": 0.5665, "step": 13573 }, { "epoch": 0.3963095968000934, "grad_norm": 0.6283018500690841, "learning_rate": 3.3539334955393356e-05, "loss": 0.703, "step": 13574 }, { "epoch": 0.3963387930279408, "grad_norm": 0.5170463759426873, "learning_rate": 3.353771289537713e-05, "loss": 0.6367, "step": 13575 }, { "epoch": 0.39636798925578814, "grad_norm": 0.6860723529241393, "learning_rate": 3.353609083536091e-05, "loss": 0.6617, "step": 13576 }, { "epoch": 0.3963971854836355, "grad_norm": 0.501298485605091, "learning_rate": 3.353446877534469e-05, "loss": 0.5743, "step": 13577 }, { "epoch": 0.39642638171148287, "grad_norm": 0.5699662438230937, "learning_rate": 3.353284671532847e-05, "loss": 0.699, "step": 13578 }, { "epoch": 0.39645557793933023, "grad_norm": 0.5189764317078989, "learning_rate": 3.353122465531225e-05, "loss": 0.5541, "step": 13579 }, { "epoch": 0.3964847741671776, "grad_norm": 0.5400473024096363, "learning_rate": 3.3529602595296026e-05, "loss": 0.6624, "step": 13580 }, { "epoch": 0.39651397039502495, "grad_norm": 0.5406820250940464, "learning_rate": 3.352798053527981e-05, "loss": 0.6222, "step": 13581 }, { "epoch": 0.3965431666228723, "grad_norm": 0.5163697197350879, "learning_rate": 3.352635847526358e-05, "loss": 0.5734, "step": 13582 }, { "epoch": 0.3965723628507197, "grad_norm": 0.5097394732906625, "learning_rate": 3.3524736415247365e-05, "loss": 0.5864, "step": 13583 }, { "epoch": 0.39660155907856703, "grad_norm": 0.5985392425666033, "learning_rate": 3.3523114355231147e-05, "loss": 0.6631, "step": 13584 }, { "epoch": 0.3966307553064144, "grad_norm": 0.5618633152762331, "learning_rate": 3.352149229521492e-05, "loss": 0.6892, "step": 13585 }, { "epoch": 0.39665995153426176, "grad_norm": 0.5975137370751492, "learning_rate": 3.35198702351987e-05, "loss": 0.6452, "step": 13586 }, { "epoch": 0.3966891477621091, "grad_norm": 0.5175674283776778, "learning_rate": 3.3518248175182485e-05, "loss": 0.5953, "step": 13587 }, { "epoch": 0.3967183439899565, "grad_norm": 0.5378352677254264, "learning_rate": 3.351662611516626e-05, "loss": 0.6525, "step": 13588 }, { "epoch": 0.39674754021780384, "grad_norm": 0.5302223365376942, "learning_rate": 3.351500405515004e-05, "loss": 0.6383, "step": 13589 }, { "epoch": 0.3967767364456512, "grad_norm": 0.5040180595146254, "learning_rate": 3.351338199513382e-05, "loss": 0.5661, "step": 13590 }, { "epoch": 0.39680593267349856, "grad_norm": 0.5008447319112199, "learning_rate": 3.3511759935117605e-05, "loss": 0.5875, "step": 13591 }, { "epoch": 0.3968351289013459, "grad_norm": 0.5576762409939304, "learning_rate": 3.351013787510138e-05, "loss": 0.7328, "step": 13592 }, { "epoch": 0.3968643251291933, "grad_norm": 0.53364275337242, "learning_rate": 3.350851581508516e-05, "loss": 0.6437, "step": 13593 }, { "epoch": 0.39689352135704065, "grad_norm": 0.5193878988211299, "learning_rate": 3.3506893755068944e-05, "loss": 0.6003, "step": 13594 }, { "epoch": 0.396922717584888, "grad_norm": 0.5452806746903591, "learning_rate": 3.350527169505272e-05, "loss": 0.686, "step": 13595 }, { "epoch": 0.39695191381273537, "grad_norm": 0.4944422087296415, "learning_rate": 3.35036496350365e-05, "loss": 0.5743, "step": 13596 }, { "epoch": 0.39698111004058273, "grad_norm": 0.55088376286301, "learning_rate": 3.3502027575020276e-05, "loss": 0.6282, "step": 13597 }, { "epoch": 0.39701030626843015, "grad_norm": 0.5330740845263522, "learning_rate": 3.350040551500406e-05, "loss": 0.6472, "step": 13598 }, { "epoch": 0.3970395024962775, "grad_norm": 0.49965552275266145, "learning_rate": 3.349878345498784e-05, "loss": 0.5683, "step": 13599 }, { "epoch": 0.39706869872412487, "grad_norm": 0.5532777653351595, "learning_rate": 3.3497161394971614e-05, "loss": 0.6606, "step": 13600 }, { "epoch": 0.39709789495197223, "grad_norm": 0.5544473716603523, "learning_rate": 3.3495539334955396e-05, "loss": 0.7109, "step": 13601 }, { "epoch": 0.3971270911798196, "grad_norm": 0.5084946298287427, "learning_rate": 3.349391727493917e-05, "loss": 0.5866, "step": 13602 }, { "epoch": 0.39715628740766695, "grad_norm": 0.509899679839018, "learning_rate": 3.349229521492295e-05, "loss": 0.5754, "step": 13603 }, { "epoch": 0.3971854836355143, "grad_norm": 0.56342406870841, "learning_rate": 3.3490673154906734e-05, "loss": 0.6549, "step": 13604 }, { "epoch": 0.3972146798633617, "grad_norm": 0.5138252678282283, "learning_rate": 3.348905109489051e-05, "loss": 0.5731, "step": 13605 }, { "epoch": 0.39724387609120904, "grad_norm": 0.6132842212808627, "learning_rate": 3.348742903487429e-05, "loss": 0.672, "step": 13606 }, { "epoch": 0.3972730723190564, "grad_norm": 0.5576114964305833, "learning_rate": 3.3485806974858066e-05, "loss": 0.7159, "step": 13607 }, { "epoch": 0.39730226854690376, "grad_norm": 0.5484731771954007, "learning_rate": 3.348418491484185e-05, "loss": 0.6761, "step": 13608 }, { "epoch": 0.3973314647747511, "grad_norm": 0.5228511992103908, "learning_rate": 3.348256285482563e-05, "loss": 0.5895, "step": 13609 }, { "epoch": 0.3973606610025985, "grad_norm": 0.5106796681443757, "learning_rate": 3.348094079480941e-05, "loss": 0.6082, "step": 13610 }, { "epoch": 0.39738985723044584, "grad_norm": 0.5570690778601866, "learning_rate": 3.347931873479319e-05, "loss": 0.726, "step": 13611 }, { "epoch": 0.3974190534582932, "grad_norm": 0.5346150282552294, "learning_rate": 3.347769667477697e-05, "loss": 0.6152, "step": 13612 }, { "epoch": 0.39744824968614056, "grad_norm": 0.5038641034716144, "learning_rate": 3.347607461476075e-05, "loss": 0.5704, "step": 13613 }, { "epoch": 0.3974774459139879, "grad_norm": 0.5201042337086424, "learning_rate": 3.347445255474453e-05, "loss": 0.5652, "step": 13614 }, { "epoch": 0.3975066421418353, "grad_norm": 0.5507149996541447, "learning_rate": 3.347283049472831e-05, "loss": 0.6983, "step": 13615 }, { "epoch": 0.39753583836968265, "grad_norm": 0.508745735584875, "learning_rate": 3.347120843471209e-05, "loss": 0.6103, "step": 13616 }, { "epoch": 0.39756503459753, "grad_norm": 0.5150341068307369, "learning_rate": 3.3469586374695863e-05, "loss": 0.557, "step": 13617 }, { "epoch": 0.39759423082537737, "grad_norm": 0.557076099865982, "learning_rate": 3.3467964314679645e-05, "loss": 0.7025, "step": 13618 }, { "epoch": 0.39762342705322473, "grad_norm": 0.5592718723481426, "learning_rate": 3.346634225466343e-05, "loss": 0.6583, "step": 13619 }, { "epoch": 0.3976526232810721, "grad_norm": 0.5326978140360177, "learning_rate": 3.34647201946472e-05, "loss": 0.5489, "step": 13620 }, { "epoch": 0.39768181950891945, "grad_norm": 0.493666120280675, "learning_rate": 3.3463098134630984e-05, "loss": 0.5359, "step": 13621 }, { "epoch": 0.3977110157367668, "grad_norm": 0.5091400909053838, "learning_rate": 3.346147607461476e-05, "loss": 0.5933, "step": 13622 }, { "epoch": 0.3977402119646142, "grad_norm": 0.5286740025906902, "learning_rate": 3.345985401459854e-05, "loss": 0.5708, "step": 13623 }, { "epoch": 0.39776940819246154, "grad_norm": 0.548720444454312, "learning_rate": 3.345823195458232e-05, "loss": 0.6483, "step": 13624 }, { "epoch": 0.3977986044203089, "grad_norm": 0.5570588933319467, "learning_rate": 3.34566098945661e-05, "loss": 0.6248, "step": 13625 }, { "epoch": 0.39782780064815626, "grad_norm": 0.5332528388500402, "learning_rate": 3.345498783454988e-05, "loss": 0.6298, "step": 13626 }, { "epoch": 0.3978569968760036, "grad_norm": 0.5144609869023822, "learning_rate": 3.3453365774533654e-05, "loss": 0.6237, "step": 13627 }, { "epoch": 0.397886193103851, "grad_norm": 0.5165176957563683, "learning_rate": 3.3451743714517436e-05, "loss": 0.5768, "step": 13628 }, { "epoch": 0.39791538933169834, "grad_norm": 0.56042957815262, "learning_rate": 3.345012165450122e-05, "loss": 0.6859, "step": 13629 }, { "epoch": 0.3979445855595457, "grad_norm": 0.5543789792807481, "learning_rate": 3.3448499594485e-05, "loss": 0.6304, "step": 13630 }, { "epoch": 0.39797378178739307, "grad_norm": 0.5432914119826477, "learning_rate": 3.344687753446878e-05, "loss": 0.602, "step": 13631 }, { "epoch": 0.3980029780152404, "grad_norm": 0.5262568638713572, "learning_rate": 3.3445255474452556e-05, "loss": 0.6181, "step": 13632 }, { "epoch": 0.3980321742430878, "grad_norm": 0.6724360912286714, "learning_rate": 3.344363341443634e-05, "loss": 0.6969, "step": 13633 }, { "epoch": 0.39806137047093515, "grad_norm": 0.510814180545047, "learning_rate": 3.344201135442012e-05, "loss": 0.6113, "step": 13634 }, { "epoch": 0.3980905666987825, "grad_norm": 0.4995157435109277, "learning_rate": 3.3440389294403894e-05, "loss": 0.6018, "step": 13635 }, { "epoch": 0.39811976292662987, "grad_norm": 0.5475235741392143, "learning_rate": 3.3438767234387676e-05, "loss": 0.683, "step": 13636 }, { "epoch": 0.39814895915447723, "grad_norm": 0.49692005985119225, "learning_rate": 3.343714517437145e-05, "loss": 0.57, "step": 13637 }, { "epoch": 0.3981781553823246, "grad_norm": 0.5447668988058992, "learning_rate": 3.343552311435523e-05, "loss": 0.6525, "step": 13638 }, { "epoch": 0.39820735161017196, "grad_norm": 0.5442810243224943, "learning_rate": 3.3433901054339015e-05, "loss": 0.6434, "step": 13639 }, { "epoch": 0.3982365478380193, "grad_norm": 0.5323359364397017, "learning_rate": 3.343227899432279e-05, "loss": 0.6522, "step": 13640 }, { "epoch": 0.3982657440658667, "grad_norm": 0.5766433911439379, "learning_rate": 3.343065693430657e-05, "loss": 0.7246, "step": 13641 }, { "epoch": 0.39829494029371404, "grad_norm": 0.5181353875373842, "learning_rate": 3.3429034874290346e-05, "loss": 0.5842, "step": 13642 }, { "epoch": 0.3983241365215614, "grad_norm": 0.5764283605104137, "learning_rate": 3.342741281427413e-05, "loss": 0.719, "step": 13643 }, { "epoch": 0.39835333274940876, "grad_norm": 0.5444913367132068, "learning_rate": 3.342579075425791e-05, "loss": 0.6394, "step": 13644 }, { "epoch": 0.3983825289772561, "grad_norm": 0.5371966977263827, "learning_rate": 3.3424168694241685e-05, "loss": 0.6683, "step": 13645 }, { "epoch": 0.3984117252051035, "grad_norm": 0.47664333714729906, "learning_rate": 3.342254663422547e-05, "loss": 0.5672, "step": 13646 }, { "epoch": 0.39844092143295085, "grad_norm": 0.5681824666420633, "learning_rate": 3.342092457420924e-05, "loss": 0.6513, "step": 13647 }, { "epoch": 0.3984701176607982, "grad_norm": 0.5868470496647403, "learning_rate": 3.341930251419303e-05, "loss": 0.7405, "step": 13648 }, { "epoch": 0.39849931388864557, "grad_norm": 0.5395195598903779, "learning_rate": 3.3417680454176805e-05, "loss": 0.6392, "step": 13649 }, { "epoch": 0.39852851011649293, "grad_norm": 0.5252877819218115, "learning_rate": 3.341605839416059e-05, "loss": 0.6341, "step": 13650 }, { "epoch": 0.3985577063443403, "grad_norm": 0.5153367804011738, "learning_rate": 3.341443633414437e-05, "loss": 0.5893, "step": 13651 }, { "epoch": 0.39858690257218765, "grad_norm": 0.5198121846130979, "learning_rate": 3.3412814274128144e-05, "loss": 0.6007, "step": 13652 }, { "epoch": 0.398616098800035, "grad_norm": 0.5490342343365296, "learning_rate": 3.3411192214111926e-05, "loss": 0.6713, "step": 13653 }, { "epoch": 0.3986452950278824, "grad_norm": 0.5947656178657105, "learning_rate": 3.340957015409571e-05, "loss": 0.7187, "step": 13654 }, { "epoch": 0.39867449125572973, "grad_norm": 0.4813107299624593, "learning_rate": 3.340794809407948e-05, "loss": 0.5495, "step": 13655 }, { "epoch": 0.3987036874835771, "grad_norm": 0.5414585848567854, "learning_rate": 3.3406326034063264e-05, "loss": 0.6861, "step": 13656 }, { "epoch": 0.39873288371142446, "grad_norm": 0.5490562200391966, "learning_rate": 3.340470397404704e-05, "loss": 0.6926, "step": 13657 }, { "epoch": 0.3987620799392718, "grad_norm": 0.5196284071113394, "learning_rate": 3.340308191403082e-05, "loss": 0.6003, "step": 13658 }, { "epoch": 0.39879127616711924, "grad_norm": 0.4992663311268699, "learning_rate": 3.34014598540146e-05, "loss": 0.5309, "step": 13659 }, { "epoch": 0.3988204723949666, "grad_norm": 0.5191477909214485, "learning_rate": 3.339983779399838e-05, "loss": 0.5797, "step": 13660 }, { "epoch": 0.39884966862281396, "grad_norm": 0.5500447476508336, "learning_rate": 3.339821573398216e-05, "loss": 0.6611, "step": 13661 }, { "epoch": 0.3988788648506613, "grad_norm": 0.562194862274996, "learning_rate": 3.3396593673965934e-05, "loss": 0.6881, "step": 13662 }, { "epoch": 0.3989080610785087, "grad_norm": 0.5326700748256087, "learning_rate": 3.3394971613949716e-05, "loss": 0.6133, "step": 13663 }, { "epoch": 0.39893725730635604, "grad_norm": 0.5926190982703019, "learning_rate": 3.33933495539335e-05, "loss": 0.7015, "step": 13664 }, { "epoch": 0.3989664535342034, "grad_norm": 0.5549655661529211, "learning_rate": 3.339172749391727e-05, "loss": 0.6617, "step": 13665 }, { "epoch": 0.39899564976205076, "grad_norm": 0.4982276620887267, "learning_rate": 3.3390105433901055e-05, "loss": 0.5968, "step": 13666 }, { "epoch": 0.3990248459898981, "grad_norm": 0.5391851179044599, "learning_rate": 3.3388483373884836e-05, "loss": 0.6336, "step": 13667 }, { "epoch": 0.3990540422177455, "grad_norm": 0.5079241397995488, "learning_rate": 3.338686131386862e-05, "loss": 0.5571, "step": 13668 }, { "epoch": 0.39908323844559285, "grad_norm": 0.569862245880633, "learning_rate": 3.338523925385239e-05, "loss": 0.6086, "step": 13669 }, { "epoch": 0.3991124346734402, "grad_norm": 0.5586837517587185, "learning_rate": 3.3383617193836175e-05, "loss": 0.6651, "step": 13670 }, { "epoch": 0.39914163090128757, "grad_norm": 0.5318609664823961, "learning_rate": 3.3381995133819957e-05, "loss": 0.6357, "step": 13671 }, { "epoch": 0.39917082712913493, "grad_norm": 0.5568266162487959, "learning_rate": 3.338037307380373e-05, "loss": 0.6795, "step": 13672 }, { "epoch": 0.3992000233569823, "grad_norm": 0.4901009094905483, "learning_rate": 3.337875101378751e-05, "loss": 0.5415, "step": 13673 }, { "epoch": 0.39922921958482965, "grad_norm": 0.5640139423611567, "learning_rate": 3.337712895377129e-05, "loss": 0.7237, "step": 13674 }, { "epoch": 0.399258415812677, "grad_norm": 0.5205235314431187, "learning_rate": 3.337550689375507e-05, "loss": 0.6026, "step": 13675 }, { "epoch": 0.3992876120405244, "grad_norm": 0.5298200090226601, "learning_rate": 3.337388483373885e-05, "loss": 0.6409, "step": 13676 }, { "epoch": 0.39931680826837174, "grad_norm": 0.5599179291901295, "learning_rate": 3.337226277372263e-05, "loss": 0.6787, "step": 13677 }, { "epoch": 0.3993460044962191, "grad_norm": 0.5500454502226063, "learning_rate": 3.337064071370641e-05, "loss": 0.5572, "step": 13678 }, { "epoch": 0.39937520072406646, "grad_norm": 0.5767285663751551, "learning_rate": 3.336901865369019e-05, "loss": 0.6611, "step": 13679 }, { "epoch": 0.3994043969519138, "grad_norm": 0.5455970439478988, "learning_rate": 3.3367396593673965e-05, "loss": 0.6534, "step": 13680 }, { "epoch": 0.3994335931797612, "grad_norm": 0.5485405893215687, "learning_rate": 3.336577453365775e-05, "loss": 0.6468, "step": 13681 }, { "epoch": 0.39946278940760854, "grad_norm": 0.545002780226874, "learning_rate": 3.336415247364152e-05, "loss": 0.7002, "step": 13682 }, { "epoch": 0.3994919856354559, "grad_norm": 0.5345537986604251, "learning_rate": 3.3362530413625304e-05, "loss": 0.6171, "step": 13683 }, { "epoch": 0.39952118186330327, "grad_norm": 0.5767373353951368, "learning_rate": 3.3360908353609086e-05, "loss": 0.744, "step": 13684 }, { "epoch": 0.3995503780911506, "grad_norm": 0.5512806258587543, "learning_rate": 3.335928629359286e-05, "loss": 0.6283, "step": 13685 }, { "epoch": 0.399579574318998, "grad_norm": 0.5360859012744358, "learning_rate": 3.335766423357665e-05, "loss": 0.6446, "step": 13686 }, { "epoch": 0.39960877054684535, "grad_norm": 0.525200460875059, "learning_rate": 3.3356042173560424e-05, "loss": 0.6208, "step": 13687 }, { "epoch": 0.3996379667746927, "grad_norm": 0.5455623889166357, "learning_rate": 3.3354420113544206e-05, "loss": 0.6512, "step": 13688 }, { "epoch": 0.39966716300254007, "grad_norm": 0.4964527602890269, "learning_rate": 3.335279805352798e-05, "loss": 0.5964, "step": 13689 }, { "epoch": 0.39969635923038743, "grad_norm": 0.5303178781270654, "learning_rate": 3.335117599351176e-05, "loss": 0.6055, "step": 13690 }, { "epoch": 0.3997255554582348, "grad_norm": 0.5329114936346008, "learning_rate": 3.3349553933495544e-05, "loss": 0.6521, "step": 13691 }, { "epoch": 0.39975475168608215, "grad_norm": 0.5346310984135629, "learning_rate": 3.334793187347932e-05, "loss": 0.6152, "step": 13692 }, { "epoch": 0.3997839479139295, "grad_norm": 0.5268848386889025, "learning_rate": 3.33463098134631e-05, "loss": 0.5997, "step": 13693 }, { "epoch": 0.3998131441417769, "grad_norm": 0.4921017127378669, "learning_rate": 3.3344687753446876e-05, "loss": 0.5703, "step": 13694 }, { "epoch": 0.39984234036962424, "grad_norm": 0.5535291713484941, "learning_rate": 3.334306569343066e-05, "loss": 0.6442, "step": 13695 }, { "epoch": 0.3998715365974716, "grad_norm": 0.545895844855062, "learning_rate": 3.334144363341444e-05, "loss": 0.6162, "step": 13696 }, { "epoch": 0.39990073282531896, "grad_norm": 0.49940661998061253, "learning_rate": 3.3339821573398215e-05, "loss": 0.5686, "step": 13697 }, { "epoch": 0.3999299290531663, "grad_norm": 0.49345620113893784, "learning_rate": 3.3338199513381996e-05, "loss": 0.5746, "step": 13698 }, { "epoch": 0.3999591252810137, "grad_norm": 0.5082973467475901, "learning_rate": 3.333657745336578e-05, "loss": 0.6134, "step": 13699 }, { "epoch": 0.39998832150886104, "grad_norm": 0.5543725858073204, "learning_rate": 3.333495539334955e-05, "loss": 0.6433, "step": 13700 }, { "epoch": 0.4000175177367084, "grad_norm": 0.546524792156951, "learning_rate": 3.3333333333333335e-05, "loss": 0.6681, "step": 13701 }, { "epoch": 0.40004671396455577, "grad_norm": 0.5603622292660877, "learning_rate": 3.333171127331711e-05, "loss": 0.6339, "step": 13702 }, { "epoch": 0.40007591019240313, "grad_norm": 0.5419545450472154, "learning_rate": 3.333008921330089e-05, "loss": 0.6062, "step": 13703 }, { "epoch": 0.4001051064202505, "grad_norm": 0.5840387657062353, "learning_rate": 3.3328467153284673e-05, "loss": 0.5778, "step": 13704 }, { "epoch": 0.40013430264809785, "grad_norm": 0.4967463300110529, "learning_rate": 3.3326845093268455e-05, "loss": 0.5659, "step": 13705 }, { "epoch": 0.4001634988759452, "grad_norm": 0.5767337470804837, "learning_rate": 3.332522303325224e-05, "loss": 0.6361, "step": 13706 }, { "epoch": 0.4001926951037926, "grad_norm": 0.5412745668993016, "learning_rate": 3.332360097323601e-05, "loss": 0.6339, "step": 13707 }, { "epoch": 0.40022189133163993, "grad_norm": 0.5480967129286954, "learning_rate": 3.3321978913219794e-05, "loss": 0.6342, "step": 13708 }, { "epoch": 0.4002510875594873, "grad_norm": 0.5483852228555085, "learning_rate": 3.332035685320357e-05, "loss": 0.6499, "step": 13709 }, { "epoch": 0.40028028378733466, "grad_norm": 0.5620634343884816, "learning_rate": 3.331873479318735e-05, "loss": 0.7262, "step": 13710 }, { "epoch": 0.400309480015182, "grad_norm": 0.5153777395835561, "learning_rate": 3.331711273317113e-05, "loss": 0.5787, "step": 13711 }, { "epoch": 0.4003386762430294, "grad_norm": 0.5366039207397428, "learning_rate": 3.331549067315491e-05, "loss": 0.5819, "step": 13712 }, { "epoch": 0.40036787247087674, "grad_norm": 0.5476478362949081, "learning_rate": 3.331386861313869e-05, "loss": 0.6682, "step": 13713 }, { "epoch": 0.4003970686987241, "grad_norm": 0.5127329708965657, "learning_rate": 3.3312246553122464e-05, "loss": 0.6141, "step": 13714 }, { "epoch": 0.40042626492657146, "grad_norm": 0.5235440669409208, "learning_rate": 3.3310624493106246e-05, "loss": 0.6054, "step": 13715 }, { "epoch": 0.4004554611544188, "grad_norm": 0.5305192031641083, "learning_rate": 3.330900243309003e-05, "loss": 0.629, "step": 13716 }, { "epoch": 0.4004846573822662, "grad_norm": 0.5234705589237596, "learning_rate": 3.33073803730738e-05, "loss": 0.5744, "step": 13717 }, { "epoch": 0.40051385361011355, "grad_norm": 0.4645168501498028, "learning_rate": 3.3305758313057584e-05, "loss": 0.4857, "step": 13718 }, { "epoch": 0.40054304983796096, "grad_norm": 0.5529180375829568, "learning_rate": 3.330413625304136e-05, "loss": 0.6777, "step": 13719 }, { "epoch": 0.4005722460658083, "grad_norm": 0.4900987147578146, "learning_rate": 3.330251419302514e-05, "loss": 0.4983, "step": 13720 }, { "epoch": 0.4006014422936557, "grad_norm": 0.5987861098629627, "learning_rate": 3.330089213300892e-05, "loss": 0.759, "step": 13721 }, { "epoch": 0.40063063852150305, "grad_norm": 0.5360566101935667, "learning_rate": 3.32992700729927e-05, "loss": 0.6182, "step": 13722 }, { "epoch": 0.4006598347493504, "grad_norm": 0.5159867078794139, "learning_rate": 3.329764801297648e-05, "loss": 0.6149, "step": 13723 }, { "epoch": 0.40068903097719777, "grad_norm": 0.5230554983488166, "learning_rate": 3.329602595296026e-05, "loss": 0.6394, "step": 13724 }, { "epoch": 0.40071822720504513, "grad_norm": 0.5520223497579017, "learning_rate": 3.329440389294404e-05, "loss": 0.668, "step": 13725 }, { "epoch": 0.4007474234328925, "grad_norm": 0.4967285459770864, "learning_rate": 3.3292781832927825e-05, "loss": 0.5668, "step": 13726 }, { "epoch": 0.40077661966073985, "grad_norm": 0.510537947308246, "learning_rate": 3.32911597729116e-05, "loss": 0.6238, "step": 13727 }, { "epoch": 0.4008058158885872, "grad_norm": 0.5496074151900044, "learning_rate": 3.328953771289538e-05, "loss": 0.671, "step": 13728 }, { "epoch": 0.4008350121164346, "grad_norm": 0.6091112196892463, "learning_rate": 3.3287915652879157e-05, "loss": 0.7067, "step": 13729 }, { "epoch": 0.40086420834428194, "grad_norm": 0.5228384103501114, "learning_rate": 3.328629359286294e-05, "loss": 0.6322, "step": 13730 }, { "epoch": 0.4008934045721293, "grad_norm": 0.5548079774266598, "learning_rate": 3.328467153284672e-05, "loss": 0.6481, "step": 13731 }, { "epoch": 0.40092260079997666, "grad_norm": 0.5203450126002414, "learning_rate": 3.3283049472830495e-05, "loss": 0.6166, "step": 13732 }, { "epoch": 0.400951797027824, "grad_norm": 0.479009316878669, "learning_rate": 3.328142741281428e-05, "loss": 0.5219, "step": 13733 }, { "epoch": 0.4009809932556714, "grad_norm": 0.5419639716164413, "learning_rate": 3.327980535279805e-05, "loss": 0.6397, "step": 13734 }, { "epoch": 0.40101018948351874, "grad_norm": 0.5227805699846237, "learning_rate": 3.3278183292781834e-05, "loss": 0.6047, "step": 13735 }, { "epoch": 0.4010393857113661, "grad_norm": 0.5463439828702151, "learning_rate": 3.3276561232765615e-05, "loss": 0.657, "step": 13736 }, { "epoch": 0.40106858193921346, "grad_norm": 0.5224526262643652, "learning_rate": 3.327493917274939e-05, "loss": 0.6026, "step": 13737 }, { "epoch": 0.4010977781670608, "grad_norm": 0.5492144932889809, "learning_rate": 3.327331711273317e-05, "loss": 0.6337, "step": 13738 }, { "epoch": 0.4011269743949082, "grad_norm": 0.5273578440544682, "learning_rate": 3.327169505271695e-05, "loss": 0.5555, "step": 13739 }, { "epoch": 0.40115617062275555, "grad_norm": 0.5211686967391479, "learning_rate": 3.327007299270073e-05, "loss": 0.5626, "step": 13740 }, { "epoch": 0.4011853668506029, "grad_norm": 0.5041138241764338, "learning_rate": 3.326845093268451e-05, "loss": 0.55, "step": 13741 }, { "epoch": 0.40121456307845027, "grad_norm": 0.5386025778944138, "learning_rate": 3.326682887266829e-05, "loss": 0.6489, "step": 13742 }, { "epoch": 0.40124375930629763, "grad_norm": 0.48919657867209765, "learning_rate": 3.3265206812652074e-05, "loss": 0.5398, "step": 13743 }, { "epoch": 0.401272955534145, "grad_norm": 0.5222271587005445, "learning_rate": 3.326358475263585e-05, "loss": 0.5875, "step": 13744 }, { "epoch": 0.40130215176199235, "grad_norm": 0.6974169881439005, "learning_rate": 3.326196269261963e-05, "loss": 0.6701, "step": 13745 }, { "epoch": 0.4013313479898397, "grad_norm": 0.5706293290144507, "learning_rate": 3.326034063260341e-05, "loss": 0.7062, "step": 13746 }, { "epoch": 0.4013605442176871, "grad_norm": 0.5679282878964458, "learning_rate": 3.325871857258719e-05, "loss": 0.7011, "step": 13747 }, { "epoch": 0.40138974044553444, "grad_norm": 0.5094476327250858, "learning_rate": 3.325709651257097e-05, "loss": 0.6063, "step": 13748 }, { "epoch": 0.4014189366733818, "grad_norm": 0.6019634450690918, "learning_rate": 3.3255474452554744e-05, "loss": 0.7755, "step": 13749 }, { "epoch": 0.40144813290122916, "grad_norm": 0.5939248527619736, "learning_rate": 3.3253852392538526e-05, "loss": 0.7092, "step": 13750 }, { "epoch": 0.4014773291290765, "grad_norm": 0.5587040316213261, "learning_rate": 3.325223033252231e-05, "loss": 0.6645, "step": 13751 }, { "epoch": 0.4015065253569239, "grad_norm": 0.5002838046042462, "learning_rate": 3.325060827250608e-05, "loss": 0.5678, "step": 13752 }, { "epoch": 0.40153572158477124, "grad_norm": 0.5567948610110133, "learning_rate": 3.3248986212489865e-05, "loss": 0.663, "step": 13753 }, { "epoch": 0.4015649178126186, "grad_norm": 0.4997151929303482, "learning_rate": 3.324736415247364e-05, "loss": 0.5928, "step": 13754 }, { "epoch": 0.40159411404046597, "grad_norm": 0.5529403556102196, "learning_rate": 3.324574209245742e-05, "loss": 0.6699, "step": 13755 }, { "epoch": 0.4016233102683133, "grad_norm": 0.47964797691224026, "learning_rate": 3.32441200324412e-05, "loss": 0.5459, "step": 13756 }, { "epoch": 0.4016525064961607, "grad_norm": 0.4627627792074353, "learning_rate": 3.324249797242498e-05, "loss": 0.4946, "step": 13757 }, { "epoch": 0.40168170272400805, "grad_norm": 0.5168736815940256, "learning_rate": 3.324087591240876e-05, "loss": 0.5945, "step": 13758 }, { "epoch": 0.4017108989518554, "grad_norm": 0.5137008880623726, "learning_rate": 3.3239253852392535e-05, "loss": 0.615, "step": 13759 }, { "epoch": 0.40174009517970277, "grad_norm": 0.517627675818614, "learning_rate": 3.3237631792376317e-05, "loss": 0.6051, "step": 13760 }, { "epoch": 0.40176929140755013, "grad_norm": 0.5022654548780022, "learning_rate": 3.32360097323601e-05, "loss": 0.5486, "step": 13761 }, { "epoch": 0.4017984876353975, "grad_norm": 0.5205116446536628, "learning_rate": 3.323438767234388e-05, "loss": 0.61, "step": 13762 }, { "epoch": 0.40182768386324486, "grad_norm": 0.4914988989172154, "learning_rate": 3.323276561232766e-05, "loss": 0.5425, "step": 13763 }, { "epoch": 0.4018568800910922, "grad_norm": 0.7197873828415591, "learning_rate": 3.323114355231144e-05, "loss": 0.7021, "step": 13764 }, { "epoch": 0.4018860763189396, "grad_norm": 0.5235049422382472, "learning_rate": 3.322952149229522e-05, "loss": 0.6263, "step": 13765 }, { "epoch": 0.40191527254678694, "grad_norm": 0.5408984732203675, "learning_rate": 3.3227899432279e-05, "loss": 0.6354, "step": 13766 }, { "epoch": 0.4019444687746343, "grad_norm": 0.5716569561421998, "learning_rate": 3.3226277372262775e-05, "loss": 0.7204, "step": 13767 }, { "epoch": 0.40197366500248166, "grad_norm": 0.5032019425051114, "learning_rate": 3.322465531224656e-05, "loss": 0.614, "step": 13768 }, { "epoch": 0.402002861230329, "grad_norm": 0.5289464256117884, "learning_rate": 3.322303325223033e-05, "loss": 0.577, "step": 13769 }, { "epoch": 0.4020320574581764, "grad_norm": 0.4865480384233177, "learning_rate": 3.3221411192214114e-05, "loss": 0.5358, "step": 13770 }, { "epoch": 0.40206125368602375, "grad_norm": 0.5356523689952054, "learning_rate": 3.3219789132197896e-05, "loss": 0.6421, "step": 13771 }, { "epoch": 0.4020904499138711, "grad_norm": 0.5481443386948854, "learning_rate": 3.321816707218167e-05, "loss": 0.6152, "step": 13772 }, { "epoch": 0.40211964614171847, "grad_norm": 0.5397726868081163, "learning_rate": 3.321654501216545e-05, "loss": 0.6387, "step": 13773 }, { "epoch": 0.40214884236956583, "grad_norm": 0.5136557223171597, "learning_rate": 3.321492295214923e-05, "loss": 0.6118, "step": 13774 }, { "epoch": 0.4021780385974132, "grad_norm": 0.536818730660869, "learning_rate": 3.321330089213301e-05, "loss": 0.6653, "step": 13775 }, { "epoch": 0.40220723482526055, "grad_norm": 0.588949855920404, "learning_rate": 3.321167883211679e-05, "loss": 0.7228, "step": 13776 }, { "epoch": 0.4022364310531079, "grad_norm": 0.5361933325127244, "learning_rate": 3.3210056772100566e-05, "loss": 0.6048, "step": 13777 }, { "epoch": 0.4022656272809553, "grad_norm": 0.565691899125586, "learning_rate": 3.320843471208435e-05, "loss": 0.6551, "step": 13778 }, { "epoch": 0.4022948235088027, "grad_norm": 0.5531109604077601, "learning_rate": 3.320681265206812e-05, "loss": 0.677, "step": 13779 }, { "epoch": 0.40232401973665005, "grad_norm": 0.49496641435201777, "learning_rate": 3.320519059205191e-05, "loss": 0.5586, "step": 13780 }, { "epoch": 0.4023532159644974, "grad_norm": 0.5725542416477085, "learning_rate": 3.3203568532035686e-05, "loss": 0.6722, "step": 13781 }, { "epoch": 0.4023824121923448, "grad_norm": 0.5384436644705194, "learning_rate": 3.320194647201947e-05, "loss": 0.6517, "step": 13782 }, { "epoch": 0.40241160842019214, "grad_norm": 0.5053854219760943, "learning_rate": 3.320032441200325e-05, "loss": 0.5969, "step": 13783 }, { "epoch": 0.4024408046480395, "grad_norm": 0.577520958392717, "learning_rate": 3.3198702351987025e-05, "loss": 0.6374, "step": 13784 }, { "epoch": 0.40247000087588686, "grad_norm": 0.5133535074962057, "learning_rate": 3.3197080291970806e-05, "loss": 0.6056, "step": 13785 }, { "epoch": 0.4024991971037342, "grad_norm": 0.5395091123835141, "learning_rate": 3.319545823195459e-05, "loss": 0.5761, "step": 13786 }, { "epoch": 0.4025283933315816, "grad_norm": 0.5044707463106719, "learning_rate": 3.319383617193836e-05, "loss": 0.5546, "step": 13787 }, { "epoch": 0.40255758955942894, "grad_norm": 0.5241581011812823, "learning_rate": 3.3192214111922145e-05, "loss": 0.6247, "step": 13788 }, { "epoch": 0.4025867857872763, "grad_norm": 0.5239796773616808, "learning_rate": 3.319059205190592e-05, "loss": 0.5997, "step": 13789 }, { "epoch": 0.40261598201512366, "grad_norm": 0.5467850185195983, "learning_rate": 3.31889699918897e-05, "loss": 0.6827, "step": 13790 }, { "epoch": 0.402645178242971, "grad_norm": 0.5032453716890118, "learning_rate": 3.3187347931873483e-05, "loss": 0.5999, "step": 13791 }, { "epoch": 0.4026743744708184, "grad_norm": 0.49889745553315046, "learning_rate": 3.318572587185726e-05, "loss": 0.5263, "step": 13792 }, { "epoch": 0.40270357069866575, "grad_norm": 0.5036544639189131, "learning_rate": 3.318410381184104e-05, "loss": 0.5892, "step": 13793 }, { "epoch": 0.4027327669265131, "grad_norm": 0.54339523315095, "learning_rate": 3.3182481751824815e-05, "loss": 0.5807, "step": 13794 }, { "epoch": 0.40276196315436047, "grad_norm": 0.5504755838098995, "learning_rate": 3.31808596918086e-05, "loss": 0.5929, "step": 13795 }, { "epoch": 0.40279115938220783, "grad_norm": 0.5063448149674885, "learning_rate": 3.317923763179238e-05, "loss": 0.5726, "step": 13796 }, { "epoch": 0.4028203556100552, "grad_norm": 0.5591058569538189, "learning_rate": 3.3177615571776154e-05, "loss": 0.6424, "step": 13797 }, { "epoch": 0.40284955183790255, "grad_norm": 0.5793178964470276, "learning_rate": 3.3175993511759935e-05, "loss": 0.7186, "step": 13798 }, { "epoch": 0.4028787480657499, "grad_norm": 0.5829986273360066, "learning_rate": 3.317437145174372e-05, "loss": 0.6939, "step": 13799 }, { "epoch": 0.4029079442935973, "grad_norm": 0.5168671410972505, "learning_rate": 3.31727493917275e-05, "loss": 0.5858, "step": 13800 }, { "epoch": 0.40293714052144464, "grad_norm": 0.5155952703712496, "learning_rate": 3.3171127331711274e-05, "loss": 0.5597, "step": 13801 }, { "epoch": 0.402966336749292, "grad_norm": 0.537939477289162, "learning_rate": 3.3169505271695056e-05, "loss": 0.6537, "step": 13802 }, { "epoch": 0.40299553297713936, "grad_norm": 0.5243013650292623, "learning_rate": 3.316788321167884e-05, "loss": 0.6724, "step": 13803 }, { "epoch": 0.4030247292049867, "grad_norm": 0.5361895582684629, "learning_rate": 3.316626115166261e-05, "loss": 0.6149, "step": 13804 }, { "epoch": 0.4030539254328341, "grad_norm": 0.5551424001675712, "learning_rate": 3.3164639091646394e-05, "loss": 0.6789, "step": 13805 }, { "epoch": 0.40308312166068144, "grad_norm": 0.5400912533968104, "learning_rate": 3.316301703163017e-05, "loss": 0.6178, "step": 13806 }, { "epoch": 0.4031123178885288, "grad_norm": 0.6007394163748505, "learning_rate": 3.316139497161395e-05, "loss": 0.6728, "step": 13807 }, { "epoch": 0.40314151411637617, "grad_norm": 0.5449523416050914, "learning_rate": 3.315977291159773e-05, "loss": 0.6689, "step": 13808 }, { "epoch": 0.4031707103442235, "grad_norm": 0.5015192972558258, "learning_rate": 3.315815085158151e-05, "loss": 0.6076, "step": 13809 }, { "epoch": 0.4031999065720709, "grad_norm": 0.5378276070056902, "learning_rate": 3.315652879156529e-05, "loss": 0.6877, "step": 13810 }, { "epoch": 0.40322910279991825, "grad_norm": 0.538183283763841, "learning_rate": 3.315490673154907e-05, "loss": 0.6673, "step": 13811 }, { "epoch": 0.4032582990277656, "grad_norm": 0.4967792290179236, "learning_rate": 3.3153284671532846e-05, "loss": 0.5816, "step": 13812 }, { "epoch": 0.40328749525561297, "grad_norm": 0.5216164576339364, "learning_rate": 3.315166261151663e-05, "loss": 0.6205, "step": 13813 }, { "epoch": 0.40331669148346033, "grad_norm": 0.5749501624315404, "learning_rate": 3.31500405515004e-05, "loss": 0.6867, "step": 13814 }, { "epoch": 0.4033458877113077, "grad_norm": 0.5420246773782074, "learning_rate": 3.3148418491484185e-05, "loss": 0.6205, "step": 13815 }, { "epoch": 0.40337508393915505, "grad_norm": 0.49631436659837674, "learning_rate": 3.3146796431467967e-05, "loss": 0.5781, "step": 13816 }, { "epoch": 0.4034042801670024, "grad_norm": 0.5339332744095452, "learning_rate": 3.314517437145174e-05, "loss": 0.583, "step": 13817 }, { "epoch": 0.4034334763948498, "grad_norm": 0.5707866299386757, "learning_rate": 3.314355231143553e-05, "loss": 0.6923, "step": 13818 }, { "epoch": 0.40346267262269714, "grad_norm": 0.5498523706435126, "learning_rate": 3.3141930251419305e-05, "loss": 0.6984, "step": 13819 }, { "epoch": 0.4034918688505445, "grad_norm": 0.528790514826499, "learning_rate": 3.314030819140309e-05, "loss": 0.6105, "step": 13820 }, { "epoch": 0.40352106507839186, "grad_norm": 0.5744817398077792, "learning_rate": 3.313868613138686e-05, "loss": 0.6771, "step": 13821 }, { "epoch": 0.4035502613062392, "grad_norm": 0.5424069934878895, "learning_rate": 3.3137064071370644e-05, "loss": 0.629, "step": 13822 }, { "epoch": 0.4035794575340866, "grad_norm": 0.5251701800529401, "learning_rate": 3.3135442011354425e-05, "loss": 0.5866, "step": 13823 }, { "epoch": 0.40360865376193394, "grad_norm": 0.5024175387842826, "learning_rate": 3.31338199513382e-05, "loss": 0.5501, "step": 13824 }, { "epoch": 0.4036378499897813, "grad_norm": 0.5251558048133591, "learning_rate": 3.313219789132198e-05, "loss": 0.6452, "step": 13825 }, { "epoch": 0.40366704621762867, "grad_norm": 0.4998642589640576, "learning_rate": 3.313057583130576e-05, "loss": 0.597, "step": 13826 }, { "epoch": 0.40369624244547603, "grad_norm": 0.5713431907365073, "learning_rate": 3.312895377128954e-05, "loss": 0.6536, "step": 13827 }, { "epoch": 0.4037254386733234, "grad_norm": 0.5575743963127139, "learning_rate": 3.312733171127332e-05, "loss": 0.6325, "step": 13828 }, { "epoch": 0.40375463490117075, "grad_norm": 0.5602806381636385, "learning_rate": 3.3125709651257096e-05, "loss": 0.7159, "step": 13829 }, { "epoch": 0.4037838311290181, "grad_norm": 0.547367812232481, "learning_rate": 3.312408759124088e-05, "loss": 0.6694, "step": 13830 }, { "epoch": 0.4038130273568655, "grad_norm": 0.5170927999290013, "learning_rate": 3.312246553122465e-05, "loss": 0.6059, "step": 13831 }, { "epoch": 0.40384222358471283, "grad_norm": 0.5257891368036829, "learning_rate": 3.3120843471208434e-05, "loss": 0.594, "step": 13832 }, { "epoch": 0.4038714198125602, "grad_norm": 0.533931553463071, "learning_rate": 3.3119221411192216e-05, "loss": 0.6394, "step": 13833 }, { "epoch": 0.40390061604040756, "grad_norm": 0.5000461123286357, "learning_rate": 3.311759935117599e-05, "loss": 0.5561, "step": 13834 }, { "epoch": 0.4039298122682549, "grad_norm": 0.5350360558079961, "learning_rate": 3.311597729115977e-05, "loss": 0.6814, "step": 13835 }, { "epoch": 0.4039590084961023, "grad_norm": 0.5089911945383876, "learning_rate": 3.3114355231143554e-05, "loss": 0.5906, "step": 13836 }, { "epoch": 0.40398820472394964, "grad_norm": 0.5690029230928586, "learning_rate": 3.3112733171127336e-05, "loss": 0.6597, "step": 13837 }, { "epoch": 0.404017400951797, "grad_norm": 0.5324301865883834, "learning_rate": 3.311111111111112e-05, "loss": 0.6525, "step": 13838 }, { "epoch": 0.40404659717964436, "grad_norm": 0.5640351564467656, "learning_rate": 3.310948905109489e-05, "loss": 0.6021, "step": 13839 }, { "epoch": 0.4040757934074918, "grad_norm": 0.5454407398402834, "learning_rate": 3.3107866991078675e-05, "loss": 0.6953, "step": 13840 }, { "epoch": 0.40410498963533914, "grad_norm": 0.5109140718016212, "learning_rate": 3.310624493106245e-05, "loss": 0.5789, "step": 13841 }, { "epoch": 0.4041341858631865, "grad_norm": 0.48864686931763235, "learning_rate": 3.310462287104623e-05, "loss": 0.5798, "step": 13842 }, { "epoch": 0.40416338209103386, "grad_norm": 0.5408689179835479, "learning_rate": 3.310300081103001e-05, "loss": 0.629, "step": 13843 }, { "epoch": 0.4041925783188812, "grad_norm": 0.5528915440264273, "learning_rate": 3.310137875101379e-05, "loss": 0.6405, "step": 13844 }, { "epoch": 0.4042217745467286, "grad_norm": 0.5283918143369236, "learning_rate": 3.309975669099757e-05, "loss": 0.6665, "step": 13845 }, { "epoch": 0.40425097077457595, "grad_norm": 0.5325933893249413, "learning_rate": 3.3098134630981345e-05, "loss": 0.6456, "step": 13846 }, { "epoch": 0.4042801670024233, "grad_norm": 0.5311639775766613, "learning_rate": 3.3096512570965127e-05, "loss": 0.644, "step": 13847 }, { "epoch": 0.40430936323027067, "grad_norm": 0.5082305917059297, "learning_rate": 3.309489051094891e-05, "loss": 0.6268, "step": 13848 }, { "epoch": 0.40433855945811803, "grad_norm": 0.5315198876646575, "learning_rate": 3.309326845093268e-05, "loss": 0.573, "step": 13849 }, { "epoch": 0.4043677556859654, "grad_norm": 0.5599092307327282, "learning_rate": 3.3091646390916465e-05, "loss": 0.6675, "step": 13850 }, { "epoch": 0.40439695191381275, "grad_norm": 0.5081925424692714, "learning_rate": 3.309002433090024e-05, "loss": 0.5502, "step": 13851 }, { "epoch": 0.4044261481416601, "grad_norm": 0.6060382725453758, "learning_rate": 3.308840227088402e-05, "loss": 0.6857, "step": 13852 }, { "epoch": 0.4044553443695075, "grad_norm": 0.5341950744838272, "learning_rate": 3.3086780210867804e-05, "loss": 0.5814, "step": 13853 }, { "epoch": 0.40448454059735484, "grad_norm": 0.5236775635380175, "learning_rate": 3.308515815085158e-05, "loss": 0.5222, "step": 13854 }, { "epoch": 0.4045137368252022, "grad_norm": 0.5395981343075676, "learning_rate": 3.308353609083536e-05, "loss": 0.6351, "step": 13855 }, { "epoch": 0.40454293305304956, "grad_norm": 0.5007649580508277, "learning_rate": 3.308191403081914e-05, "loss": 0.5708, "step": 13856 }, { "epoch": 0.4045721292808969, "grad_norm": 0.5630105928937388, "learning_rate": 3.3080291970802924e-05, "loss": 0.6772, "step": 13857 }, { "epoch": 0.4046013255087443, "grad_norm": 0.5554413276854564, "learning_rate": 3.3078669910786706e-05, "loss": 0.6557, "step": 13858 }, { "epoch": 0.40463052173659164, "grad_norm": 0.5398183724449157, "learning_rate": 3.307704785077048e-05, "loss": 0.6004, "step": 13859 }, { "epoch": 0.404659717964439, "grad_norm": 0.582943618210681, "learning_rate": 3.307542579075426e-05, "loss": 0.7072, "step": 13860 }, { "epoch": 0.40468891419228636, "grad_norm": 0.5306874478872751, "learning_rate": 3.307380373073804e-05, "loss": 0.6257, "step": 13861 }, { "epoch": 0.4047181104201337, "grad_norm": 0.5585977012759994, "learning_rate": 3.307218167072182e-05, "loss": 0.6079, "step": 13862 }, { "epoch": 0.4047473066479811, "grad_norm": 0.5080808434761493, "learning_rate": 3.30705596107056e-05, "loss": 0.5871, "step": 13863 }, { "epoch": 0.40477650287582845, "grad_norm": 0.5257422555863481, "learning_rate": 3.3068937550689376e-05, "loss": 0.6092, "step": 13864 }, { "epoch": 0.4048056991036758, "grad_norm": 0.5303004841259784, "learning_rate": 3.306731549067316e-05, "loss": 0.6745, "step": 13865 }, { "epoch": 0.40483489533152317, "grad_norm": 0.4809715857504072, "learning_rate": 3.306569343065693e-05, "loss": 0.5606, "step": 13866 }, { "epoch": 0.40486409155937053, "grad_norm": 0.5504178364502593, "learning_rate": 3.3064071370640714e-05, "loss": 0.6376, "step": 13867 }, { "epoch": 0.4048932877872179, "grad_norm": 0.5574729298877332, "learning_rate": 3.3062449310624496e-05, "loss": 0.6954, "step": 13868 }, { "epoch": 0.40492248401506525, "grad_norm": 0.5021040768464551, "learning_rate": 3.306082725060827e-05, "loss": 0.5905, "step": 13869 }, { "epoch": 0.4049516802429126, "grad_norm": 0.5189736784000244, "learning_rate": 3.305920519059205e-05, "loss": 0.6165, "step": 13870 }, { "epoch": 0.40498087647076, "grad_norm": 0.5501563400330662, "learning_rate": 3.305758313057583e-05, "loss": 0.6433, "step": 13871 }, { "epoch": 0.40501007269860734, "grad_norm": 0.4917646882818178, "learning_rate": 3.305596107055961e-05, "loss": 0.6052, "step": 13872 }, { "epoch": 0.4050392689264547, "grad_norm": 0.5096446336604278, "learning_rate": 3.305433901054339e-05, "loss": 0.6047, "step": 13873 }, { "epoch": 0.40506846515430206, "grad_norm": 0.5376706683250978, "learning_rate": 3.3052716950527166e-05, "loss": 0.6579, "step": 13874 }, { "epoch": 0.4050976613821494, "grad_norm": 0.5685521013080347, "learning_rate": 3.3051094890510955e-05, "loss": 0.6605, "step": 13875 }, { "epoch": 0.4051268576099968, "grad_norm": 0.5263589328167882, "learning_rate": 3.304947283049473e-05, "loss": 0.6357, "step": 13876 }, { "epoch": 0.40515605383784414, "grad_norm": 0.5746187821313378, "learning_rate": 3.304785077047851e-05, "loss": 0.6054, "step": 13877 }, { "epoch": 0.4051852500656915, "grad_norm": 0.5574146389851264, "learning_rate": 3.3046228710462293e-05, "loss": 0.671, "step": 13878 }, { "epoch": 0.40521444629353887, "grad_norm": 0.5369830862254519, "learning_rate": 3.304460665044607e-05, "loss": 0.6163, "step": 13879 }, { "epoch": 0.4052436425213862, "grad_norm": 0.5360886112525356, "learning_rate": 3.304298459042985e-05, "loss": 0.6584, "step": 13880 }, { "epoch": 0.4052728387492336, "grad_norm": 0.5113893011029206, "learning_rate": 3.3041362530413625e-05, "loss": 0.5432, "step": 13881 }, { "epoch": 0.40530203497708095, "grad_norm": 0.5417650962286538, "learning_rate": 3.303974047039741e-05, "loss": 0.6734, "step": 13882 }, { "epoch": 0.4053312312049283, "grad_norm": 0.549335385294546, "learning_rate": 3.303811841038119e-05, "loss": 0.6732, "step": 13883 }, { "epoch": 0.40536042743277567, "grad_norm": 0.5023983851724166, "learning_rate": 3.3036496350364964e-05, "loss": 0.5831, "step": 13884 }, { "epoch": 0.40538962366062303, "grad_norm": 0.5752633828130302, "learning_rate": 3.3034874290348745e-05, "loss": 0.6892, "step": 13885 }, { "epoch": 0.4054188198884704, "grad_norm": 0.5366571511642232, "learning_rate": 3.303325223033252e-05, "loss": 0.6045, "step": 13886 }, { "epoch": 0.40544801611631776, "grad_norm": 0.5561010699698402, "learning_rate": 3.30316301703163e-05, "loss": 0.6597, "step": 13887 }, { "epoch": 0.4054772123441651, "grad_norm": 0.5164158767705841, "learning_rate": 3.3030008110300084e-05, "loss": 0.6278, "step": 13888 }, { "epoch": 0.4055064085720125, "grad_norm": 0.4722638602640886, "learning_rate": 3.302838605028386e-05, "loss": 0.5204, "step": 13889 }, { "epoch": 0.40553560479985984, "grad_norm": 0.518945897015729, "learning_rate": 3.302676399026764e-05, "loss": 0.6198, "step": 13890 }, { "epoch": 0.4055648010277072, "grad_norm": 0.5440836750717584, "learning_rate": 3.3025141930251416e-05, "loss": 0.6669, "step": 13891 }, { "epoch": 0.40559399725555456, "grad_norm": 0.5324562796729312, "learning_rate": 3.30235198702352e-05, "loss": 0.6199, "step": 13892 }, { "epoch": 0.4056231934834019, "grad_norm": 0.5511167751598064, "learning_rate": 3.302189781021898e-05, "loss": 0.6537, "step": 13893 }, { "epoch": 0.4056523897112493, "grad_norm": 0.5987902995013765, "learning_rate": 3.302027575020276e-05, "loss": 0.8115, "step": 13894 }, { "epoch": 0.40568158593909664, "grad_norm": 0.4797866208593358, "learning_rate": 3.301865369018654e-05, "loss": 0.5081, "step": 13895 }, { "epoch": 0.405710782166944, "grad_norm": 0.5249719190827566, "learning_rate": 3.301703163017032e-05, "loss": 0.626, "step": 13896 }, { "epoch": 0.40573997839479137, "grad_norm": 0.5069876147475846, "learning_rate": 3.30154095701541e-05, "loss": 0.5792, "step": 13897 }, { "epoch": 0.40576917462263873, "grad_norm": 0.5433182052203466, "learning_rate": 3.301378751013788e-05, "loss": 0.6579, "step": 13898 }, { "epoch": 0.4057983708504861, "grad_norm": 0.5262570003925543, "learning_rate": 3.3012165450121656e-05, "loss": 0.6307, "step": 13899 }, { "epoch": 0.4058275670783335, "grad_norm": 0.515158875219357, "learning_rate": 3.301054339010544e-05, "loss": 0.6197, "step": 13900 }, { "epoch": 0.40585676330618087, "grad_norm": 0.5086011321306796, "learning_rate": 3.300892133008921e-05, "loss": 0.5663, "step": 13901 }, { "epoch": 0.40588595953402823, "grad_norm": 0.5361714885441881, "learning_rate": 3.3007299270072995e-05, "loss": 0.648, "step": 13902 }, { "epoch": 0.4059151557618756, "grad_norm": 0.548204113197876, "learning_rate": 3.3005677210056777e-05, "loss": 0.6623, "step": 13903 }, { "epoch": 0.40594435198972295, "grad_norm": 0.5290566369077393, "learning_rate": 3.300405515004055e-05, "loss": 0.644, "step": 13904 }, { "epoch": 0.4059735482175703, "grad_norm": 0.5313209373691435, "learning_rate": 3.300243309002433e-05, "loss": 0.6218, "step": 13905 }, { "epoch": 0.4060027444454177, "grad_norm": 0.506891075330215, "learning_rate": 3.300081103000811e-05, "loss": 0.5567, "step": 13906 }, { "epoch": 0.40603194067326503, "grad_norm": 0.5730980381641179, "learning_rate": 3.299918896999189e-05, "loss": 0.6983, "step": 13907 }, { "epoch": 0.4060611369011124, "grad_norm": 0.5312639669998688, "learning_rate": 3.299756690997567e-05, "loss": 0.6107, "step": 13908 }, { "epoch": 0.40609033312895976, "grad_norm": 0.5228401689322402, "learning_rate": 3.299594484995945e-05, "loss": 0.6527, "step": 13909 }, { "epoch": 0.4061195293568071, "grad_norm": 0.5102528125462884, "learning_rate": 3.299432278994323e-05, "loss": 0.585, "step": 13910 }, { "epoch": 0.4061487255846545, "grad_norm": 0.5394644938825257, "learning_rate": 3.2992700729927004e-05, "loss": 0.6235, "step": 13911 }, { "epoch": 0.40617792181250184, "grad_norm": 0.5230840956088749, "learning_rate": 3.299107866991079e-05, "loss": 0.6065, "step": 13912 }, { "epoch": 0.4062071180403492, "grad_norm": 0.5133137792391214, "learning_rate": 3.298945660989457e-05, "loss": 0.5528, "step": 13913 }, { "epoch": 0.40623631426819656, "grad_norm": 0.5532334322221145, "learning_rate": 3.298783454987835e-05, "loss": 0.6851, "step": 13914 }, { "epoch": 0.4062655104960439, "grad_norm": 0.5407987225701746, "learning_rate": 3.298621248986213e-05, "loss": 0.6904, "step": 13915 }, { "epoch": 0.4062947067238913, "grad_norm": 0.5695422886707164, "learning_rate": 3.2984590429845906e-05, "loss": 0.6622, "step": 13916 }, { "epoch": 0.40632390295173865, "grad_norm": 0.5394466006840241, "learning_rate": 3.298296836982969e-05, "loss": 0.6492, "step": 13917 }, { "epoch": 0.406353099179586, "grad_norm": 0.5749814652575691, "learning_rate": 3.298134630981346e-05, "loss": 0.6854, "step": 13918 }, { "epoch": 0.40638229540743337, "grad_norm": 0.527528367273311, "learning_rate": 3.2979724249797244e-05, "loss": 0.6089, "step": 13919 }, { "epoch": 0.40641149163528073, "grad_norm": 0.575042647055226, "learning_rate": 3.2978102189781026e-05, "loss": 0.6537, "step": 13920 }, { "epoch": 0.4064406878631281, "grad_norm": 0.5622964435419777, "learning_rate": 3.29764801297648e-05, "loss": 0.68, "step": 13921 }, { "epoch": 0.40646988409097545, "grad_norm": 0.56322850146661, "learning_rate": 3.297485806974858e-05, "loss": 0.6716, "step": 13922 }, { "epoch": 0.4064990803188228, "grad_norm": 0.47725056612063116, "learning_rate": 3.2973236009732364e-05, "loss": 0.4986, "step": 13923 }, { "epoch": 0.4065282765466702, "grad_norm": 0.5405054577554519, "learning_rate": 3.297161394971614e-05, "loss": 0.655, "step": 13924 }, { "epoch": 0.40655747277451754, "grad_norm": 0.5627998459297361, "learning_rate": 3.296999188969992e-05, "loss": 0.7342, "step": 13925 }, { "epoch": 0.4065866690023649, "grad_norm": 0.5578746295591692, "learning_rate": 3.2968369829683696e-05, "loss": 0.7085, "step": 13926 }, { "epoch": 0.40661586523021226, "grad_norm": 0.5066285576146853, "learning_rate": 3.296674776966748e-05, "loss": 0.5599, "step": 13927 }, { "epoch": 0.4066450614580596, "grad_norm": 0.4820591554566279, "learning_rate": 3.296512570965126e-05, "loss": 0.526, "step": 13928 }, { "epoch": 0.406674257685907, "grad_norm": 0.6231712904658493, "learning_rate": 3.2963503649635035e-05, "loss": 0.6909, "step": 13929 }, { "epoch": 0.40670345391375434, "grad_norm": 0.507674900916566, "learning_rate": 3.2961881589618816e-05, "loss": 0.5999, "step": 13930 }, { "epoch": 0.4067326501416017, "grad_norm": 0.5426862433247652, "learning_rate": 3.29602595296026e-05, "loss": 0.6384, "step": 13931 }, { "epoch": 0.40676184636944906, "grad_norm": 0.5257327622969535, "learning_rate": 3.295863746958638e-05, "loss": 0.6014, "step": 13932 }, { "epoch": 0.4067910425972964, "grad_norm": 0.5075520863455669, "learning_rate": 3.2957015409570155e-05, "loss": 0.6247, "step": 13933 }, { "epoch": 0.4068202388251438, "grad_norm": 0.5919749890094191, "learning_rate": 3.295539334955394e-05, "loss": 0.7429, "step": 13934 }, { "epoch": 0.40684943505299115, "grad_norm": 0.5074576652602042, "learning_rate": 3.295377128953772e-05, "loss": 0.5745, "step": 13935 }, { "epoch": 0.4068786312808385, "grad_norm": 0.5206468516015561, "learning_rate": 3.295214922952149e-05, "loss": 0.5837, "step": 13936 }, { "epoch": 0.40690782750868587, "grad_norm": 0.5391070120445766, "learning_rate": 3.2950527169505275e-05, "loss": 0.6423, "step": 13937 }, { "epoch": 0.40693702373653323, "grad_norm": 0.525167845082752, "learning_rate": 3.294890510948905e-05, "loss": 0.6263, "step": 13938 }, { "epoch": 0.4069662199643806, "grad_norm": 0.5432776482067084, "learning_rate": 3.294728304947283e-05, "loss": 0.6414, "step": 13939 }, { "epoch": 0.40699541619222795, "grad_norm": 0.6228322347116017, "learning_rate": 3.2945660989456614e-05, "loss": 0.6679, "step": 13940 }, { "epoch": 0.4070246124200753, "grad_norm": 0.5283514914933605, "learning_rate": 3.294403892944039e-05, "loss": 0.6581, "step": 13941 }, { "epoch": 0.4070538086479227, "grad_norm": 0.5285687953060736, "learning_rate": 3.294241686942417e-05, "loss": 0.6565, "step": 13942 }, { "epoch": 0.40708300487577004, "grad_norm": 0.564642595497705, "learning_rate": 3.294079480940795e-05, "loss": 0.6722, "step": 13943 }, { "epoch": 0.4071122011036174, "grad_norm": 0.5559739758244114, "learning_rate": 3.293917274939173e-05, "loss": 0.7004, "step": 13944 }, { "epoch": 0.40714139733146476, "grad_norm": 0.5441001467654616, "learning_rate": 3.293755068937551e-05, "loss": 0.6218, "step": 13945 }, { "epoch": 0.4071705935593121, "grad_norm": 0.49937104079932854, "learning_rate": 3.2935928629359284e-05, "loss": 0.5925, "step": 13946 }, { "epoch": 0.4071997897871595, "grad_norm": 0.5411678022103525, "learning_rate": 3.2934306569343066e-05, "loss": 0.6254, "step": 13947 }, { "epoch": 0.40722898601500684, "grad_norm": 0.49321698853392737, "learning_rate": 3.293268450932685e-05, "loss": 0.5567, "step": 13948 }, { "epoch": 0.4072581822428542, "grad_norm": 0.5000234218461604, "learning_rate": 3.293106244931062e-05, "loss": 0.6283, "step": 13949 }, { "epoch": 0.40728737847070157, "grad_norm": 0.5465431638648247, "learning_rate": 3.292944038929441e-05, "loss": 0.6044, "step": 13950 }, { "epoch": 0.4073165746985489, "grad_norm": 0.5473485295873072, "learning_rate": 3.2927818329278186e-05, "loss": 0.6474, "step": 13951 }, { "epoch": 0.4073457709263963, "grad_norm": 0.5821930702586737, "learning_rate": 3.292619626926197e-05, "loss": 0.6763, "step": 13952 }, { "epoch": 0.40737496715424365, "grad_norm": 0.559737925404498, "learning_rate": 3.292457420924574e-05, "loss": 0.6178, "step": 13953 }, { "epoch": 0.407404163382091, "grad_norm": 0.5086375588278401, "learning_rate": 3.2922952149229524e-05, "loss": 0.5909, "step": 13954 }, { "epoch": 0.4074333596099384, "grad_norm": 0.5104458517101941, "learning_rate": 3.2921330089213306e-05, "loss": 0.5641, "step": 13955 }, { "epoch": 0.40746255583778573, "grad_norm": 0.4888128596272559, "learning_rate": 3.291970802919708e-05, "loss": 0.5206, "step": 13956 }, { "epoch": 0.4074917520656331, "grad_norm": 0.5574270635615765, "learning_rate": 3.291808596918086e-05, "loss": 0.6673, "step": 13957 }, { "epoch": 0.40752094829348046, "grad_norm": 0.5221540013073455, "learning_rate": 3.291646390916464e-05, "loss": 0.6225, "step": 13958 }, { "epoch": 0.4075501445213278, "grad_norm": 0.5551460112613397, "learning_rate": 3.291484184914842e-05, "loss": 0.6644, "step": 13959 }, { "epoch": 0.40757934074917523, "grad_norm": 0.5423000495865635, "learning_rate": 3.29132197891322e-05, "loss": 0.6187, "step": 13960 }, { "epoch": 0.4076085369770226, "grad_norm": 0.5234031239072139, "learning_rate": 3.2911597729115976e-05, "loss": 0.5792, "step": 13961 }, { "epoch": 0.40763773320486996, "grad_norm": 0.4775923108161947, "learning_rate": 3.290997566909976e-05, "loss": 0.5419, "step": 13962 }, { "epoch": 0.4076669294327173, "grad_norm": 0.5140901737774258, "learning_rate": 3.290835360908353e-05, "loss": 0.592, "step": 13963 }, { "epoch": 0.4076961256605647, "grad_norm": 0.5668769493047707, "learning_rate": 3.2906731549067315e-05, "loss": 0.6592, "step": 13964 }, { "epoch": 0.40772532188841204, "grad_norm": 0.5300254894493526, "learning_rate": 3.29051094890511e-05, "loss": 0.6191, "step": 13965 }, { "epoch": 0.4077545181162594, "grad_norm": 0.5100065527964325, "learning_rate": 3.290348742903487e-05, "loss": 0.6175, "step": 13966 }, { "epoch": 0.40778371434410676, "grad_norm": 0.5304846786657647, "learning_rate": 3.2901865369018653e-05, "loss": 0.5874, "step": 13967 }, { "epoch": 0.4078129105719541, "grad_norm": 0.5188016740112845, "learning_rate": 3.2900243309002435e-05, "loss": 0.6225, "step": 13968 }, { "epoch": 0.4078421067998015, "grad_norm": 0.4991688554261641, "learning_rate": 3.289862124898622e-05, "loss": 0.5758, "step": 13969 }, { "epoch": 0.40787130302764885, "grad_norm": 0.549925074630948, "learning_rate": 3.289699918897e-05, "loss": 0.6808, "step": 13970 }, { "epoch": 0.4079004992554962, "grad_norm": 0.5472140867615852, "learning_rate": 3.2895377128953774e-05, "loss": 0.6423, "step": 13971 }, { "epoch": 0.40792969548334357, "grad_norm": 0.5205236676415798, "learning_rate": 3.2893755068937555e-05, "loss": 0.5765, "step": 13972 }, { "epoch": 0.40795889171119093, "grad_norm": 0.5187997006532038, "learning_rate": 3.289213300892133e-05, "loss": 0.6356, "step": 13973 }, { "epoch": 0.4079880879390383, "grad_norm": 0.5113082650213024, "learning_rate": 3.289051094890511e-05, "loss": 0.6063, "step": 13974 }, { "epoch": 0.40801728416688565, "grad_norm": 0.5516167560374957, "learning_rate": 3.2888888888888894e-05, "loss": 0.5753, "step": 13975 }, { "epoch": 0.408046480394733, "grad_norm": 0.5356275009733739, "learning_rate": 3.288726682887267e-05, "loss": 0.6273, "step": 13976 }, { "epoch": 0.4080756766225804, "grad_norm": 0.5189582696433709, "learning_rate": 3.288564476885645e-05, "loss": 0.6481, "step": 13977 }, { "epoch": 0.40810487285042774, "grad_norm": 0.5692457273164047, "learning_rate": 3.2884022708840226e-05, "loss": 0.6803, "step": 13978 }, { "epoch": 0.4081340690782751, "grad_norm": 0.5180509315478947, "learning_rate": 3.288240064882401e-05, "loss": 0.6098, "step": 13979 }, { "epoch": 0.40816326530612246, "grad_norm": 0.5713235414526142, "learning_rate": 3.288077858880779e-05, "loss": 0.7104, "step": 13980 }, { "epoch": 0.4081924615339698, "grad_norm": 0.5154669954575978, "learning_rate": 3.2879156528791564e-05, "loss": 0.6278, "step": 13981 }, { "epoch": 0.4082216577618172, "grad_norm": 0.5226257145864428, "learning_rate": 3.2877534468775346e-05, "loss": 0.6033, "step": 13982 }, { "epoch": 0.40825085398966454, "grad_norm": 0.5891086640629152, "learning_rate": 3.287591240875912e-05, "loss": 0.7129, "step": 13983 }, { "epoch": 0.4082800502175119, "grad_norm": 0.6247955595555765, "learning_rate": 3.28742903487429e-05, "loss": 0.7003, "step": 13984 }, { "epoch": 0.40830924644535926, "grad_norm": 0.5485205181918482, "learning_rate": 3.2872668288726685e-05, "loss": 0.6572, "step": 13985 }, { "epoch": 0.4083384426732066, "grad_norm": 0.5242226632253983, "learning_rate": 3.287104622871046e-05, "loss": 0.6607, "step": 13986 }, { "epoch": 0.408367638901054, "grad_norm": 0.554667898970873, "learning_rate": 3.286942416869424e-05, "loss": 0.6595, "step": 13987 }, { "epoch": 0.40839683512890135, "grad_norm": 0.5906512956194031, "learning_rate": 3.286780210867802e-05, "loss": 0.7604, "step": 13988 }, { "epoch": 0.4084260313567487, "grad_norm": 0.5794452661407512, "learning_rate": 3.2866180048661805e-05, "loss": 0.6493, "step": 13989 }, { "epoch": 0.40845522758459607, "grad_norm": 0.5083979520660802, "learning_rate": 3.2864557988645587e-05, "loss": 0.6134, "step": 13990 }, { "epoch": 0.40848442381244343, "grad_norm": 0.5592106770873413, "learning_rate": 3.286293592862936e-05, "loss": 0.6831, "step": 13991 }, { "epoch": 0.4085136200402908, "grad_norm": 0.5923119293186218, "learning_rate": 3.286131386861314e-05, "loss": 0.7227, "step": 13992 }, { "epoch": 0.40854281626813815, "grad_norm": 0.5297954089424591, "learning_rate": 3.285969180859692e-05, "loss": 0.6056, "step": 13993 }, { "epoch": 0.4085720124959855, "grad_norm": 0.567688809681545, "learning_rate": 3.28580697485807e-05, "loss": 0.6491, "step": 13994 }, { "epoch": 0.4086012087238329, "grad_norm": 0.5310528496873573, "learning_rate": 3.285644768856448e-05, "loss": 0.6353, "step": 13995 }, { "epoch": 0.40863040495168024, "grad_norm": 0.4896713271412189, "learning_rate": 3.285482562854826e-05, "loss": 0.5617, "step": 13996 }, { "epoch": 0.4086596011795276, "grad_norm": 0.5516704366712473, "learning_rate": 3.285320356853204e-05, "loss": 0.6473, "step": 13997 }, { "epoch": 0.40868879740737496, "grad_norm": 0.5175001961373793, "learning_rate": 3.2851581508515814e-05, "loss": 0.6127, "step": 13998 }, { "epoch": 0.4087179936352223, "grad_norm": 0.5214578124020821, "learning_rate": 3.2849959448499595e-05, "loss": 0.6169, "step": 13999 }, { "epoch": 0.4087471898630697, "grad_norm": 0.4768679063106863, "learning_rate": 3.284833738848338e-05, "loss": 0.5264, "step": 14000 }, { "epoch": 0.40877638609091704, "grad_norm": 0.6042844467616671, "learning_rate": 3.284671532846715e-05, "loss": 0.6965, "step": 14001 }, { "epoch": 0.4088055823187644, "grad_norm": 0.5068557546643765, "learning_rate": 3.2845093268450934e-05, "loss": 0.6057, "step": 14002 }, { "epoch": 0.40883477854661177, "grad_norm": 0.5281783602358547, "learning_rate": 3.284347120843471e-05, "loss": 0.6507, "step": 14003 }, { "epoch": 0.4088639747744591, "grad_norm": 0.5393459783802956, "learning_rate": 3.284184914841849e-05, "loss": 0.6533, "step": 14004 }, { "epoch": 0.4088931710023065, "grad_norm": 0.5067978275019202, "learning_rate": 3.284022708840227e-05, "loss": 0.5859, "step": 14005 }, { "epoch": 0.40892236723015385, "grad_norm": 0.5128063123001476, "learning_rate": 3.283860502838605e-05, "loss": 0.5842, "step": 14006 }, { "epoch": 0.4089515634580012, "grad_norm": 0.5205358344906439, "learning_rate": 3.2836982968369836e-05, "loss": 0.6154, "step": 14007 }, { "epoch": 0.40898075968584857, "grad_norm": 0.5373280394835596, "learning_rate": 3.283536090835361e-05, "loss": 0.6128, "step": 14008 }, { "epoch": 0.40900995591369593, "grad_norm": 0.5092932867613985, "learning_rate": 3.283373884833739e-05, "loss": 0.5352, "step": 14009 }, { "epoch": 0.4090391521415433, "grad_norm": 0.539321617893769, "learning_rate": 3.2832116788321174e-05, "loss": 0.6543, "step": 14010 }, { "epoch": 0.40906834836939066, "grad_norm": 0.539996714551962, "learning_rate": 3.283049472830495e-05, "loss": 0.655, "step": 14011 }, { "epoch": 0.409097544597238, "grad_norm": 0.5532707657582235, "learning_rate": 3.282887266828873e-05, "loss": 0.6613, "step": 14012 }, { "epoch": 0.4091267408250854, "grad_norm": 0.5235103576317073, "learning_rate": 3.2827250608272506e-05, "loss": 0.6433, "step": 14013 }, { "epoch": 0.40915593705293274, "grad_norm": 0.508509674804324, "learning_rate": 3.282562854825629e-05, "loss": 0.6062, "step": 14014 }, { "epoch": 0.4091851332807801, "grad_norm": 0.5223913362861378, "learning_rate": 3.282400648824007e-05, "loss": 0.5813, "step": 14015 }, { "epoch": 0.40921432950862746, "grad_norm": 0.5225738997979328, "learning_rate": 3.2822384428223845e-05, "loss": 0.6003, "step": 14016 }, { "epoch": 0.4092435257364748, "grad_norm": 0.5582413164223038, "learning_rate": 3.2820762368207626e-05, "loss": 0.6804, "step": 14017 }, { "epoch": 0.4092727219643222, "grad_norm": 0.5703473297132731, "learning_rate": 3.28191403081914e-05, "loss": 0.742, "step": 14018 }, { "epoch": 0.40930191819216954, "grad_norm": 0.5728573450496761, "learning_rate": 3.281751824817518e-05, "loss": 0.6911, "step": 14019 }, { "epoch": 0.40933111442001696, "grad_norm": 0.4671341353739684, "learning_rate": 3.2815896188158965e-05, "loss": 0.5181, "step": 14020 }, { "epoch": 0.4093603106478643, "grad_norm": 0.5565913346184239, "learning_rate": 3.281427412814274e-05, "loss": 0.6506, "step": 14021 }, { "epoch": 0.4093895068757117, "grad_norm": 0.5526018183007066, "learning_rate": 3.281265206812652e-05, "loss": 0.6793, "step": 14022 }, { "epoch": 0.40941870310355905, "grad_norm": 0.5217703515568461, "learning_rate": 3.2811030008110297e-05, "loss": 0.6248, "step": 14023 }, { "epoch": 0.4094478993314064, "grad_norm": 0.5292608819400099, "learning_rate": 3.280940794809408e-05, "loss": 0.6416, "step": 14024 }, { "epoch": 0.40947709555925377, "grad_norm": 0.4936642959958512, "learning_rate": 3.280778588807786e-05, "loss": 0.5508, "step": 14025 }, { "epoch": 0.40950629178710113, "grad_norm": 0.5660162252717456, "learning_rate": 3.280616382806164e-05, "loss": 0.6953, "step": 14026 }, { "epoch": 0.4095354880149485, "grad_norm": 0.4995084717655695, "learning_rate": 3.2804541768045424e-05, "loss": 0.5738, "step": 14027 }, { "epoch": 0.40956468424279585, "grad_norm": 0.45842694201945056, "learning_rate": 3.28029197080292e-05, "loss": 0.5013, "step": 14028 }, { "epoch": 0.4095938804706432, "grad_norm": 0.5310389576258056, "learning_rate": 3.280129764801298e-05, "loss": 0.6709, "step": 14029 }, { "epoch": 0.4096230766984906, "grad_norm": 0.49783797355331616, "learning_rate": 3.279967558799676e-05, "loss": 0.5609, "step": 14030 }, { "epoch": 0.40965227292633793, "grad_norm": 0.5439709047626174, "learning_rate": 3.279805352798054e-05, "loss": 0.6439, "step": 14031 }, { "epoch": 0.4096814691541853, "grad_norm": 0.5321708114211677, "learning_rate": 3.279643146796432e-05, "loss": 0.6334, "step": 14032 }, { "epoch": 0.40971066538203266, "grad_norm": 0.5181387794526066, "learning_rate": 3.2794809407948094e-05, "loss": 0.605, "step": 14033 }, { "epoch": 0.40973986160988, "grad_norm": 0.5019733460201129, "learning_rate": 3.2793187347931876e-05, "loss": 0.6055, "step": 14034 }, { "epoch": 0.4097690578377274, "grad_norm": 0.5423751160712323, "learning_rate": 3.279156528791566e-05, "loss": 0.6187, "step": 14035 }, { "epoch": 0.40979825406557474, "grad_norm": 0.5465807157813886, "learning_rate": 3.278994322789943e-05, "loss": 0.6632, "step": 14036 }, { "epoch": 0.4098274502934221, "grad_norm": 0.5451378587222332, "learning_rate": 3.2788321167883214e-05, "loss": 0.6623, "step": 14037 }, { "epoch": 0.40985664652126946, "grad_norm": 0.5138481196259223, "learning_rate": 3.278669910786699e-05, "loss": 0.6373, "step": 14038 }, { "epoch": 0.4098858427491168, "grad_norm": 0.510853521781872, "learning_rate": 3.278507704785077e-05, "loss": 0.6247, "step": 14039 }, { "epoch": 0.4099150389769642, "grad_norm": 0.5326087043058203, "learning_rate": 3.278345498783455e-05, "loss": 0.5394, "step": 14040 }, { "epoch": 0.40994423520481155, "grad_norm": 0.4606988926813137, "learning_rate": 3.278183292781833e-05, "loss": 0.4815, "step": 14041 }, { "epoch": 0.4099734314326589, "grad_norm": 0.510666242505574, "learning_rate": 3.278021086780211e-05, "loss": 0.5757, "step": 14042 }, { "epoch": 0.41000262766050627, "grad_norm": 0.6171847310810838, "learning_rate": 3.2778588807785884e-05, "loss": 0.6648, "step": 14043 }, { "epoch": 0.41003182388835363, "grad_norm": 0.5465651546846928, "learning_rate": 3.277696674776967e-05, "loss": 0.6593, "step": 14044 }, { "epoch": 0.410061020116201, "grad_norm": 0.5549674947698413, "learning_rate": 3.277534468775345e-05, "loss": 0.694, "step": 14045 }, { "epoch": 0.41009021634404835, "grad_norm": 0.51981354552962, "learning_rate": 3.277372262773723e-05, "loss": 0.5926, "step": 14046 }, { "epoch": 0.4101194125718957, "grad_norm": 0.5410524537737019, "learning_rate": 3.277210056772101e-05, "loss": 0.6592, "step": 14047 }, { "epoch": 0.4101486087997431, "grad_norm": 0.5104943127212255, "learning_rate": 3.2770478507704786e-05, "loss": 0.5549, "step": 14048 }, { "epoch": 0.41017780502759044, "grad_norm": 0.5316327364760938, "learning_rate": 3.276885644768857e-05, "loss": 0.6612, "step": 14049 }, { "epoch": 0.4102070012554378, "grad_norm": 0.5008995400375272, "learning_rate": 3.276723438767234e-05, "loss": 0.5434, "step": 14050 }, { "epoch": 0.41023619748328516, "grad_norm": 0.5212888378748421, "learning_rate": 3.2765612327656125e-05, "loss": 0.6066, "step": 14051 }, { "epoch": 0.4102653937111325, "grad_norm": 0.5489905782900412, "learning_rate": 3.276399026763991e-05, "loss": 0.6532, "step": 14052 }, { "epoch": 0.4102945899389799, "grad_norm": 0.5526875609874492, "learning_rate": 3.276236820762368e-05, "loss": 0.6636, "step": 14053 }, { "epoch": 0.41032378616682724, "grad_norm": 0.5431451727472353, "learning_rate": 3.2760746147607463e-05, "loss": 0.6459, "step": 14054 }, { "epoch": 0.4103529823946746, "grad_norm": 0.512669585633638, "learning_rate": 3.2759124087591245e-05, "loss": 0.5867, "step": 14055 }, { "epoch": 0.41038217862252196, "grad_norm": 0.5274171879076343, "learning_rate": 3.275750202757502e-05, "loss": 0.5956, "step": 14056 }, { "epoch": 0.4104113748503693, "grad_norm": 0.5788619556658234, "learning_rate": 3.27558799675588e-05, "loss": 0.7396, "step": 14057 }, { "epoch": 0.4104405710782167, "grad_norm": 0.5176679795165863, "learning_rate": 3.275425790754258e-05, "loss": 0.5835, "step": 14058 }, { "epoch": 0.41046976730606405, "grad_norm": 0.55882141692721, "learning_rate": 3.275263584752636e-05, "loss": 0.6448, "step": 14059 }, { "epoch": 0.4104989635339114, "grad_norm": 0.524977557260017, "learning_rate": 3.275101378751014e-05, "loss": 0.5905, "step": 14060 }, { "epoch": 0.41052815976175877, "grad_norm": 0.5414124634753668, "learning_rate": 3.2749391727493915e-05, "loss": 0.6394, "step": 14061 }, { "epoch": 0.41055735598960613, "grad_norm": 0.5118795681960702, "learning_rate": 3.27477696674777e-05, "loss": 0.6101, "step": 14062 }, { "epoch": 0.4105865522174535, "grad_norm": 0.5489945081039524, "learning_rate": 3.274614760746148e-05, "loss": 0.6532, "step": 14063 }, { "epoch": 0.41061574844530085, "grad_norm": 0.5313341057089798, "learning_rate": 3.274452554744526e-05, "loss": 0.6691, "step": 14064 }, { "epoch": 0.4106449446731482, "grad_norm": 0.5279795184179373, "learning_rate": 3.2742903487429036e-05, "loss": 0.5724, "step": 14065 }, { "epoch": 0.4106741409009956, "grad_norm": 0.5357181553932072, "learning_rate": 3.274128142741282e-05, "loss": 0.6173, "step": 14066 }, { "epoch": 0.41070333712884294, "grad_norm": 0.478377367149136, "learning_rate": 3.27396593673966e-05, "loss": 0.5346, "step": 14067 }, { "epoch": 0.4107325333566903, "grad_norm": 0.5381054722545379, "learning_rate": 3.2738037307380374e-05, "loss": 0.6325, "step": 14068 }, { "epoch": 0.41076172958453766, "grad_norm": 0.5316678711357808, "learning_rate": 3.2736415247364156e-05, "loss": 0.6284, "step": 14069 }, { "epoch": 0.410790925812385, "grad_norm": 0.5314069453725667, "learning_rate": 3.273479318734793e-05, "loss": 0.6468, "step": 14070 }, { "epoch": 0.4108201220402324, "grad_norm": 0.5652310425560101, "learning_rate": 3.273317112733171e-05, "loss": 0.6924, "step": 14071 }, { "epoch": 0.41084931826807974, "grad_norm": 0.5022314673737384, "learning_rate": 3.2731549067315495e-05, "loss": 0.5971, "step": 14072 }, { "epoch": 0.4108785144959271, "grad_norm": 0.5417171128184645, "learning_rate": 3.272992700729927e-05, "loss": 0.6261, "step": 14073 }, { "epoch": 0.41090771072377447, "grad_norm": 0.5259331089936727, "learning_rate": 3.272830494728305e-05, "loss": 0.6475, "step": 14074 }, { "epoch": 0.4109369069516218, "grad_norm": 0.5105420607978083, "learning_rate": 3.2726682887266826e-05, "loss": 0.5906, "step": 14075 }, { "epoch": 0.4109661031794692, "grad_norm": 0.5123258661266022, "learning_rate": 3.272506082725061e-05, "loss": 0.5929, "step": 14076 }, { "epoch": 0.41099529940731655, "grad_norm": 0.54989697749162, "learning_rate": 3.272343876723439e-05, "loss": 0.6294, "step": 14077 }, { "epoch": 0.4110244956351639, "grad_norm": 0.5642608860676209, "learning_rate": 3.2721816707218165e-05, "loss": 0.6724, "step": 14078 }, { "epoch": 0.4110536918630113, "grad_norm": 0.5334987290271502, "learning_rate": 3.2720194647201947e-05, "loss": 0.5984, "step": 14079 }, { "epoch": 0.41108288809085863, "grad_norm": 0.4957765343279525, "learning_rate": 3.271857258718573e-05, "loss": 0.5655, "step": 14080 }, { "epoch": 0.41111208431870605, "grad_norm": 0.5643223856629445, "learning_rate": 3.27169505271695e-05, "loss": 0.6391, "step": 14081 }, { "epoch": 0.4111412805465534, "grad_norm": 0.5124393950446615, "learning_rate": 3.271532846715329e-05, "loss": 0.5997, "step": 14082 }, { "epoch": 0.4111704767744008, "grad_norm": 0.5386112667077858, "learning_rate": 3.271370640713707e-05, "loss": 0.6315, "step": 14083 }, { "epoch": 0.41119967300224813, "grad_norm": 0.5678695517792316, "learning_rate": 3.271208434712085e-05, "loss": 0.6706, "step": 14084 }, { "epoch": 0.4112288692300955, "grad_norm": 0.5052383470706355, "learning_rate": 3.2710462287104624e-05, "loss": 0.5735, "step": 14085 }, { "epoch": 0.41125806545794286, "grad_norm": 0.5223378599527756, "learning_rate": 3.2708840227088405e-05, "loss": 0.6143, "step": 14086 }, { "epoch": 0.4112872616857902, "grad_norm": 0.5451854168544701, "learning_rate": 3.270721816707219e-05, "loss": 0.6716, "step": 14087 }, { "epoch": 0.4113164579136376, "grad_norm": 0.521573915779079, "learning_rate": 3.270559610705596e-05, "loss": 0.6139, "step": 14088 }, { "epoch": 0.41134565414148494, "grad_norm": 0.5480217436933109, "learning_rate": 3.2703974047039744e-05, "loss": 0.6551, "step": 14089 }, { "epoch": 0.4113748503693323, "grad_norm": 0.5514246991516734, "learning_rate": 3.270235198702352e-05, "loss": 0.6598, "step": 14090 }, { "epoch": 0.41140404659717966, "grad_norm": 0.576215348407509, "learning_rate": 3.27007299270073e-05, "loss": 0.7423, "step": 14091 }, { "epoch": 0.411433242825027, "grad_norm": 0.4953111555703865, "learning_rate": 3.269910786699108e-05, "loss": 0.5469, "step": 14092 }, { "epoch": 0.4114624390528744, "grad_norm": 0.5449358578898441, "learning_rate": 3.269748580697486e-05, "loss": 0.6423, "step": 14093 }, { "epoch": 0.41149163528072175, "grad_norm": 0.5250602337312149, "learning_rate": 3.269586374695864e-05, "loss": 0.6078, "step": 14094 }, { "epoch": 0.4115208315085691, "grad_norm": 0.5186994717611504, "learning_rate": 3.2694241686942414e-05, "loss": 0.6024, "step": 14095 }, { "epoch": 0.41155002773641647, "grad_norm": 0.5137510124642579, "learning_rate": 3.2692619626926196e-05, "loss": 0.5621, "step": 14096 }, { "epoch": 0.41157922396426383, "grad_norm": 0.5151947638381944, "learning_rate": 3.269099756690998e-05, "loss": 0.6277, "step": 14097 }, { "epoch": 0.4116084201921112, "grad_norm": 0.6192885642154987, "learning_rate": 3.268937550689375e-05, "loss": 0.7434, "step": 14098 }, { "epoch": 0.41163761641995855, "grad_norm": 0.5484633294657042, "learning_rate": 3.2687753446877534e-05, "loss": 0.6369, "step": 14099 }, { "epoch": 0.4116668126478059, "grad_norm": 0.5067730636847005, "learning_rate": 3.2686131386861316e-05, "loss": 0.5712, "step": 14100 }, { "epoch": 0.4116960088756533, "grad_norm": 0.52745181528143, "learning_rate": 3.26845093268451e-05, "loss": 0.5828, "step": 14101 }, { "epoch": 0.41172520510350064, "grad_norm": 0.5126972633702617, "learning_rate": 3.268288726682888e-05, "loss": 0.6331, "step": 14102 }, { "epoch": 0.411754401331348, "grad_norm": 0.5418098762577819, "learning_rate": 3.2681265206812655e-05, "loss": 0.6103, "step": 14103 }, { "epoch": 0.41178359755919536, "grad_norm": 0.5527840312902739, "learning_rate": 3.2679643146796436e-05, "loss": 0.709, "step": 14104 }, { "epoch": 0.4118127937870427, "grad_norm": 0.5572112509766525, "learning_rate": 3.267802108678021e-05, "loss": 0.7081, "step": 14105 }, { "epoch": 0.4118419900148901, "grad_norm": 0.5160572957891573, "learning_rate": 3.267639902676399e-05, "loss": 0.5914, "step": 14106 }, { "epoch": 0.41187118624273744, "grad_norm": 0.504240821971105, "learning_rate": 3.2674776966747775e-05, "loss": 0.5904, "step": 14107 }, { "epoch": 0.4119003824705848, "grad_norm": 0.5404990138077252, "learning_rate": 3.267315490673155e-05, "loss": 0.6134, "step": 14108 }, { "epoch": 0.41192957869843216, "grad_norm": 0.5771626818679115, "learning_rate": 3.267153284671533e-05, "loss": 0.626, "step": 14109 }, { "epoch": 0.4119587749262795, "grad_norm": 0.5416344821288079, "learning_rate": 3.2669910786699107e-05, "loss": 0.6682, "step": 14110 }, { "epoch": 0.4119879711541269, "grad_norm": 0.5109214165973835, "learning_rate": 3.266828872668289e-05, "loss": 0.5627, "step": 14111 }, { "epoch": 0.41201716738197425, "grad_norm": 0.5379161480390462, "learning_rate": 3.266666666666667e-05, "loss": 0.5662, "step": 14112 }, { "epoch": 0.4120463636098216, "grad_norm": 0.5565363546877867, "learning_rate": 3.2665044606650445e-05, "loss": 0.6795, "step": 14113 }, { "epoch": 0.41207555983766897, "grad_norm": 0.5130455850720409, "learning_rate": 3.266342254663423e-05, "loss": 0.6116, "step": 14114 }, { "epoch": 0.41210475606551633, "grad_norm": 0.5140193220255277, "learning_rate": 3.2661800486618e-05, "loss": 0.592, "step": 14115 }, { "epoch": 0.4121339522933637, "grad_norm": 0.5318874667399192, "learning_rate": 3.2660178426601784e-05, "loss": 0.6321, "step": 14116 }, { "epoch": 0.41216314852121105, "grad_norm": 0.5649633040337037, "learning_rate": 3.2658556366585565e-05, "loss": 0.6634, "step": 14117 }, { "epoch": 0.4121923447490584, "grad_norm": 0.5279642042100117, "learning_rate": 3.265693430656934e-05, "loss": 0.5567, "step": 14118 }, { "epoch": 0.4122215409769058, "grad_norm": 0.526135479566052, "learning_rate": 3.265531224655312e-05, "loss": 0.6507, "step": 14119 }, { "epoch": 0.41225073720475314, "grad_norm": 0.5667315135815346, "learning_rate": 3.2653690186536904e-05, "loss": 0.7013, "step": 14120 }, { "epoch": 0.4122799334326005, "grad_norm": 0.552370241918824, "learning_rate": 3.2652068126520686e-05, "loss": 0.7031, "step": 14121 }, { "epoch": 0.41230912966044786, "grad_norm": 0.470037408813232, "learning_rate": 3.265044606650447e-05, "loss": 0.545, "step": 14122 }, { "epoch": 0.4123383258882952, "grad_norm": 0.572027076282763, "learning_rate": 3.264882400648824e-05, "loss": 0.6456, "step": 14123 }, { "epoch": 0.4123675221161426, "grad_norm": 0.5642364891307151, "learning_rate": 3.2647201946472024e-05, "loss": 0.6828, "step": 14124 }, { "epoch": 0.41239671834398994, "grad_norm": 0.5359969699957299, "learning_rate": 3.26455798864558e-05, "loss": 0.6222, "step": 14125 }, { "epoch": 0.4124259145718373, "grad_norm": 0.5154742316242765, "learning_rate": 3.264395782643958e-05, "loss": 0.6045, "step": 14126 }, { "epoch": 0.41245511079968467, "grad_norm": 0.4995820803234882, "learning_rate": 3.264233576642336e-05, "loss": 0.5578, "step": 14127 }, { "epoch": 0.412484307027532, "grad_norm": 0.5183088572451519, "learning_rate": 3.264071370640714e-05, "loss": 0.6176, "step": 14128 }, { "epoch": 0.4125135032553794, "grad_norm": 0.5166276408195255, "learning_rate": 3.263909164639092e-05, "loss": 0.6197, "step": 14129 }, { "epoch": 0.41254269948322675, "grad_norm": 0.5174690325522845, "learning_rate": 3.2637469586374694e-05, "loss": 0.6572, "step": 14130 }, { "epoch": 0.4125718957110741, "grad_norm": 0.520794323694418, "learning_rate": 3.2635847526358476e-05, "loss": 0.636, "step": 14131 }, { "epoch": 0.41260109193892147, "grad_norm": 0.5402814427761018, "learning_rate": 3.263422546634226e-05, "loss": 0.6062, "step": 14132 }, { "epoch": 0.41263028816676883, "grad_norm": 0.6066128445821467, "learning_rate": 3.263260340632603e-05, "loss": 0.7237, "step": 14133 }, { "epoch": 0.4126594843946162, "grad_norm": 0.5348058504356682, "learning_rate": 3.2630981346309815e-05, "loss": 0.6455, "step": 14134 }, { "epoch": 0.41268868062246356, "grad_norm": 0.5165259307438032, "learning_rate": 3.262935928629359e-05, "loss": 0.5559, "step": 14135 }, { "epoch": 0.4127178768503109, "grad_norm": 0.5431312298244666, "learning_rate": 3.262773722627737e-05, "loss": 0.6667, "step": 14136 }, { "epoch": 0.4127470730781583, "grad_norm": 0.5684818144346532, "learning_rate": 3.262611516626115e-05, "loss": 0.6944, "step": 14137 }, { "epoch": 0.41277626930600564, "grad_norm": 0.5663285533780062, "learning_rate": 3.262449310624493e-05, "loss": 0.6909, "step": 14138 }, { "epoch": 0.412805465533853, "grad_norm": 0.5005037918227873, "learning_rate": 3.262287104622872e-05, "loss": 0.5665, "step": 14139 }, { "epoch": 0.41283466176170036, "grad_norm": 0.49700568011457835, "learning_rate": 3.262124898621249e-05, "loss": 0.5785, "step": 14140 }, { "epoch": 0.4128638579895478, "grad_norm": 0.5452516829670346, "learning_rate": 3.2619626926196273e-05, "loss": 0.6313, "step": 14141 }, { "epoch": 0.41289305421739514, "grad_norm": 0.5522258664187187, "learning_rate": 3.2618004866180055e-05, "loss": 0.6629, "step": 14142 }, { "epoch": 0.4129222504452425, "grad_norm": 0.5026393958641987, "learning_rate": 3.261638280616383e-05, "loss": 0.5839, "step": 14143 }, { "epoch": 0.41295144667308986, "grad_norm": 0.5199592019318645, "learning_rate": 3.261476074614761e-05, "loss": 0.6064, "step": 14144 }, { "epoch": 0.4129806429009372, "grad_norm": 0.4922282548136246, "learning_rate": 3.261313868613139e-05, "loss": 0.5818, "step": 14145 }, { "epoch": 0.4130098391287846, "grad_norm": 0.5457566155970643, "learning_rate": 3.261151662611517e-05, "loss": 0.6577, "step": 14146 }, { "epoch": 0.41303903535663195, "grad_norm": 0.5639134974832628, "learning_rate": 3.260989456609895e-05, "loss": 0.7016, "step": 14147 }, { "epoch": 0.4130682315844793, "grad_norm": 0.5565113619574993, "learning_rate": 3.2608272506082725e-05, "loss": 0.6312, "step": 14148 }, { "epoch": 0.41309742781232667, "grad_norm": 0.5647755254942404, "learning_rate": 3.260665044606651e-05, "loss": 0.6824, "step": 14149 }, { "epoch": 0.41312662404017403, "grad_norm": 0.5217966479694425, "learning_rate": 3.260502838605028e-05, "loss": 0.6284, "step": 14150 }, { "epoch": 0.4131558202680214, "grad_norm": 0.4979033326131068, "learning_rate": 3.2603406326034064e-05, "loss": 0.5401, "step": 14151 }, { "epoch": 0.41318501649586875, "grad_norm": 0.511147553445513, "learning_rate": 3.2601784266017846e-05, "loss": 0.5854, "step": 14152 }, { "epoch": 0.4132142127237161, "grad_norm": 0.5246187420966775, "learning_rate": 3.260016220600162e-05, "loss": 0.6077, "step": 14153 }, { "epoch": 0.4132434089515635, "grad_norm": 0.527399572766087, "learning_rate": 3.25985401459854e-05, "loss": 0.6585, "step": 14154 }, { "epoch": 0.41327260517941083, "grad_norm": 0.5323352455795097, "learning_rate": 3.259691808596918e-05, "loss": 0.6164, "step": 14155 }, { "epoch": 0.4133018014072582, "grad_norm": 0.5672282975498039, "learning_rate": 3.259529602595296e-05, "loss": 0.6933, "step": 14156 }, { "epoch": 0.41333099763510556, "grad_norm": 0.5629169931051081, "learning_rate": 3.259367396593674e-05, "loss": 0.6676, "step": 14157 }, { "epoch": 0.4133601938629529, "grad_norm": 0.5438464669652604, "learning_rate": 3.259205190592052e-05, "loss": 0.6108, "step": 14158 }, { "epoch": 0.4133893900908003, "grad_norm": 0.5120418977241357, "learning_rate": 3.2590429845904305e-05, "loss": 0.5772, "step": 14159 }, { "epoch": 0.41341858631864764, "grad_norm": 0.5461816832755816, "learning_rate": 3.258880778588808e-05, "loss": 0.6923, "step": 14160 }, { "epoch": 0.413447782546495, "grad_norm": 0.4946221018976473, "learning_rate": 3.258718572587186e-05, "loss": 0.5197, "step": 14161 }, { "epoch": 0.41347697877434236, "grad_norm": 0.5718348038188598, "learning_rate": 3.2585563665855636e-05, "loss": 0.7434, "step": 14162 }, { "epoch": 0.4135061750021897, "grad_norm": 0.5257915954163558, "learning_rate": 3.258394160583942e-05, "loss": 0.5977, "step": 14163 }, { "epoch": 0.4135353712300371, "grad_norm": 0.512824521055895, "learning_rate": 3.25823195458232e-05, "loss": 0.5978, "step": 14164 }, { "epoch": 0.41356456745788445, "grad_norm": 0.5765404906913926, "learning_rate": 3.2580697485806975e-05, "loss": 0.7684, "step": 14165 }, { "epoch": 0.4135937636857318, "grad_norm": 0.5405197996616278, "learning_rate": 3.2579075425790757e-05, "loss": 0.6621, "step": 14166 }, { "epoch": 0.41362295991357917, "grad_norm": 0.516451568532721, "learning_rate": 3.257745336577454e-05, "loss": 0.595, "step": 14167 }, { "epoch": 0.41365215614142653, "grad_norm": 0.518865802361118, "learning_rate": 3.257583130575831e-05, "loss": 0.5973, "step": 14168 }, { "epoch": 0.4136813523692739, "grad_norm": 0.5433836024593949, "learning_rate": 3.2574209245742095e-05, "loss": 0.7024, "step": 14169 }, { "epoch": 0.41371054859712125, "grad_norm": 0.5450263634685727, "learning_rate": 3.257258718572587e-05, "loss": 0.6596, "step": 14170 }, { "epoch": 0.4137397448249686, "grad_norm": 0.556920182577788, "learning_rate": 3.257096512570965e-05, "loss": 0.6789, "step": 14171 }, { "epoch": 0.413768941052816, "grad_norm": 0.5444093358101126, "learning_rate": 3.2569343065693434e-05, "loss": 0.6266, "step": 14172 }, { "epoch": 0.41379813728066334, "grad_norm": 0.5125307438609186, "learning_rate": 3.256772100567721e-05, "loss": 0.5931, "step": 14173 }, { "epoch": 0.4138273335085107, "grad_norm": 0.5009575232297353, "learning_rate": 3.256609894566099e-05, "loss": 0.6002, "step": 14174 }, { "epoch": 0.41385652973635806, "grad_norm": 0.524272366675411, "learning_rate": 3.2564476885644765e-05, "loss": 0.6048, "step": 14175 }, { "epoch": 0.4138857259642054, "grad_norm": 0.5164578114544836, "learning_rate": 3.256285482562855e-05, "loss": 0.5912, "step": 14176 }, { "epoch": 0.4139149221920528, "grad_norm": 0.5317265117053319, "learning_rate": 3.256123276561233e-05, "loss": 0.5929, "step": 14177 }, { "epoch": 0.41394411841990014, "grad_norm": 0.5770049040706783, "learning_rate": 3.255961070559611e-05, "loss": 0.7116, "step": 14178 }, { "epoch": 0.4139733146477475, "grad_norm": 0.5936173379538203, "learning_rate": 3.255798864557989e-05, "loss": 0.6767, "step": 14179 }, { "epoch": 0.41400251087559486, "grad_norm": 0.5805889691119586, "learning_rate": 3.255636658556367e-05, "loss": 0.6819, "step": 14180 }, { "epoch": 0.4140317071034422, "grad_norm": 0.5110352286349139, "learning_rate": 3.255474452554745e-05, "loss": 0.5738, "step": 14181 }, { "epoch": 0.4140609033312896, "grad_norm": 0.4964036924867148, "learning_rate": 3.2553122465531224e-05, "loss": 0.5715, "step": 14182 }, { "epoch": 0.41409009955913695, "grad_norm": 0.4984388506295946, "learning_rate": 3.2551500405515006e-05, "loss": 0.5854, "step": 14183 }, { "epoch": 0.4141192957869843, "grad_norm": 0.5214253419116276, "learning_rate": 3.254987834549879e-05, "loss": 0.5589, "step": 14184 }, { "epoch": 0.41414849201483167, "grad_norm": 0.5719699154875523, "learning_rate": 3.254825628548256e-05, "loss": 0.7547, "step": 14185 }, { "epoch": 0.41417768824267903, "grad_norm": 0.49596742997609555, "learning_rate": 3.2546634225466344e-05, "loss": 0.5327, "step": 14186 }, { "epoch": 0.4142068844705264, "grad_norm": 0.5258111487898208, "learning_rate": 3.2545012165450126e-05, "loss": 0.5899, "step": 14187 }, { "epoch": 0.41423608069837375, "grad_norm": 0.5526546250292783, "learning_rate": 3.25433901054339e-05, "loss": 0.661, "step": 14188 }, { "epoch": 0.4142652769262211, "grad_norm": 0.5330072877011358, "learning_rate": 3.254176804541768e-05, "loss": 0.6123, "step": 14189 }, { "epoch": 0.4142944731540685, "grad_norm": 0.5297202913031152, "learning_rate": 3.254014598540146e-05, "loss": 0.6176, "step": 14190 }, { "epoch": 0.41432366938191584, "grad_norm": 0.4904359813861088, "learning_rate": 3.253852392538524e-05, "loss": 0.5502, "step": 14191 }, { "epoch": 0.4143528656097632, "grad_norm": 0.6060704347111896, "learning_rate": 3.253690186536902e-05, "loss": 0.7333, "step": 14192 }, { "epoch": 0.41438206183761056, "grad_norm": 0.5665091245966318, "learning_rate": 3.2535279805352796e-05, "loss": 0.6853, "step": 14193 }, { "epoch": 0.4144112580654579, "grad_norm": 0.49752332066785565, "learning_rate": 3.253365774533658e-05, "loss": 0.5329, "step": 14194 }, { "epoch": 0.4144404542933053, "grad_norm": 0.5935814118110349, "learning_rate": 3.253203568532036e-05, "loss": 0.7284, "step": 14195 }, { "epoch": 0.41446965052115264, "grad_norm": 0.49399507822605293, "learning_rate": 3.253041362530414e-05, "loss": 0.5554, "step": 14196 }, { "epoch": 0.414498846749, "grad_norm": 0.5462054778231074, "learning_rate": 3.252879156528792e-05, "loss": 0.6316, "step": 14197 }, { "epoch": 0.41452804297684737, "grad_norm": 0.5736711642509739, "learning_rate": 3.25271695052717e-05, "loss": 0.7051, "step": 14198 }, { "epoch": 0.4145572392046947, "grad_norm": 0.5014582403089681, "learning_rate": 3.252554744525548e-05, "loss": 0.5719, "step": 14199 }, { "epoch": 0.4145864354325421, "grad_norm": 0.5485728976268404, "learning_rate": 3.2523925385239255e-05, "loss": 0.6654, "step": 14200 }, { "epoch": 0.4146156316603895, "grad_norm": 0.551141859895545, "learning_rate": 3.252230332522304e-05, "loss": 0.6969, "step": 14201 }, { "epoch": 0.41464482788823687, "grad_norm": 0.5188179997489234, "learning_rate": 3.252068126520681e-05, "loss": 0.6554, "step": 14202 }, { "epoch": 0.4146740241160842, "grad_norm": 0.5012698123431122, "learning_rate": 3.2519059205190594e-05, "loss": 0.58, "step": 14203 }, { "epoch": 0.4147032203439316, "grad_norm": 0.5593597458044314, "learning_rate": 3.2517437145174375e-05, "loss": 0.6344, "step": 14204 }, { "epoch": 0.41473241657177895, "grad_norm": 0.50740725346004, "learning_rate": 3.251581508515815e-05, "loss": 0.5887, "step": 14205 }, { "epoch": 0.4147616127996263, "grad_norm": 0.5563124759120334, "learning_rate": 3.251419302514193e-05, "loss": 0.7389, "step": 14206 }, { "epoch": 0.4147908090274737, "grad_norm": 0.5663819497677164, "learning_rate": 3.251257096512571e-05, "loss": 0.6606, "step": 14207 }, { "epoch": 0.41482000525532103, "grad_norm": 0.49656921971362855, "learning_rate": 3.251094890510949e-05, "loss": 0.5305, "step": 14208 }, { "epoch": 0.4148492014831684, "grad_norm": 0.5610610017660638, "learning_rate": 3.250932684509327e-05, "loss": 0.6047, "step": 14209 }, { "epoch": 0.41487839771101576, "grad_norm": 0.5440970411320092, "learning_rate": 3.2507704785077046e-05, "loss": 0.6718, "step": 14210 }, { "epoch": 0.4149075939388631, "grad_norm": 0.5626594108813942, "learning_rate": 3.250608272506083e-05, "loss": 0.6591, "step": 14211 }, { "epoch": 0.4149367901667105, "grad_norm": 0.5151246974623979, "learning_rate": 3.250446066504461e-05, "loss": 0.6066, "step": 14212 }, { "epoch": 0.41496598639455784, "grad_norm": 0.580476890227083, "learning_rate": 3.2502838605028384e-05, "loss": 0.6804, "step": 14213 }, { "epoch": 0.4149951826224052, "grad_norm": 0.5130272416711033, "learning_rate": 3.250121654501217e-05, "loss": 0.5468, "step": 14214 }, { "epoch": 0.41502437885025256, "grad_norm": 0.5650262804215948, "learning_rate": 3.249959448499595e-05, "loss": 0.7033, "step": 14215 }, { "epoch": 0.4150535750780999, "grad_norm": 0.5207178362806959, "learning_rate": 3.249797242497973e-05, "loss": 0.6105, "step": 14216 }, { "epoch": 0.4150827713059473, "grad_norm": 0.5617923574726783, "learning_rate": 3.2496350364963504e-05, "loss": 0.6932, "step": 14217 }, { "epoch": 0.41511196753379465, "grad_norm": 0.5228276602630845, "learning_rate": 3.2494728304947286e-05, "loss": 0.63, "step": 14218 }, { "epoch": 0.415141163761642, "grad_norm": 0.5265342328619282, "learning_rate": 3.249310624493107e-05, "loss": 0.5904, "step": 14219 }, { "epoch": 0.41517035998948937, "grad_norm": 0.4877068171958507, "learning_rate": 3.249148418491484e-05, "loss": 0.5232, "step": 14220 }, { "epoch": 0.41519955621733673, "grad_norm": 0.5364840318504819, "learning_rate": 3.2489862124898625e-05, "loss": 0.7029, "step": 14221 }, { "epoch": 0.4152287524451841, "grad_norm": 0.5416364897965651, "learning_rate": 3.24882400648824e-05, "loss": 0.6629, "step": 14222 }, { "epoch": 0.41525794867303145, "grad_norm": 0.5372893985637697, "learning_rate": 3.248661800486618e-05, "loss": 0.6657, "step": 14223 }, { "epoch": 0.4152871449008788, "grad_norm": 0.4901156806358465, "learning_rate": 3.248499594484996e-05, "loss": 0.5588, "step": 14224 }, { "epoch": 0.4153163411287262, "grad_norm": 0.5092228341787737, "learning_rate": 3.248337388483374e-05, "loss": 0.6048, "step": 14225 }, { "epoch": 0.41534553735657354, "grad_norm": 0.5217454550628458, "learning_rate": 3.248175182481752e-05, "loss": 0.638, "step": 14226 }, { "epoch": 0.4153747335844209, "grad_norm": 0.4897279373362701, "learning_rate": 3.2480129764801295e-05, "loss": 0.5639, "step": 14227 }, { "epoch": 0.41540392981226826, "grad_norm": 0.6135671284836599, "learning_rate": 3.247850770478508e-05, "loss": 0.7016, "step": 14228 }, { "epoch": 0.4154331260401156, "grad_norm": 0.5020513021403034, "learning_rate": 3.247688564476886e-05, "loss": 0.5733, "step": 14229 }, { "epoch": 0.415462322267963, "grad_norm": 0.5868100805189829, "learning_rate": 3.2475263584752633e-05, "loss": 0.7548, "step": 14230 }, { "epoch": 0.41549151849581034, "grad_norm": 0.5520643210026208, "learning_rate": 3.2473641524736415e-05, "loss": 0.671, "step": 14231 }, { "epoch": 0.4155207147236577, "grad_norm": 0.543582411644894, "learning_rate": 3.24720194647202e-05, "loss": 0.633, "step": 14232 }, { "epoch": 0.41554991095150506, "grad_norm": 0.5461770565494292, "learning_rate": 3.247039740470398e-05, "loss": 0.6471, "step": 14233 }, { "epoch": 0.4155791071793524, "grad_norm": 0.5240302681293371, "learning_rate": 3.246877534468776e-05, "loss": 0.6631, "step": 14234 }, { "epoch": 0.4156083034071998, "grad_norm": 0.5367746076742557, "learning_rate": 3.2467153284671535e-05, "loss": 0.6486, "step": 14235 }, { "epoch": 0.41563749963504715, "grad_norm": 0.5428577135212971, "learning_rate": 3.246553122465532e-05, "loss": 0.6409, "step": 14236 }, { "epoch": 0.4156666958628945, "grad_norm": 0.5077204652530273, "learning_rate": 3.246390916463909e-05, "loss": 0.6011, "step": 14237 }, { "epoch": 0.41569589209074187, "grad_norm": 0.5392172886618737, "learning_rate": 3.2462287104622874e-05, "loss": 0.6824, "step": 14238 }, { "epoch": 0.41572508831858923, "grad_norm": 0.5282301366478279, "learning_rate": 3.2460665044606656e-05, "loss": 0.6143, "step": 14239 }, { "epoch": 0.4157542845464366, "grad_norm": 0.5498973121021715, "learning_rate": 3.245904298459043e-05, "loss": 0.6977, "step": 14240 }, { "epoch": 0.41578348077428395, "grad_norm": 0.5507750233739936, "learning_rate": 3.245742092457421e-05, "loss": 0.5937, "step": 14241 }, { "epoch": 0.4158126770021313, "grad_norm": 0.5694124358942084, "learning_rate": 3.245579886455799e-05, "loss": 0.6825, "step": 14242 }, { "epoch": 0.4158418732299787, "grad_norm": 0.536849298037446, "learning_rate": 3.245417680454177e-05, "loss": 0.6725, "step": 14243 }, { "epoch": 0.41587106945782604, "grad_norm": 0.5818160048686607, "learning_rate": 3.245255474452555e-05, "loss": 0.7163, "step": 14244 }, { "epoch": 0.4159002656856734, "grad_norm": 0.5382264233478, "learning_rate": 3.2450932684509326e-05, "loss": 0.6822, "step": 14245 }, { "epoch": 0.41592946191352076, "grad_norm": 0.5634339136784552, "learning_rate": 3.244931062449311e-05, "loss": 0.6263, "step": 14246 }, { "epoch": 0.4159586581413681, "grad_norm": 0.4942717049781386, "learning_rate": 3.244768856447688e-05, "loss": 0.5152, "step": 14247 }, { "epoch": 0.4159878543692155, "grad_norm": 0.5210454398445322, "learning_rate": 3.2446066504460665e-05, "loss": 0.5982, "step": 14248 }, { "epoch": 0.41601705059706284, "grad_norm": 0.5228331852536329, "learning_rate": 3.2444444444444446e-05, "loss": 0.6028, "step": 14249 }, { "epoch": 0.4160462468249102, "grad_norm": 0.5159738784401116, "learning_rate": 3.244282238442822e-05, "loss": 0.6419, "step": 14250 }, { "epoch": 0.41607544305275757, "grad_norm": 0.5011740013688086, "learning_rate": 3.2441200324412e-05, "loss": 0.5881, "step": 14251 }, { "epoch": 0.4161046392806049, "grad_norm": 0.5250700903083868, "learning_rate": 3.2439578264395785e-05, "loss": 0.5889, "step": 14252 }, { "epoch": 0.4161338355084523, "grad_norm": 0.528876191339442, "learning_rate": 3.2437956204379567e-05, "loss": 0.5807, "step": 14253 }, { "epoch": 0.41616303173629965, "grad_norm": 0.515439591786099, "learning_rate": 3.243633414436335e-05, "loss": 0.6247, "step": 14254 }, { "epoch": 0.416192227964147, "grad_norm": 0.5055997725170572, "learning_rate": 3.243471208434712e-05, "loss": 0.5815, "step": 14255 }, { "epoch": 0.41622142419199437, "grad_norm": 0.5255956370253352, "learning_rate": 3.2433090024330905e-05, "loss": 0.6232, "step": 14256 }, { "epoch": 0.41625062041984173, "grad_norm": 0.5111866026488885, "learning_rate": 3.243146796431468e-05, "loss": 0.6178, "step": 14257 }, { "epoch": 0.4162798166476891, "grad_norm": 0.5445715344587606, "learning_rate": 3.242984590429846e-05, "loss": 0.5897, "step": 14258 }, { "epoch": 0.41630901287553645, "grad_norm": 0.5287770229154872, "learning_rate": 3.2428223844282244e-05, "loss": 0.5916, "step": 14259 }, { "epoch": 0.4163382091033838, "grad_norm": 0.583412951347946, "learning_rate": 3.242660178426602e-05, "loss": 0.6901, "step": 14260 }, { "epoch": 0.4163674053312312, "grad_norm": 0.5395579835890467, "learning_rate": 3.24249797242498e-05, "loss": 0.6465, "step": 14261 }, { "epoch": 0.4163966015590786, "grad_norm": 0.49366157886495193, "learning_rate": 3.2423357664233575e-05, "loss": 0.5826, "step": 14262 }, { "epoch": 0.41642579778692596, "grad_norm": 0.5421591554272147, "learning_rate": 3.242173560421736e-05, "loss": 0.6436, "step": 14263 }, { "epoch": 0.4164549940147733, "grad_norm": 0.4942543187230353, "learning_rate": 3.242011354420114e-05, "loss": 0.5821, "step": 14264 }, { "epoch": 0.4164841902426207, "grad_norm": 0.5057114802564019, "learning_rate": 3.2418491484184914e-05, "loss": 0.5829, "step": 14265 }, { "epoch": 0.41651338647046804, "grad_norm": 0.5349067683629689, "learning_rate": 3.2416869424168696e-05, "loss": 0.5886, "step": 14266 }, { "epoch": 0.4165425826983154, "grad_norm": 0.5353886931199022, "learning_rate": 3.241524736415247e-05, "loss": 0.6586, "step": 14267 }, { "epoch": 0.41657177892616276, "grad_norm": 0.5431734180289037, "learning_rate": 3.241362530413625e-05, "loss": 0.628, "step": 14268 }, { "epoch": 0.4166009751540101, "grad_norm": 0.5173371758964161, "learning_rate": 3.2412003244120034e-05, "loss": 0.602, "step": 14269 }, { "epoch": 0.4166301713818575, "grad_norm": 0.5042000507074995, "learning_rate": 3.241038118410381e-05, "loss": 0.5933, "step": 14270 }, { "epoch": 0.41665936760970484, "grad_norm": 0.6990371614086199, "learning_rate": 3.24087591240876e-05, "loss": 0.75, "step": 14271 }, { "epoch": 0.4166885638375522, "grad_norm": 0.5529448679217381, "learning_rate": 3.240713706407137e-05, "loss": 0.6905, "step": 14272 }, { "epoch": 0.41671776006539957, "grad_norm": 0.5635040734774537, "learning_rate": 3.2405515004055154e-05, "loss": 0.6749, "step": 14273 }, { "epoch": 0.41674695629324693, "grad_norm": 0.49623998221216453, "learning_rate": 3.240389294403893e-05, "loss": 0.5154, "step": 14274 }, { "epoch": 0.4167761525210943, "grad_norm": 0.49530761310989346, "learning_rate": 3.240227088402271e-05, "loss": 0.5501, "step": 14275 }, { "epoch": 0.41680534874894165, "grad_norm": 0.5272404056666185, "learning_rate": 3.240064882400649e-05, "loss": 0.6614, "step": 14276 }, { "epoch": 0.416834544976789, "grad_norm": 0.5063704938385963, "learning_rate": 3.239902676399027e-05, "loss": 0.6071, "step": 14277 }, { "epoch": 0.4168637412046364, "grad_norm": 0.5035251061441142, "learning_rate": 3.239740470397405e-05, "loss": 0.5902, "step": 14278 }, { "epoch": 0.41689293743248373, "grad_norm": 0.8270521172641361, "learning_rate": 3.239578264395783e-05, "loss": 0.6795, "step": 14279 }, { "epoch": 0.4169221336603311, "grad_norm": 0.5459755224845793, "learning_rate": 3.2394160583941606e-05, "loss": 0.6797, "step": 14280 }, { "epoch": 0.41695132988817846, "grad_norm": 0.5389578178699421, "learning_rate": 3.239253852392539e-05, "loss": 0.651, "step": 14281 }, { "epoch": 0.4169805261160258, "grad_norm": 0.5147706318774239, "learning_rate": 3.239091646390916e-05, "loss": 0.5839, "step": 14282 }, { "epoch": 0.4170097223438732, "grad_norm": 0.5685633614213397, "learning_rate": 3.2389294403892945e-05, "loss": 0.6794, "step": 14283 }, { "epoch": 0.41703891857172054, "grad_norm": 0.5076219693277658, "learning_rate": 3.238767234387673e-05, "loss": 0.5798, "step": 14284 }, { "epoch": 0.4170681147995679, "grad_norm": 0.537406852530498, "learning_rate": 3.23860502838605e-05, "loss": 0.6608, "step": 14285 }, { "epoch": 0.41709731102741526, "grad_norm": 0.5188637392075954, "learning_rate": 3.238442822384428e-05, "loss": 0.6286, "step": 14286 }, { "epoch": 0.4171265072552626, "grad_norm": 0.5151189894726993, "learning_rate": 3.238280616382806e-05, "loss": 0.5772, "step": 14287 }, { "epoch": 0.41715570348311, "grad_norm": 0.5033676514258456, "learning_rate": 3.238118410381184e-05, "loss": 0.608, "step": 14288 }, { "epoch": 0.41718489971095735, "grad_norm": 0.5517525464375953, "learning_rate": 3.237956204379562e-05, "loss": 0.6609, "step": 14289 }, { "epoch": 0.4172140959388047, "grad_norm": 0.5075827249694335, "learning_rate": 3.2377939983779404e-05, "loss": 0.6006, "step": 14290 }, { "epoch": 0.41724329216665207, "grad_norm": 0.5353585986994098, "learning_rate": 3.2376317923763185e-05, "loss": 0.6785, "step": 14291 }, { "epoch": 0.41727248839449943, "grad_norm": 0.5025653769332012, "learning_rate": 3.237469586374696e-05, "loss": 0.5661, "step": 14292 }, { "epoch": 0.4173016846223468, "grad_norm": 0.5362031811399135, "learning_rate": 3.237307380373074e-05, "loss": 0.6816, "step": 14293 }, { "epoch": 0.41733088085019415, "grad_norm": 0.5636008857508207, "learning_rate": 3.237145174371452e-05, "loss": 0.6726, "step": 14294 }, { "epoch": 0.4173600770780415, "grad_norm": 0.5484495874022755, "learning_rate": 3.23698296836983e-05, "loss": 0.6422, "step": 14295 }, { "epoch": 0.4173892733058889, "grad_norm": 0.5601684705102342, "learning_rate": 3.236820762368208e-05, "loss": 0.6665, "step": 14296 }, { "epoch": 0.41741846953373624, "grad_norm": 0.5390553201539409, "learning_rate": 3.2366585563665856e-05, "loss": 0.6105, "step": 14297 }, { "epoch": 0.4174476657615836, "grad_norm": 0.5774274826561381, "learning_rate": 3.236496350364964e-05, "loss": 0.7338, "step": 14298 }, { "epoch": 0.41747686198943096, "grad_norm": 0.5314687187052448, "learning_rate": 3.236334144363342e-05, "loss": 0.5867, "step": 14299 }, { "epoch": 0.4175060582172783, "grad_norm": 0.5554804853982551, "learning_rate": 3.2361719383617194e-05, "loss": 0.6492, "step": 14300 }, { "epoch": 0.4175352544451257, "grad_norm": 0.5333778218100156, "learning_rate": 3.2360097323600976e-05, "loss": 0.6602, "step": 14301 }, { "epoch": 0.41756445067297304, "grad_norm": 0.5432818143209514, "learning_rate": 3.235847526358475e-05, "loss": 0.6296, "step": 14302 }, { "epoch": 0.4175936469008204, "grad_norm": 0.5426453496316211, "learning_rate": 3.235685320356853e-05, "loss": 0.6415, "step": 14303 }, { "epoch": 0.41762284312866776, "grad_norm": 0.5939347604122338, "learning_rate": 3.2355231143552314e-05, "loss": 0.7587, "step": 14304 }, { "epoch": 0.4176520393565151, "grad_norm": 0.5220993673927116, "learning_rate": 3.235360908353609e-05, "loss": 0.5902, "step": 14305 }, { "epoch": 0.4176812355843625, "grad_norm": 0.520677942609238, "learning_rate": 3.235198702351987e-05, "loss": 0.5866, "step": 14306 }, { "epoch": 0.41771043181220985, "grad_norm": 0.5793079475605241, "learning_rate": 3.2350364963503646e-05, "loss": 0.7082, "step": 14307 }, { "epoch": 0.4177396280400572, "grad_norm": 0.48573754428310717, "learning_rate": 3.234874290348743e-05, "loss": 0.5559, "step": 14308 }, { "epoch": 0.41776882426790457, "grad_norm": 0.4936448829846689, "learning_rate": 3.234712084347121e-05, "loss": 0.5563, "step": 14309 }, { "epoch": 0.41779802049575193, "grad_norm": 0.5362973810951375, "learning_rate": 3.234549878345499e-05, "loss": 0.6454, "step": 14310 }, { "epoch": 0.4178272167235993, "grad_norm": 0.6144735896202616, "learning_rate": 3.234387672343877e-05, "loss": 0.7791, "step": 14311 }, { "epoch": 0.41785641295144665, "grad_norm": 0.5259807439753746, "learning_rate": 3.234225466342255e-05, "loss": 0.5727, "step": 14312 }, { "epoch": 0.417885609179294, "grad_norm": 0.5279793844243289, "learning_rate": 3.234063260340633e-05, "loss": 0.6061, "step": 14313 }, { "epoch": 0.4179148054071414, "grad_norm": 0.5086703342103054, "learning_rate": 3.2339010543390105e-05, "loss": 0.5888, "step": 14314 }, { "epoch": 0.41794400163498874, "grad_norm": 0.5115484152485305, "learning_rate": 3.233738848337389e-05, "loss": 0.6013, "step": 14315 }, { "epoch": 0.4179731978628361, "grad_norm": 0.5544174765014489, "learning_rate": 3.233576642335767e-05, "loss": 0.7118, "step": 14316 }, { "epoch": 0.41800239409068346, "grad_norm": 0.5846341541971952, "learning_rate": 3.2334144363341443e-05, "loss": 0.7179, "step": 14317 }, { "epoch": 0.4180315903185308, "grad_norm": 0.5036104524814403, "learning_rate": 3.2332522303325225e-05, "loss": 0.5569, "step": 14318 }, { "epoch": 0.4180607865463782, "grad_norm": 0.5461955687962937, "learning_rate": 3.2330900243309e-05, "loss": 0.6505, "step": 14319 }, { "epoch": 0.41808998277422554, "grad_norm": 0.5811985036355485, "learning_rate": 3.232927818329278e-05, "loss": 0.7115, "step": 14320 }, { "epoch": 0.4181191790020729, "grad_norm": 0.5539810077732018, "learning_rate": 3.2327656123276564e-05, "loss": 0.6481, "step": 14321 }, { "epoch": 0.4181483752299203, "grad_norm": 0.5401649596712111, "learning_rate": 3.232603406326034e-05, "loss": 0.6427, "step": 14322 }, { "epoch": 0.4181775714577677, "grad_norm": 0.49458690188352605, "learning_rate": 3.232441200324412e-05, "loss": 0.5187, "step": 14323 }, { "epoch": 0.41820676768561504, "grad_norm": 0.5416113908576176, "learning_rate": 3.23227899432279e-05, "loss": 0.6411, "step": 14324 }, { "epoch": 0.4182359639134624, "grad_norm": 0.5452058434037351, "learning_rate": 3.232116788321168e-05, "loss": 0.6485, "step": 14325 }, { "epoch": 0.41826516014130977, "grad_norm": 0.5702721079525063, "learning_rate": 3.231954582319546e-05, "loss": 0.6836, "step": 14326 }, { "epoch": 0.4182943563691571, "grad_norm": 0.5231215245121253, "learning_rate": 3.2317923763179234e-05, "loss": 0.6204, "step": 14327 }, { "epoch": 0.4183235525970045, "grad_norm": 0.5488174871066744, "learning_rate": 3.231630170316302e-05, "loss": 0.6315, "step": 14328 }, { "epoch": 0.41835274882485185, "grad_norm": 0.5577127106005426, "learning_rate": 3.23146796431468e-05, "loss": 0.7076, "step": 14329 }, { "epoch": 0.4183819450526992, "grad_norm": 0.5573665054836918, "learning_rate": 3.231305758313058e-05, "loss": 0.6765, "step": 14330 }, { "epoch": 0.4184111412805466, "grad_norm": 0.5468667810743266, "learning_rate": 3.231143552311436e-05, "loss": 0.7092, "step": 14331 }, { "epoch": 0.41844033750839393, "grad_norm": 0.5243217694867089, "learning_rate": 3.2309813463098136e-05, "loss": 0.5917, "step": 14332 }, { "epoch": 0.4184695337362413, "grad_norm": 0.5058934382385882, "learning_rate": 3.230819140308192e-05, "loss": 0.5976, "step": 14333 }, { "epoch": 0.41849872996408866, "grad_norm": 0.5360547614480624, "learning_rate": 3.230656934306569e-05, "loss": 0.6503, "step": 14334 }, { "epoch": 0.418527926191936, "grad_norm": 0.537168297364157, "learning_rate": 3.2304947283049475e-05, "loss": 0.5931, "step": 14335 }, { "epoch": 0.4185571224197834, "grad_norm": 0.5283660371500446, "learning_rate": 3.2303325223033256e-05, "loss": 0.6708, "step": 14336 }, { "epoch": 0.41858631864763074, "grad_norm": 0.4921413673340311, "learning_rate": 3.230170316301703e-05, "loss": 0.5422, "step": 14337 }, { "epoch": 0.4186155148754781, "grad_norm": 0.5383669766574645, "learning_rate": 3.230008110300081e-05, "loss": 0.655, "step": 14338 }, { "epoch": 0.41864471110332546, "grad_norm": 0.5363284309787562, "learning_rate": 3.229845904298459e-05, "loss": 0.6327, "step": 14339 }, { "epoch": 0.4186739073311728, "grad_norm": 0.5152652178120116, "learning_rate": 3.229683698296837e-05, "loss": 0.6408, "step": 14340 }, { "epoch": 0.4187031035590202, "grad_norm": 0.5254574310806981, "learning_rate": 3.229521492295215e-05, "loss": 0.6983, "step": 14341 }, { "epoch": 0.41873229978686755, "grad_norm": 0.5623259261169866, "learning_rate": 3.2293592862935927e-05, "loss": 0.7282, "step": 14342 }, { "epoch": 0.4187614960147149, "grad_norm": 0.5273401876577376, "learning_rate": 3.229197080291971e-05, "loss": 0.5577, "step": 14343 }, { "epoch": 0.41879069224256227, "grad_norm": 0.5659922564425968, "learning_rate": 3.229034874290349e-05, "loss": 0.6974, "step": 14344 }, { "epoch": 0.41881988847040963, "grad_norm": 0.5321922187379092, "learning_rate": 3.2288726682887265e-05, "loss": 0.6376, "step": 14345 }, { "epoch": 0.418849084698257, "grad_norm": 0.5586060301133247, "learning_rate": 3.2287104622871054e-05, "loss": 0.6711, "step": 14346 }, { "epoch": 0.41887828092610435, "grad_norm": 0.5135659213915433, "learning_rate": 3.228548256285483e-05, "loss": 0.6197, "step": 14347 }, { "epoch": 0.4189074771539517, "grad_norm": 0.5374467896451184, "learning_rate": 3.228386050283861e-05, "loss": 0.6558, "step": 14348 }, { "epoch": 0.4189366733817991, "grad_norm": 0.5687960261079738, "learning_rate": 3.2282238442822385e-05, "loss": 0.6899, "step": 14349 }, { "epoch": 0.41896586960964644, "grad_norm": 0.7984428039773162, "learning_rate": 3.228061638280617e-05, "loss": 0.6385, "step": 14350 }, { "epoch": 0.4189950658374938, "grad_norm": 0.5385166416696582, "learning_rate": 3.227899432278995e-05, "loss": 0.6485, "step": 14351 }, { "epoch": 0.41902426206534116, "grad_norm": 0.5468881191066216, "learning_rate": 3.2277372262773724e-05, "loss": 0.6703, "step": 14352 }, { "epoch": 0.4190534582931885, "grad_norm": 0.5575025377862306, "learning_rate": 3.2275750202757506e-05, "loss": 0.7018, "step": 14353 }, { "epoch": 0.4190826545210359, "grad_norm": 0.5960021985646818, "learning_rate": 3.227412814274128e-05, "loss": 0.7374, "step": 14354 }, { "epoch": 0.41911185074888324, "grad_norm": 0.5350510387559886, "learning_rate": 3.227250608272506e-05, "loss": 0.626, "step": 14355 }, { "epoch": 0.4191410469767306, "grad_norm": 0.5620730585729269, "learning_rate": 3.2270884022708844e-05, "loss": 0.692, "step": 14356 }, { "epoch": 0.41917024320457796, "grad_norm": 0.5535116639945653, "learning_rate": 3.226926196269262e-05, "loss": 0.6699, "step": 14357 }, { "epoch": 0.4191994394324253, "grad_norm": 0.5388091949196874, "learning_rate": 3.22676399026764e-05, "loss": 0.6894, "step": 14358 }, { "epoch": 0.4192286356602727, "grad_norm": 0.54524396396125, "learning_rate": 3.2266017842660176e-05, "loss": 0.6285, "step": 14359 }, { "epoch": 0.41925783188812005, "grad_norm": 0.532654345162324, "learning_rate": 3.226439578264396e-05, "loss": 0.6113, "step": 14360 }, { "epoch": 0.4192870281159674, "grad_norm": 0.5316378002017413, "learning_rate": 3.226277372262774e-05, "loss": 0.6506, "step": 14361 }, { "epoch": 0.41931622434381477, "grad_norm": 0.5298894284800687, "learning_rate": 3.2261151662611514e-05, "loss": 0.6231, "step": 14362 }, { "epoch": 0.41934542057166213, "grad_norm": 0.4869269745655447, "learning_rate": 3.2259529602595296e-05, "loss": 0.5604, "step": 14363 }, { "epoch": 0.4193746167995095, "grad_norm": 0.5211417257918733, "learning_rate": 3.225790754257907e-05, "loss": 0.6429, "step": 14364 }, { "epoch": 0.41940381302735685, "grad_norm": 0.5515468073645657, "learning_rate": 3.225628548256286e-05, "loss": 0.6633, "step": 14365 }, { "epoch": 0.4194330092552042, "grad_norm": 0.530682214105833, "learning_rate": 3.225466342254664e-05, "loss": 0.6179, "step": 14366 }, { "epoch": 0.4194622054830516, "grad_norm": 0.5103040046114543, "learning_rate": 3.2253041362530416e-05, "loss": 0.6159, "step": 14367 }, { "epoch": 0.41949140171089894, "grad_norm": 0.5824014772715431, "learning_rate": 3.22514193025142e-05, "loss": 0.7219, "step": 14368 }, { "epoch": 0.4195205979387463, "grad_norm": 0.5136398929418522, "learning_rate": 3.224979724249797e-05, "loss": 0.5883, "step": 14369 }, { "epoch": 0.41954979416659366, "grad_norm": 0.482952771428861, "learning_rate": 3.2248175182481755e-05, "loss": 0.5654, "step": 14370 }, { "epoch": 0.419578990394441, "grad_norm": 0.5314100705725183, "learning_rate": 3.224655312246554e-05, "loss": 0.6084, "step": 14371 }, { "epoch": 0.4196081866222884, "grad_norm": 0.5438443316934377, "learning_rate": 3.224493106244931e-05, "loss": 0.6868, "step": 14372 }, { "epoch": 0.41963738285013574, "grad_norm": 0.511228194786815, "learning_rate": 3.224330900243309e-05, "loss": 0.607, "step": 14373 }, { "epoch": 0.4196665790779831, "grad_norm": 0.5399234441790297, "learning_rate": 3.224168694241687e-05, "loss": 0.6044, "step": 14374 }, { "epoch": 0.41969577530583047, "grad_norm": 0.6345023671348563, "learning_rate": 3.224006488240065e-05, "loss": 0.737, "step": 14375 }, { "epoch": 0.4197249715336778, "grad_norm": 0.5716414809977453, "learning_rate": 3.223844282238443e-05, "loss": 0.6943, "step": 14376 }, { "epoch": 0.4197541677615252, "grad_norm": 0.5020208666061796, "learning_rate": 3.223682076236821e-05, "loss": 0.5555, "step": 14377 }, { "epoch": 0.41978336398937255, "grad_norm": 0.5607147148518973, "learning_rate": 3.223519870235199e-05, "loss": 0.6736, "step": 14378 }, { "epoch": 0.4198125602172199, "grad_norm": 0.5164644103122823, "learning_rate": 3.2233576642335764e-05, "loss": 0.633, "step": 14379 }, { "epoch": 0.41984175644506727, "grad_norm": 0.4786160081766096, "learning_rate": 3.2231954582319545e-05, "loss": 0.521, "step": 14380 }, { "epoch": 0.41987095267291463, "grad_norm": 0.5291259900765933, "learning_rate": 3.223033252230333e-05, "loss": 0.6644, "step": 14381 }, { "epoch": 0.41990014890076205, "grad_norm": 0.5291879568792949, "learning_rate": 3.22287104622871e-05, "loss": 0.6531, "step": 14382 }, { "epoch": 0.4199293451286094, "grad_norm": 0.5481190306728152, "learning_rate": 3.2227088402270884e-05, "loss": 0.6815, "step": 14383 }, { "epoch": 0.41995854135645677, "grad_norm": 0.548254859737846, "learning_rate": 3.2225466342254666e-05, "loss": 0.667, "step": 14384 }, { "epoch": 0.41998773758430413, "grad_norm": 0.5257655655235258, "learning_rate": 3.222384428223845e-05, "loss": 0.6425, "step": 14385 }, { "epoch": 0.4200169338121515, "grad_norm": 0.5666468376400496, "learning_rate": 3.222222222222223e-05, "loss": 0.7222, "step": 14386 }, { "epoch": 0.42004613003999886, "grad_norm": 0.4725934679600051, "learning_rate": 3.2220600162206004e-05, "loss": 0.5207, "step": 14387 }, { "epoch": 0.4200753262678462, "grad_norm": 0.49734646127346743, "learning_rate": 3.2218978102189786e-05, "loss": 0.5614, "step": 14388 }, { "epoch": 0.4201045224956936, "grad_norm": 0.5479504054121423, "learning_rate": 3.221735604217356e-05, "loss": 0.6918, "step": 14389 }, { "epoch": 0.42013371872354094, "grad_norm": 0.5172675628453922, "learning_rate": 3.221573398215734e-05, "loss": 0.5782, "step": 14390 }, { "epoch": 0.4201629149513883, "grad_norm": 0.49233351952440174, "learning_rate": 3.2214111922141124e-05, "loss": 0.5374, "step": 14391 }, { "epoch": 0.42019211117923566, "grad_norm": 0.5214762039700044, "learning_rate": 3.22124898621249e-05, "loss": 0.606, "step": 14392 }, { "epoch": 0.420221307407083, "grad_norm": 0.5525575018090069, "learning_rate": 3.221086780210868e-05, "loss": 0.6912, "step": 14393 }, { "epoch": 0.4202505036349304, "grad_norm": 0.5133276851690235, "learning_rate": 3.2209245742092456e-05, "loss": 0.6117, "step": 14394 }, { "epoch": 0.42027969986277774, "grad_norm": 0.4858137231443882, "learning_rate": 3.220762368207624e-05, "loss": 0.5242, "step": 14395 }, { "epoch": 0.4203088960906251, "grad_norm": 0.5705248504218884, "learning_rate": 3.220600162206002e-05, "loss": 0.6449, "step": 14396 }, { "epoch": 0.42033809231847247, "grad_norm": 0.5238527461043242, "learning_rate": 3.2204379562043795e-05, "loss": 0.6145, "step": 14397 }, { "epoch": 0.42036728854631983, "grad_norm": 0.5532473236586739, "learning_rate": 3.2202757502027576e-05, "loss": 0.6641, "step": 14398 }, { "epoch": 0.4203964847741672, "grad_norm": 0.5238850738861611, "learning_rate": 3.220113544201135e-05, "loss": 0.6076, "step": 14399 }, { "epoch": 0.42042568100201455, "grad_norm": 0.5059418981268439, "learning_rate": 3.219951338199513e-05, "loss": 0.6031, "step": 14400 }, { "epoch": 0.4204548772298619, "grad_norm": 0.5384081047274153, "learning_rate": 3.2197891321978915e-05, "loss": 0.5407, "step": 14401 }, { "epoch": 0.4204840734577093, "grad_norm": 0.49406811796620403, "learning_rate": 3.219626926196269e-05, "loss": 0.5386, "step": 14402 }, { "epoch": 0.42051326968555663, "grad_norm": 0.5470113676081222, "learning_rate": 3.219464720194648e-05, "loss": 0.6495, "step": 14403 }, { "epoch": 0.420542465913404, "grad_norm": 0.5405372215395225, "learning_rate": 3.2193025141930253e-05, "loss": 0.6164, "step": 14404 }, { "epoch": 0.42057166214125136, "grad_norm": 0.5060503069144486, "learning_rate": 3.2191403081914035e-05, "loss": 0.5995, "step": 14405 }, { "epoch": 0.4206008583690987, "grad_norm": 0.47259170847943294, "learning_rate": 3.218978102189781e-05, "loss": 0.5243, "step": 14406 }, { "epoch": 0.4206300545969461, "grad_norm": 0.5058521771300846, "learning_rate": 3.218815896188159e-05, "loss": 0.6236, "step": 14407 }, { "epoch": 0.42065925082479344, "grad_norm": 0.5381903415020515, "learning_rate": 3.2186536901865374e-05, "loss": 0.6086, "step": 14408 }, { "epoch": 0.4206884470526408, "grad_norm": 0.5343781479381817, "learning_rate": 3.218491484184915e-05, "loss": 0.6198, "step": 14409 }, { "epoch": 0.42071764328048816, "grad_norm": 0.5271622228093609, "learning_rate": 3.218329278183293e-05, "loss": 0.6216, "step": 14410 }, { "epoch": 0.4207468395083355, "grad_norm": 0.5284512366427389, "learning_rate": 3.218167072181671e-05, "loss": 0.6565, "step": 14411 }, { "epoch": 0.4207760357361829, "grad_norm": 0.5184396217241082, "learning_rate": 3.218004866180049e-05, "loss": 0.617, "step": 14412 }, { "epoch": 0.42080523196403025, "grad_norm": 0.48752728925431965, "learning_rate": 3.217842660178427e-05, "loss": 0.5752, "step": 14413 }, { "epoch": 0.4208344281918776, "grad_norm": 0.5489555014171302, "learning_rate": 3.2176804541768044e-05, "loss": 0.6585, "step": 14414 }, { "epoch": 0.42086362441972497, "grad_norm": 0.5245183866586165, "learning_rate": 3.2175182481751826e-05, "loss": 0.6158, "step": 14415 }, { "epoch": 0.42089282064757233, "grad_norm": 0.5193543719318642, "learning_rate": 3.217356042173561e-05, "loss": 0.6181, "step": 14416 }, { "epoch": 0.4209220168754197, "grad_norm": 0.5217068833455081, "learning_rate": 3.217193836171938e-05, "loss": 0.6157, "step": 14417 }, { "epoch": 0.42095121310326705, "grad_norm": 0.5220348884917715, "learning_rate": 3.2170316301703164e-05, "loss": 0.614, "step": 14418 }, { "epoch": 0.4209804093311144, "grad_norm": 0.5393272841309983, "learning_rate": 3.216869424168694e-05, "loss": 0.6521, "step": 14419 }, { "epoch": 0.4210096055589618, "grad_norm": 0.5310054039409988, "learning_rate": 3.216707218167072e-05, "loss": 0.6211, "step": 14420 }, { "epoch": 0.42103880178680914, "grad_norm": 0.5793697626994314, "learning_rate": 3.21654501216545e-05, "loss": 0.6152, "step": 14421 }, { "epoch": 0.4210679980146565, "grad_norm": 0.54598977179515, "learning_rate": 3.2163828061638285e-05, "loss": 0.617, "step": 14422 }, { "epoch": 0.42109719424250386, "grad_norm": 0.508739859330615, "learning_rate": 3.2162206001622066e-05, "loss": 0.5796, "step": 14423 }, { "epoch": 0.4211263904703512, "grad_norm": 0.5095956882390142, "learning_rate": 3.216058394160584e-05, "loss": 0.5942, "step": 14424 }, { "epoch": 0.4211555866981986, "grad_norm": 0.5179260734313428, "learning_rate": 3.215896188158962e-05, "loss": 0.5631, "step": 14425 }, { "epoch": 0.42118478292604594, "grad_norm": 0.556919678812477, "learning_rate": 3.21573398215734e-05, "loss": 0.6789, "step": 14426 }, { "epoch": 0.4212139791538933, "grad_norm": 0.5450222703737153, "learning_rate": 3.215571776155718e-05, "loss": 0.5824, "step": 14427 }, { "epoch": 0.42124317538174066, "grad_norm": 0.5216011506655769, "learning_rate": 3.215409570154096e-05, "loss": 0.6053, "step": 14428 }, { "epoch": 0.421272371609588, "grad_norm": 0.5247168455570388, "learning_rate": 3.2152473641524737e-05, "loss": 0.6495, "step": 14429 }, { "epoch": 0.4213015678374354, "grad_norm": 0.526326053451217, "learning_rate": 3.215085158150852e-05, "loss": 0.5501, "step": 14430 }, { "epoch": 0.42133076406528275, "grad_norm": 0.5377976304567836, "learning_rate": 3.21492295214923e-05, "loss": 0.6373, "step": 14431 }, { "epoch": 0.4213599602931301, "grad_norm": 0.5311833500735498, "learning_rate": 3.2147607461476075e-05, "loss": 0.661, "step": 14432 }, { "epoch": 0.42138915652097747, "grad_norm": 0.5451109057265846, "learning_rate": 3.214598540145986e-05, "loss": 0.6946, "step": 14433 }, { "epoch": 0.42141835274882483, "grad_norm": 0.5264574088974254, "learning_rate": 3.214436334144363e-05, "loss": 0.6525, "step": 14434 }, { "epoch": 0.4214475489766722, "grad_norm": 0.5422224994046836, "learning_rate": 3.2142741281427414e-05, "loss": 0.6671, "step": 14435 }, { "epoch": 0.42147674520451955, "grad_norm": 0.5341989551369557, "learning_rate": 3.2141119221411195e-05, "loss": 0.6567, "step": 14436 }, { "epoch": 0.4215059414323669, "grad_norm": 0.5516743323586658, "learning_rate": 3.213949716139497e-05, "loss": 0.562, "step": 14437 }, { "epoch": 0.4215351376602143, "grad_norm": 0.5549505088117583, "learning_rate": 3.213787510137875e-05, "loss": 0.7018, "step": 14438 }, { "epoch": 0.42156433388806164, "grad_norm": 0.5071943850628206, "learning_rate": 3.213625304136253e-05, "loss": 0.5921, "step": 14439 }, { "epoch": 0.421593530115909, "grad_norm": 0.5291393203536271, "learning_rate": 3.213463098134631e-05, "loss": 0.6539, "step": 14440 }, { "epoch": 0.42162272634375636, "grad_norm": 0.5238002922715735, "learning_rate": 3.213300892133009e-05, "loss": 0.5917, "step": 14441 }, { "epoch": 0.4216519225716037, "grad_norm": 0.5361510685782088, "learning_rate": 3.213138686131387e-05, "loss": 0.5715, "step": 14442 }, { "epoch": 0.42168111879945114, "grad_norm": 0.5016924701404263, "learning_rate": 3.2129764801297654e-05, "loss": 0.5817, "step": 14443 }, { "epoch": 0.4217103150272985, "grad_norm": 0.5132000920054669, "learning_rate": 3.212814274128143e-05, "loss": 0.6288, "step": 14444 }, { "epoch": 0.42173951125514586, "grad_norm": 0.542522882874416, "learning_rate": 3.212652068126521e-05, "loss": 0.667, "step": 14445 }, { "epoch": 0.4217687074829932, "grad_norm": 0.5453617590060377, "learning_rate": 3.2124898621248986e-05, "loss": 0.6887, "step": 14446 }, { "epoch": 0.4217979037108406, "grad_norm": 0.4942304892962044, "learning_rate": 3.212327656123277e-05, "loss": 0.5965, "step": 14447 }, { "epoch": 0.42182709993868794, "grad_norm": 0.49104687905996963, "learning_rate": 3.212165450121655e-05, "loss": 0.5276, "step": 14448 }, { "epoch": 0.4218562961665353, "grad_norm": 0.5195798751448911, "learning_rate": 3.2120032441200324e-05, "loss": 0.614, "step": 14449 }, { "epoch": 0.42188549239438267, "grad_norm": 0.5214773515485421, "learning_rate": 3.2118410381184106e-05, "loss": 0.6455, "step": 14450 }, { "epoch": 0.42191468862223, "grad_norm": 0.556788416049881, "learning_rate": 3.211678832116788e-05, "loss": 0.725, "step": 14451 }, { "epoch": 0.4219438848500774, "grad_norm": 0.5517842685857463, "learning_rate": 3.211516626115166e-05, "loss": 0.6195, "step": 14452 }, { "epoch": 0.42197308107792475, "grad_norm": 0.5808163297085456, "learning_rate": 3.2113544201135445e-05, "loss": 0.7143, "step": 14453 }, { "epoch": 0.4220022773057721, "grad_norm": 0.5184290357728373, "learning_rate": 3.211192214111922e-05, "loss": 0.6249, "step": 14454 }, { "epoch": 0.4220314735336195, "grad_norm": 0.5443415843559749, "learning_rate": 3.2110300081103e-05, "loss": 0.6421, "step": 14455 }, { "epoch": 0.42206066976146683, "grad_norm": 0.5397540065245349, "learning_rate": 3.210867802108678e-05, "loss": 0.6237, "step": 14456 }, { "epoch": 0.4220898659893142, "grad_norm": 0.5671798646301202, "learning_rate": 3.210705596107056e-05, "loss": 0.6276, "step": 14457 }, { "epoch": 0.42211906221716156, "grad_norm": 0.5098463284407541, "learning_rate": 3.210543390105434e-05, "loss": 0.614, "step": 14458 }, { "epoch": 0.4221482584450089, "grad_norm": 0.5488379934534345, "learning_rate": 3.2103811841038115e-05, "loss": 0.5731, "step": 14459 }, { "epoch": 0.4221774546728563, "grad_norm": 0.5941341065922943, "learning_rate": 3.2102189781021903e-05, "loss": 0.6749, "step": 14460 }, { "epoch": 0.42220665090070364, "grad_norm": 0.5228914011113518, "learning_rate": 3.210056772100568e-05, "loss": 0.5899, "step": 14461 }, { "epoch": 0.422235847128551, "grad_norm": 0.5326660005349984, "learning_rate": 3.209894566098946e-05, "loss": 0.6485, "step": 14462 }, { "epoch": 0.42226504335639836, "grad_norm": 0.5278107396851405, "learning_rate": 3.209732360097324e-05, "loss": 0.6137, "step": 14463 }, { "epoch": 0.4222942395842457, "grad_norm": 0.5535833269848794, "learning_rate": 3.209570154095702e-05, "loss": 0.6368, "step": 14464 }, { "epoch": 0.4223234358120931, "grad_norm": 0.5695389909498179, "learning_rate": 3.20940794809408e-05, "loss": 0.6728, "step": 14465 }, { "epoch": 0.42235263203994045, "grad_norm": 0.5569501456036346, "learning_rate": 3.2092457420924574e-05, "loss": 0.6861, "step": 14466 }, { "epoch": 0.4223818282677878, "grad_norm": 0.5274757789227797, "learning_rate": 3.2090835360908355e-05, "loss": 0.6355, "step": 14467 }, { "epoch": 0.42241102449563517, "grad_norm": 0.5463454946742637, "learning_rate": 3.208921330089214e-05, "loss": 0.6564, "step": 14468 }, { "epoch": 0.42244022072348253, "grad_norm": 0.5828295181646043, "learning_rate": 3.208759124087591e-05, "loss": 0.6843, "step": 14469 }, { "epoch": 0.4224694169513299, "grad_norm": 0.5602441044457881, "learning_rate": 3.2085969180859694e-05, "loss": 0.6787, "step": 14470 }, { "epoch": 0.42249861317917725, "grad_norm": 0.5209387475913255, "learning_rate": 3.208434712084347e-05, "loss": 0.6139, "step": 14471 }, { "epoch": 0.4225278094070246, "grad_norm": 0.4982518342287749, "learning_rate": 3.208272506082725e-05, "loss": 0.5436, "step": 14472 }, { "epoch": 0.422557005634872, "grad_norm": 0.5467835323119873, "learning_rate": 3.208110300081103e-05, "loss": 0.696, "step": 14473 }, { "epoch": 0.42258620186271934, "grad_norm": 0.5087273188797231, "learning_rate": 3.207948094079481e-05, "loss": 0.5734, "step": 14474 }, { "epoch": 0.4226153980905667, "grad_norm": 0.5759799689756046, "learning_rate": 3.207785888077859e-05, "loss": 0.6334, "step": 14475 }, { "epoch": 0.42264459431841406, "grad_norm": 0.5603964255951636, "learning_rate": 3.207623682076237e-05, "loss": 0.6118, "step": 14476 }, { "epoch": 0.4226737905462614, "grad_norm": 0.5243074260964127, "learning_rate": 3.2074614760746146e-05, "loss": 0.6075, "step": 14477 }, { "epoch": 0.4227029867741088, "grad_norm": 0.5516299417932826, "learning_rate": 3.207299270072993e-05, "loss": 0.6735, "step": 14478 }, { "epoch": 0.42273218300195614, "grad_norm": 0.5374766210685729, "learning_rate": 3.207137064071371e-05, "loss": 0.6725, "step": 14479 }, { "epoch": 0.4227613792298035, "grad_norm": 0.49077209520200094, "learning_rate": 3.206974858069749e-05, "loss": 0.542, "step": 14480 }, { "epoch": 0.42279057545765086, "grad_norm": 0.5428724827653562, "learning_rate": 3.2068126520681266e-05, "loss": 0.6665, "step": 14481 }, { "epoch": 0.4228197716854982, "grad_norm": 0.5325740888260958, "learning_rate": 3.206650446066505e-05, "loss": 0.6, "step": 14482 }, { "epoch": 0.4228489679133456, "grad_norm": 0.5486745489038003, "learning_rate": 3.206488240064883e-05, "loss": 0.7058, "step": 14483 }, { "epoch": 0.42287816414119295, "grad_norm": 0.5198403319950295, "learning_rate": 3.2063260340632605e-05, "loss": 0.6098, "step": 14484 }, { "epoch": 0.4229073603690403, "grad_norm": 0.5707521529736489, "learning_rate": 3.2061638280616386e-05, "loss": 0.7128, "step": 14485 }, { "epoch": 0.42293655659688767, "grad_norm": 0.5575848168496431, "learning_rate": 3.206001622060016e-05, "loss": 0.6646, "step": 14486 }, { "epoch": 0.42296575282473503, "grad_norm": 0.5240753678658497, "learning_rate": 3.205839416058394e-05, "loss": 0.6177, "step": 14487 }, { "epoch": 0.4229949490525824, "grad_norm": 0.5148752539086909, "learning_rate": 3.2056772100567725e-05, "loss": 0.62, "step": 14488 }, { "epoch": 0.42302414528042975, "grad_norm": 0.5314638355751313, "learning_rate": 3.20551500405515e-05, "loss": 0.6367, "step": 14489 }, { "epoch": 0.4230533415082771, "grad_norm": 0.5226941227078649, "learning_rate": 3.205352798053528e-05, "loss": 0.6182, "step": 14490 }, { "epoch": 0.4230825377361245, "grad_norm": 0.5349086754232593, "learning_rate": 3.205190592051906e-05, "loss": 0.6434, "step": 14491 }, { "epoch": 0.42311173396397184, "grad_norm": 0.49614624875300495, "learning_rate": 3.205028386050284e-05, "loss": 0.558, "step": 14492 }, { "epoch": 0.4231409301918192, "grad_norm": 0.5509125337925285, "learning_rate": 3.204866180048662e-05, "loss": 0.6625, "step": 14493 }, { "epoch": 0.42317012641966656, "grad_norm": 0.5312849991980263, "learning_rate": 3.2047039740470395e-05, "loss": 0.6232, "step": 14494 }, { "epoch": 0.4231993226475139, "grad_norm": 0.5044508847453997, "learning_rate": 3.204541768045418e-05, "loss": 0.5384, "step": 14495 }, { "epoch": 0.4232285188753613, "grad_norm": 0.5649283660320704, "learning_rate": 3.204379562043795e-05, "loss": 0.6752, "step": 14496 }, { "epoch": 0.42325771510320864, "grad_norm": 0.5688568617215147, "learning_rate": 3.204217356042174e-05, "loss": 0.6522, "step": 14497 }, { "epoch": 0.423286911331056, "grad_norm": 0.5227697664525787, "learning_rate": 3.204055150040552e-05, "loss": 0.6426, "step": 14498 }, { "epoch": 0.42331610755890337, "grad_norm": 0.5782660693735872, "learning_rate": 3.20389294403893e-05, "loss": 0.6815, "step": 14499 }, { "epoch": 0.4233453037867507, "grad_norm": 0.5517195142851751, "learning_rate": 3.203730738037308e-05, "loss": 0.677, "step": 14500 }, { "epoch": 0.4233745000145981, "grad_norm": 0.49882361235986483, "learning_rate": 3.2035685320356854e-05, "loss": 0.5385, "step": 14501 }, { "epoch": 0.42340369624244545, "grad_norm": 0.5410506261939096, "learning_rate": 3.2034063260340636e-05, "loss": 0.659, "step": 14502 }, { "epoch": 0.42343289247029287, "grad_norm": 0.4906003465825883, "learning_rate": 3.203244120032442e-05, "loss": 0.5704, "step": 14503 }, { "epoch": 0.4234620886981402, "grad_norm": 0.5301780788156994, "learning_rate": 3.203081914030819e-05, "loss": 0.6268, "step": 14504 }, { "epoch": 0.4234912849259876, "grad_norm": 0.5570910143533219, "learning_rate": 3.2029197080291974e-05, "loss": 0.653, "step": 14505 }, { "epoch": 0.42352048115383495, "grad_norm": 0.5090545094246262, "learning_rate": 3.202757502027575e-05, "loss": 0.6031, "step": 14506 }, { "epoch": 0.4235496773816823, "grad_norm": 0.49457176028515226, "learning_rate": 3.202595296025953e-05, "loss": 0.5512, "step": 14507 }, { "epoch": 0.42357887360952967, "grad_norm": 0.589096921789651, "learning_rate": 3.202433090024331e-05, "loss": 0.7155, "step": 14508 }, { "epoch": 0.42360806983737703, "grad_norm": 0.5503315075579924, "learning_rate": 3.202270884022709e-05, "loss": 0.679, "step": 14509 }, { "epoch": 0.4236372660652244, "grad_norm": 0.4999651991005883, "learning_rate": 3.202108678021087e-05, "loss": 0.5591, "step": 14510 }, { "epoch": 0.42366646229307175, "grad_norm": 0.5557309217053503, "learning_rate": 3.2019464720194645e-05, "loss": 0.6876, "step": 14511 }, { "epoch": 0.4236956585209191, "grad_norm": 0.5034153645698699, "learning_rate": 3.2017842660178426e-05, "loss": 0.628, "step": 14512 }, { "epoch": 0.4237248547487665, "grad_norm": 0.5183972182195863, "learning_rate": 3.201622060016221e-05, "loss": 0.6227, "step": 14513 }, { "epoch": 0.42375405097661384, "grad_norm": 0.5471041843044127, "learning_rate": 3.201459854014598e-05, "loss": 0.6191, "step": 14514 }, { "epoch": 0.4237832472044612, "grad_norm": 0.563219035860741, "learning_rate": 3.2012976480129765e-05, "loss": 0.6915, "step": 14515 }, { "epoch": 0.42381244343230856, "grad_norm": 0.5024560283210296, "learning_rate": 3.2011354420113547e-05, "loss": 0.5739, "step": 14516 }, { "epoch": 0.4238416396601559, "grad_norm": 0.5096751805064833, "learning_rate": 3.200973236009733e-05, "loss": 0.6025, "step": 14517 }, { "epoch": 0.4238708358880033, "grad_norm": 0.5523743033585583, "learning_rate": 3.20081103000811e-05, "loss": 0.7158, "step": 14518 }, { "epoch": 0.42390003211585064, "grad_norm": 0.533320348851527, "learning_rate": 3.2006488240064885e-05, "loss": 0.6386, "step": 14519 }, { "epoch": 0.423929228343698, "grad_norm": 0.5529175015350882, "learning_rate": 3.200486618004867e-05, "loss": 0.6341, "step": 14520 }, { "epoch": 0.42395842457154537, "grad_norm": 0.538728886537945, "learning_rate": 3.200324412003244e-05, "loss": 0.6514, "step": 14521 }, { "epoch": 0.42398762079939273, "grad_norm": 0.5266637289052245, "learning_rate": 3.2001622060016224e-05, "loss": 0.6171, "step": 14522 }, { "epoch": 0.4240168170272401, "grad_norm": 0.500260587744664, "learning_rate": 3.2000000000000005e-05, "loss": 0.6295, "step": 14523 }, { "epoch": 0.42404601325508745, "grad_norm": 0.5257932613212717, "learning_rate": 3.199837793998378e-05, "loss": 0.6201, "step": 14524 }, { "epoch": 0.4240752094829348, "grad_norm": 0.5593412587850517, "learning_rate": 3.199675587996756e-05, "loss": 0.6407, "step": 14525 }, { "epoch": 0.4241044057107822, "grad_norm": 0.5287236984872483, "learning_rate": 3.199513381995134e-05, "loss": 0.6117, "step": 14526 }, { "epoch": 0.42413360193862953, "grad_norm": 0.5817987740579302, "learning_rate": 3.199351175993512e-05, "loss": 0.6976, "step": 14527 }, { "epoch": 0.4241627981664769, "grad_norm": 0.5292292151037212, "learning_rate": 3.19918896999189e-05, "loss": 0.6408, "step": 14528 }, { "epoch": 0.42419199439432426, "grad_norm": 0.482292901124624, "learning_rate": 3.1990267639902676e-05, "loss": 0.5442, "step": 14529 }, { "epoch": 0.4242211906221716, "grad_norm": 0.48986761504425036, "learning_rate": 3.198864557988646e-05, "loss": 0.5507, "step": 14530 }, { "epoch": 0.424250386850019, "grad_norm": 0.5667434661845637, "learning_rate": 3.198702351987023e-05, "loss": 0.6777, "step": 14531 }, { "epoch": 0.42427958307786634, "grad_norm": 0.5551739720128365, "learning_rate": 3.1985401459854014e-05, "loss": 0.6695, "step": 14532 }, { "epoch": 0.4243087793057137, "grad_norm": 0.5233065748239124, "learning_rate": 3.1983779399837796e-05, "loss": 0.5907, "step": 14533 }, { "epoch": 0.42433797553356106, "grad_norm": 0.4906073322095818, "learning_rate": 3.198215733982157e-05, "loss": 0.5523, "step": 14534 }, { "epoch": 0.4243671717614084, "grad_norm": 0.5199399094701457, "learning_rate": 3.198053527980536e-05, "loss": 0.6452, "step": 14535 }, { "epoch": 0.4243963679892558, "grad_norm": 0.5219745348682535, "learning_rate": 3.1978913219789134e-05, "loss": 0.6475, "step": 14536 }, { "epoch": 0.42442556421710315, "grad_norm": 0.5295144133121482, "learning_rate": 3.1977291159772916e-05, "loss": 0.6387, "step": 14537 }, { "epoch": 0.4244547604449505, "grad_norm": 0.49511470386104034, "learning_rate": 3.197566909975669e-05, "loss": 0.5723, "step": 14538 }, { "epoch": 0.42448395667279787, "grad_norm": 0.5510160998496455, "learning_rate": 3.197404703974047e-05, "loss": 0.6384, "step": 14539 }, { "epoch": 0.42451315290064523, "grad_norm": 0.5776695265286003, "learning_rate": 3.1972424979724255e-05, "loss": 0.6915, "step": 14540 }, { "epoch": 0.4245423491284926, "grad_norm": 0.5143617452552304, "learning_rate": 3.197080291970803e-05, "loss": 0.5696, "step": 14541 }, { "epoch": 0.42457154535633995, "grad_norm": 0.5448941409103519, "learning_rate": 3.196918085969181e-05, "loss": 0.6743, "step": 14542 }, { "epoch": 0.4246007415841873, "grad_norm": 0.5253657772437609, "learning_rate": 3.196755879967559e-05, "loss": 0.6161, "step": 14543 }, { "epoch": 0.4246299378120347, "grad_norm": 0.5372853764849695, "learning_rate": 3.196593673965937e-05, "loss": 0.5828, "step": 14544 }, { "epoch": 0.42465913403988204, "grad_norm": 0.5818272926123513, "learning_rate": 3.196431467964315e-05, "loss": 0.7008, "step": 14545 }, { "epoch": 0.4246883302677294, "grad_norm": 0.5659144058685199, "learning_rate": 3.1962692619626925e-05, "loss": 0.6908, "step": 14546 }, { "epoch": 0.42471752649557676, "grad_norm": 0.5242140434748309, "learning_rate": 3.196107055961071e-05, "loss": 0.6034, "step": 14547 }, { "epoch": 0.4247467227234241, "grad_norm": 0.5204144731542267, "learning_rate": 3.195944849959449e-05, "loss": 0.6523, "step": 14548 }, { "epoch": 0.4247759189512715, "grad_norm": 0.5265003001917268, "learning_rate": 3.195782643957826e-05, "loss": 0.577, "step": 14549 }, { "epoch": 0.42480511517911884, "grad_norm": 0.5365110422349808, "learning_rate": 3.1956204379562045e-05, "loss": 0.6166, "step": 14550 }, { "epoch": 0.4248343114069662, "grad_norm": 0.5155054620894115, "learning_rate": 3.195458231954582e-05, "loss": 0.6122, "step": 14551 }, { "epoch": 0.42486350763481356, "grad_norm": 0.5344102252844795, "learning_rate": 3.19529602595296e-05, "loss": 0.6708, "step": 14552 }, { "epoch": 0.4248927038626609, "grad_norm": 0.5565974617179162, "learning_rate": 3.1951338199513384e-05, "loss": 0.6991, "step": 14553 }, { "epoch": 0.4249219000905083, "grad_norm": 0.559397911150781, "learning_rate": 3.1949716139497165e-05, "loss": 0.6912, "step": 14554 }, { "epoch": 0.42495109631835565, "grad_norm": 0.5231751709828286, "learning_rate": 3.194809407948095e-05, "loss": 0.5641, "step": 14555 }, { "epoch": 0.424980292546203, "grad_norm": 0.531144526239711, "learning_rate": 3.194647201946472e-05, "loss": 0.6041, "step": 14556 }, { "epoch": 0.42500948877405037, "grad_norm": 0.6160087388028187, "learning_rate": 3.1944849959448504e-05, "loss": 0.7072, "step": 14557 }, { "epoch": 0.42503868500189773, "grad_norm": 0.5405982730961217, "learning_rate": 3.194322789943228e-05, "loss": 0.6156, "step": 14558 }, { "epoch": 0.4250678812297451, "grad_norm": 0.5126922503210862, "learning_rate": 3.194160583941606e-05, "loss": 0.5984, "step": 14559 }, { "epoch": 0.42509707745759245, "grad_norm": 0.4852015059626925, "learning_rate": 3.193998377939984e-05, "loss": 0.5341, "step": 14560 }, { "epoch": 0.4251262736854398, "grad_norm": 0.4974710362771131, "learning_rate": 3.193836171938362e-05, "loss": 0.5683, "step": 14561 }, { "epoch": 0.4251554699132872, "grad_norm": 0.5735886687472936, "learning_rate": 3.19367396593674e-05, "loss": 0.665, "step": 14562 }, { "epoch": 0.4251846661411346, "grad_norm": 0.5178408330293194, "learning_rate": 3.1935117599351174e-05, "loss": 0.5707, "step": 14563 }, { "epoch": 0.42521386236898195, "grad_norm": 0.5050147993958815, "learning_rate": 3.1933495539334956e-05, "loss": 0.5889, "step": 14564 }, { "epoch": 0.4252430585968293, "grad_norm": 0.538310625724927, "learning_rate": 3.193187347931874e-05, "loss": 0.6617, "step": 14565 }, { "epoch": 0.4252722548246767, "grad_norm": 0.5319865240766155, "learning_rate": 3.193025141930251e-05, "loss": 0.6335, "step": 14566 }, { "epoch": 0.42530145105252404, "grad_norm": 0.5589804216847025, "learning_rate": 3.1928629359286294e-05, "loss": 0.6529, "step": 14567 }, { "epoch": 0.4253306472803714, "grad_norm": 0.5455744386019994, "learning_rate": 3.1927007299270076e-05, "loss": 0.6968, "step": 14568 }, { "epoch": 0.42535984350821876, "grad_norm": 0.5314640741601622, "learning_rate": 3.192538523925385e-05, "loss": 0.6315, "step": 14569 }, { "epoch": 0.4253890397360661, "grad_norm": 0.5311760234468861, "learning_rate": 3.192376317923763e-05, "loss": 0.5861, "step": 14570 }, { "epoch": 0.4254182359639135, "grad_norm": 0.5558685281464786, "learning_rate": 3.192214111922141e-05, "loss": 0.6763, "step": 14571 }, { "epoch": 0.42544743219176084, "grad_norm": 0.49728830141971286, "learning_rate": 3.192051905920519e-05, "loss": 0.5696, "step": 14572 }, { "epoch": 0.4254766284196082, "grad_norm": 0.5391758592458692, "learning_rate": 3.191889699918897e-05, "loss": 0.6268, "step": 14573 }, { "epoch": 0.42550582464745557, "grad_norm": 0.49615200895247724, "learning_rate": 3.191727493917275e-05, "loss": 0.5978, "step": 14574 }, { "epoch": 0.4255350208753029, "grad_norm": 0.5215702939477871, "learning_rate": 3.1915652879156535e-05, "loss": 0.5948, "step": 14575 }, { "epoch": 0.4255642171031503, "grad_norm": 0.5160036789300697, "learning_rate": 3.191403081914031e-05, "loss": 0.6003, "step": 14576 }, { "epoch": 0.42559341333099765, "grad_norm": 0.5549140943703444, "learning_rate": 3.191240875912409e-05, "loss": 0.7096, "step": 14577 }, { "epoch": 0.425622609558845, "grad_norm": 0.5709668106856568, "learning_rate": 3.191078669910787e-05, "loss": 0.6773, "step": 14578 }, { "epoch": 0.42565180578669237, "grad_norm": 0.5550825285097205, "learning_rate": 3.190916463909165e-05, "loss": 0.6636, "step": 14579 }, { "epoch": 0.42568100201453973, "grad_norm": 0.5132983029132, "learning_rate": 3.190754257907543e-05, "loss": 0.5944, "step": 14580 }, { "epoch": 0.4257101982423871, "grad_norm": 0.5148846281811611, "learning_rate": 3.1905920519059205e-05, "loss": 0.5574, "step": 14581 }, { "epoch": 0.42573939447023446, "grad_norm": 0.5099498001388421, "learning_rate": 3.190429845904299e-05, "loss": 0.5462, "step": 14582 }, { "epoch": 0.4257685906980818, "grad_norm": 0.5554438933905473, "learning_rate": 3.190267639902676e-05, "loss": 0.6729, "step": 14583 }, { "epoch": 0.4257977869259292, "grad_norm": 0.5363587370387772, "learning_rate": 3.1901054339010544e-05, "loss": 0.5962, "step": 14584 }, { "epoch": 0.42582698315377654, "grad_norm": 0.5457711817112966, "learning_rate": 3.1899432278994325e-05, "loss": 0.6702, "step": 14585 }, { "epoch": 0.4258561793816239, "grad_norm": 0.5903111226853435, "learning_rate": 3.18978102189781e-05, "loss": 0.7399, "step": 14586 }, { "epoch": 0.42588537560947126, "grad_norm": 0.5226983495993518, "learning_rate": 3.189618815896188e-05, "loss": 0.6163, "step": 14587 }, { "epoch": 0.4259145718373186, "grad_norm": 0.5224984811506972, "learning_rate": 3.1894566098945664e-05, "loss": 0.6276, "step": 14588 }, { "epoch": 0.425943768065166, "grad_norm": 0.5250397234505076, "learning_rate": 3.189294403892944e-05, "loss": 0.6118, "step": 14589 }, { "epoch": 0.42597296429301335, "grad_norm": 0.5180360728383181, "learning_rate": 3.189132197891322e-05, "loss": 0.5983, "step": 14590 }, { "epoch": 0.4260021605208607, "grad_norm": 0.5532879255111668, "learning_rate": 3.1889699918896996e-05, "loss": 0.6811, "step": 14591 }, { "epoch": 0.42603135674870807, "grad_norm": 0.5779696346142525, "learning_rate": 3.1888077858880784e-05, "loss": 0.6777, "step": 14592 }, { "epoch": 0.42606055297655543, "grad_norm": 0.5181411434385085, "learning_rate": 3.188645579886456e-05, "loss": 0.6179, "step": 14593 }, { "epoch": 0.4260897492044028, "grad_norm": 0.5543169899186408, "learning_rate": 3.188483373884834e-05, "loss": 0.6335, "step": 14594 }, { "epoch": 0.42611894543225015, "grad_norm": 0.5257726918550626, "learning_rate": 3.188321167883212e-05, "loss": 0.644, "step": 14595 }, { "epoch": 0.4261481416600975, "grad_norm": 0.5306290828872886, "learning_rate": 3.18815896188159e-05, "loss": 0.6124, "step": 14596 }, { "epoch": 0.4261773378879449, "grad_norm": 0.5609604932544516, "learning_rate": 3.187996755879968e-05, "loss": 0.6455, "step": 14597 }, { "epoch": 0.42620653411579223, "grad_norm": 0.546657669982198, "learning_rate": 3.1878345498783455e-05, "loss": 0.6718, "step": 14598 }, { "epoch": 0.4262357303436396, "grad_norm": 0.5450992334257243, "learning_rate": 3.1876723438767236e-05, "loss": 0.6739, "step": 14599 }, { "epoch": 0.42626492657148696, "grad_norm": 0.5081711465138781, "learning_rate": 3.187510137875102e-05, "loss": 0.6053, "step": 14600 }, { "epoch": 0.4262941227993343, "grad_norm": 0.4940180146837205, "learning_rate": 3.187347931873479e-05, "loss": 0.5405, "step": 14601 }, { "epoch": 0.4263233190271817, "grad_norm": 0.505317339259162, "learning_rate": 3.1871857258718575e-05, "loss": 0.5758, "step": 14602 }, { "epoch": 0.42635251525502904, "grad_norm": 0.5695513271501693, "learning_rate": 3.187023519870235e-05, "loss": 0.6328, "step": 14603 }, { "epoch": 0.4263817114828764, "grad_norm": 0.5429885511006416, "learning_rate": 3.186861313868613e-05, "loss": 0.6238, "step": 14604 }, { "epoch": 0.42641090771072376, "grad_norm": 0.4912492469528983, "learning_rate": 3.186699107866991e-05, "loss": 0.5527, "step": 14605 }, { "epoch": 0.4264401039385711, "grad_norm": 0.5472912883764365, "learning_rate": 3.186536901865369e-05, "loss": 0.6741, "step": 14606 }, { "epoch": 0.4264693001664185, "grad_norm": 0.5838027072074582, "learning_rate": 3.186374695863747e-05, "loss": 0.5918, "step": 14607 }, { "epoch": 0.42649849639426585, "grad_norm": 0.6002158238422889, "learning_rate": 3.1862124898621245e-05, "loss": 0.6776, "step": 14608 }, { "epoch": 0.4265276926221132, "grad_norm": 0.6864458400729422, "learning_rate": 3.186050283860503e-05, "loss": 0.6125, "step": 14609 }, { "epoch": 0.42655688884996057, "grad_norm": 0.5504155687353537, "learning_rate": 3.185888077858881e-05, "loss": 0.6905, "step": 14610 }, { "epoch": 0.42658608507780793, "grad_norm": 0.5391066869337063, "learning_rate": 3.185725871857259e-05, "loss": 0.6462, "step": 14611 }, { "epoch": 0.4266152813056553, "grad_norm": 0.4908569570615853, "learning_rate": 3.185563665855637e-05, "loss": 0.5513, "step": 14612 }, { "epoch": 0.42664447753350265, "grad_norm": 0.4861329830510495, "learning_rate": 3.185401459854015e-05, "loss": 0.5381, "step": 14613 }, { "epoch": 0.42667367376135, "grad_norm": 0.5325755267323358, "learning_rate": 3.185239253852393e-05, "loss": 0.6407, "step": 14614 }, { "epoch": 0.4267028699891974, "grad_norm": 0.5958636682323017, "learning_rate": 3.185077047850771e-05, "loss": 0.745, "step": 14615 }, { "epoch": 0.42673206621704474, "grad_norm": 0.5351282254662433, "learning_rate": 3.1849148418491486e-05, "loss": 0.6718, "step": 14616 }, { "epoch": 0.4267612624448921, "grad_norm": 0.5437676039614814, "learning_rate": 3.184752635847527e-05, "loss": 0.5979, "step": 14617 }, { "epoch": 0.42679045867273946, "grad_norm": 0.49754254707240886, "learning_rate": 3.184590429845904e-05, "loss": 0.5293, "step": 14618 }, { "epoch": 0.4268196549005868, "grad_norm": 0.5113471338602846, "learning_rate": 3.1844282238442824e-05, "loss": 0.5946, "step": 14619 }, { "epoch": 0.4268488511284342, "grad_norm": 0.5601289203205024, "learning_rate": 3.1842660178426606e-05, "loss": 0.6463, "step": 14620 }, { "epoch": 0.42687804735628154, "grad_norm": 0.5294967120370501, "learning_rate": 3.184103811841038e-05, "loss": 0.6445, "step": 14621 }, { "epoch": 0.4269072435841289, "grad_norm": 0.5336055469575007, "learning_rate": 3.183941605839416e-05, "loss": 0.6221, "step": 14622 }, { "epoch": 0.42693643981197626, "grad_norm": 0.5339358844095012, "learning_rate": 3.183779399837794e-05, "loss": 0.5941, "step": 14623 }, { "epoch": 0.4269656360398237, "grad_norm": 0.518755481553495, "learning_rate": 3.183617193836172e-05, "loss": 0.6302, "step": 14624 }, { "epoch": 0.42699483226767104, "grad_norm": 0.5121478176739976, "learning_rate": 3.18345498783455e-05, "loss": 0.5604, "step": 14625 }, { "epoch": 0.4270240284955184, "grad_norm": 0.5283105062252392, "learning_rate": 3.1832927818329276e-05, "loss": 0.6033, "step": 14626 }, { "epoch": 0.42705322472336577, "grad_norm": 0.5850485501820871, "learning_rate": 3.183130575831306e-05, "loss": 0.6526, "step": 14627 }, { "epoch": 0.4270824209512131, "grad_norm": 0.5670915188515403, "learning_rate": 3.182968369829683e-05, "loss": 0.6816, "step": 14628 }, { "epoch": 0.4271116171790605, "grad_norm": 0.5542729949029945, "learning_rate": 3.1828061638280615e-05, "loss": 0.6144, "step": 14629 }, { "epoch": 0.42714081340690785, "grad_norm": 0.541769675499304, "learning_rate": 3.18264395782644e-05, "loss": 0.6163, "step": 14630 }, { "epoch": 0.4271700096347552, "grad_norm": 0.5286655643310195, "learning_rate": 3.182481751824818e-05, "loss": 0.5845, "step": 14631 }, { "epoch": 0.42719920586260257, "grad_norm": 0.5701713399357456, "learning_rate": 3.182319545823196e-05, "loss": 0.6791, "step": 14632 }, { "epoch": 0.42722840209044993, "grad_norm": 0.5243116397210569, "learning_rate": 3.1821573398215735e-05, "loss": 0.6041, "step": 14633 }, { "epoch": 0.4272575983182973, "grad_norm": 0.5138093110995487, "learning_rate": 3.181995133819952e-05, "loss": 0.6221, "step": 14634 }, { "epoch": 0.42728679454614465, "grad_norm": 0.5008819059383354, "learning_rate": 3.18183292781833e-05, "loss": 0.555, "step": 14635 }, { "epoch": 0.427315990773992, "grad_norm": 0.6217975781915854, "learning_rate": 3.1816707218167073e-05, "loss": 0.8381, "step": 14636 }, { "epoch": 0.4273451870018394, "grad_norm": 0.573792319489999, "learning_rate": 3.1815085158150855e-05, "loss": 0.6662, "step": 14637 }, { "epoch": 0.42737438322968674, "grad_norm": 0.5383107715634134, "learning_rate": 3.181346309813463e-05, "loss": 0.6568, "step": 14638 }, { "epoch": 0.4274035794575341, "grad_norm": 0.49866344596595286, "learning_rate": 3.181184103811841e-05, "loss": 0.5894, "step": 14639 }, { "epoch": 0.42743277568538146, "grad_norm": 0.5802238997854421, "learning_rate": 3.1810218978102194e-05, "loss": 0.6611, "step": 14640 }, { "epoch": 0.4274619719132288, "grad_norm": 0.5184944872728426, "learning_rate": 3.180859691808597e-05, "loss": 0.6173, "step": 14641 }, { "epoch": 0.4274911681410762, "grad_norm": 0.5458629682359503, "learning_rate": 3.180697485806975e-05, "loss": 0.6711, "step": 14642 }, { "epoch": 0.42752036436892354, "grad_norm": 0.5464106022396457, "learning_rate": 3.1805352798053525e-05, "loss": 0.6454, "step": 14643 }, { "epoch": 0.4275495605967709, "grad_norm": 0.521954837881602, "learning_rate": 3.180373073803731e-05, "loss": 0.6289, "step": 14644 }, { "epoch": 0.42757875682461827, "grad_norm": 0.5226302675776138, "learning_rate": 3.180210867802109e-05, "loss": 0.5848, "step": 14645 }, { "epoch": 0.42760795305246563, "grad_norm": 0.5583418870441708, "learning_rate": 3.1800486618004864e-05, "loss": 0.6344, "step": 14646 }, { "epoch": 0.427637149280313, "grad_norm": 0.5153993245163696, "learning_rate": 3.1798864557988646e-05, "loss": 0.6215, "step": 14647 }, { "epoch": 0.42766634550816035, "grad_norm": 0.5494602232621405, "learning_rate": 3.179724249797243e-05, "loss": 0.6422, "step": 14648 }, { "epoch": 0.4276955417360077, "grad_norm": 0.5029967446014699, "learning_rate": 3.179562043795621e-05, "loss": 0.5867, "step": 14649 }, { "epoch": 0.4277247379638551, "grad_norm": 0.5228175870887369, "learning_rate": 3.1793998377939984e-05, "loss": 0.6075, "step": 14650 }, { "epoch": 0.42775393419170243, "grad_norm": 0.5189899341317884, "learning_rate": 3.1792376317923766e-05, "loss": 0.5897, "step": 14651 }, { "epoch": 0.4277831304195498, "grad_norm": 0.5347781703614268, "learning_rate": 3.179075425790755e-05, "loss": 0.6226, "step": 14652 }, { "epoch": 0.42781232664739716, "grad_norm": 0.4743557845504099, "learning_rate": 3.178913219789132e-05, "loss": 0.5348, "step": 14653 }, { "epoch": 0.4278415228752445, "grad_norm": 0.5221830965885033, "learning_rate": 3.1787510137875104e-05, "loss": 0.6139, "step": 14654 }, { "epoch": 0.4278707191030919, "grad_norm": 0.502222481427178, "learning_rate": 3.1785888077858886e-05, "loss": 0.5642, "step": 14655 }, { "epoch": 0.42789991533093924, "grad_norm": 0.5172485957463268, "learning_rate": 3.178426601784266e-05, "loss": 0.6263, "step": 14656 }, { "epoch": 0.4279291115587866, "grad_norm": 0.6436611567947537, "learning_rate": 3.178264395782644e-05, "loss": 0.7697, "step": 14657 }, { "epoch": 0.42795830778663396, "grad_norm": 0.5217234507863994, "learning_rate": 3.178102189781022e-05, "loss": 0.6083, "step": 14658 }, { "epoch": 0.4279875040144813, "grad_norm": 0.5289523318475517, "learning_rate": 3.1779399837794e-05, "loss": 0.6431, "step": 14659 }, { "epoch": 0.4280167002423287, "grad_norm": 0.527034625854671, "learning_rate": 3.177777777777778e-05, "loss": 0.5757, "step": 14660 }, { "epoch": 0.42804589647017605, "grad_norm": 0.500658966639678, "learning_rate": 3.1776155717761556e-05, "loss": 0.569, "step": 14661 }, { "epoch": 0.4280750926980234, "grad_norm": 0.5336103666920621, "learning_rate": 3.177453365774534e-05, "loss": 0.6273, "step": 14662 }, { "epoch": 0.42810428892587077, "grad_norm": 0.5023653957281529, "learning_rate": 3.177291159772911e-05, "loss": 0.5739, "step": 14663 }, { "epoch": 0.42813348515371813, "grad_norm": 0.5805387250487162, "learning_rate": 3.1771289537712895e-05, "loss": 0.6627, "step": 14664 }, { "epoch": 0.4281626813815655, "grad_norm": 0.5156618710678141, "learning_rate": 3.176966747769668e-05, "loss": 0.6362, "step": 14665 }, { "epoch": 0.42819187760941285, "grad_norm": 0.5308841779337663, "learning_rate": 3.176804541768045e-05, "loss": 0.6349, "step": 14666 }, { "epoch": 0.4282210738372602, "grad_norm": 0.5407796469750292, "learning_rate": 3.176642335766424e-05, "loss": 0.7012, "step": 14667 }, { "epoch": 0.4282502700651076, "grad_norm": 0.5359779048321812, "learning_rate": 3.1764801297648015e-05, "loss": 0.6606, "step": 14668 }, { "epoch": 0.42827946629295494, "grad_norm": 0.5394162089238187, "learning_rate": 3.17631792376318e-05, "loss": 0.6526, "step": 14669 }, { "epoch": 0.4283086625208023, "grad_norm": 0.5767166165108322, "learning_rate": 3.176155717761557e-05, "loss": 0.6896, "step": 14670 }, { "epoch": 0.42833785874864966, "grad_norm": 0.49811372218726935, "learning_rate": 3.1759935117599354e-05, "loss": 0.5459, "step": 14671 }, { "epoch": 0.428367054976497, "grad_norm": 0.5372982068476106, "learning_rate": 3.1758313057583136e-05, "loss": 0.6575, "step": 14672 }, { "epoch": 0.4283962512043444, "grad_norm": 0.5441141322150471, "learning_rate": 3.175669099756691e-05, "loss": 0.6371, "step": 14673 }, { "epoch": 0.42842544743219174, "grad_norm": 0.5188958347204474, "learning_rate": 3.175506893755069e-05, "loss": 0.6243, "step": 14674 }, { "epoch": 0.4284546436600391, "grad_norm": 0.49890922861279324, "learning_rate": 3.1753446877534474e-05, "loss": 0.5493, "step": 14675 }, { "epoch": 0.42848383988788646, "grad_norm": 0.5253494265449504, "learning_rate": 3.175182481751825e-05, "loss": 0.6321, "step": 14676 }, { "epoch": 0.4285130361157338, "grad_norm": 0.5267831069577852, "learning_rate": 3.175020275750203e-05, "loss": 0.5929, "step": 14677 }, { "epoch": 0.4285422323435812, "grad_norm": 0.5662073176655074, "learning_rate": 3.1748580697485806e-05, "loss": 0.6988, "step": 14678 }, { "epoch": 0.42857142857142855, "grad_norm": 0.5208646465710537, "learning_rate": 3.174695863746959e-05, "loss": 0.6037, "step": 14679 }, { "epoch": 0.4286006247992759, "grad_norm": 0.5960893425688858, "learning_rate": 3.174533657745337e-05, "loss": 0.779, "step": 14680 }, { "epoch": 0.42862982102712327, "grad_norm": 0.528095410966377, "learning_rate": 3.1743714517437144e-05, "loss": 0.6384, "step": 14681 }, { "epoch": 0.42865901725497063, "grad_norm": 0.4889660003987704, "learning_rate": 3.1742092457420926e-05, "loss": 0.5652, "step": 14682 }, { "epoch": 0.428688213482818, "grad_norm": 0.5501686486136219, "learning_rate": 3.17404703974047e-05, "loss": 0.661, "step": 14683 }, { "epoch": 0.4287174097106654, "grad_norm": 0.4918301440680632, "learning_rate": 3.173884833738848e-05, "loss": 0.5426, "step": 14684 }, { "epoch": 0.42874660593851277, "grad_norm": 0.5601522228751984, "learning_rate": 3.1737226277372265e-05, "loss": 0.6875, "step": 14685 }, { "epoch": 0.42877580216636013, "grad_norm": 0.5119299017712333, "learning_rate": 3.1735604217356046e-05, "loss": 0.6055, "step": 14686 }, { "epoch": 0.4288049983942075, "grad_norm": 0.5028907482645539, "learning_rate": 3.173398215733983e-05, "loss": 0.5563, "step": 14687 }, { "epoch": 0.42883419462205485, "grad_norm": 0.5685856711443359, "learning_rate": 3.17323600973236e-05, "loss": 0.6924, "step": 14688 }, { "epoch": 0.4288633908499022, "grad_norm": 0.5420839967127795, "learning_rate": 3.1730738037307385e-05, "loss": 0.6533, "step": 14689 }, { "epoch": 0.4288925870777496, "grad_norm": 0.5526343053690986, "learning_rate": 3.172911597729116e-05, "loss": 0.6803, "step": 14690 }, { "epoch": 0.42892178330559694, "grad_norm": 0.5431950031650463, "learning_rate": 3.172749391727494e-05, "loss": 0.6728, "step": 14691 }, { "epoch": 0.4289509795334443, "grad_norm": 0.513937258292176, "learning_rate": 3.172587185725872e-05, "loss": 0.534, "step": 14692 }, { "epoch": 0.42898017576129166, "grad_norm": 0.5378339011435284, "learning_rate": 3.17242497972425e-05, "loss": 0.6214, "step": 14693 }, { "epoch": 0.429009371989139, "grad_norm": 0.5502765159136552, "learning_rate": 3.172262773722628e-05, "loss": 0.6776, "step": 14694 }, { "epoch": 0.4290385682169864, "grad_norm": 0.5443018177263644, "learning_rate": 3.1721005677210055e-05, "loss": 0.6309, "step": 14695 }, { "epoch": 0.42906776444483374, "grad_norm": 0.5187140491770836, "learning_rate": 3.171938361719384e-05, "loss": 0.629, "step": 14696 }, { "epoch": 0.4290969606726811, "grad_norm": 0.4961897451148294, "learning_rate": 3.171776155717762e-05, "loss": 0.5749, "step": 14697 }, { "epoch": 0.42912615690052847, "grad_norm": 0.5653997959332341, "learning_rate": 3.1716139497161394e-05, "loss": 0.7137, "step": 14698 }, { "epoch": 0.4291553531283758, "grad_norm": 0.5553343009027957, "learning_rate": 3.1714517437145175e-05, "loss": 0.6243, "step": 14699 }, { "epoch": 0.4291845493562232, "grad_norm": 0.5217300190959273, "learning_rate": 3.171289537712896e-05, "loss": 0.6093, "step": 14700 }, { "epoch": 0.42921374558407055, "grad_norm": 0.5470581608091207, "learning_rate": 3.171127331711273e-05, "loss": 0.6476, "step": 14701 }, { "epoch": 0.4292429418119179, "grad_norm": 0.5135724201238845, "learning_rate": 3.1709651257096514e-05, "loss": 0.5863, "step": 14702 }, { "epoch": 0.42927213803976527, "grad_norm": 0.46834770844967816, "learning_rate": 3.170802919708029e-05, "loss": 0.4871, "step": 14703 }, { "epoch": 0.42930133426761263, "grad_norm": 0.5168310934729206, "learning_rate": 3.170640713706407e-05, "loss": 0.5689, "step": 14704 }, { "epoch": 0.42933053049546, "grad_norm": 0.5897774132463981, "learning_rate": 3.170478507704785e-05, "loss": 0.6208, "step": 14705 }, { "epoch": 0.42935972672330736, "grad_norm": 0.4859739391268113, "learning_rate": 3.1703163017031634e-05, "loss": 0.5572, "step": 14706 }, { "epoch": 0.4293889229511547, "grad_norm": 0.4905938270640819, "learning_rate": 3.1701540957015416e-05, "loss": 0.5816, "step": 14707 }, { "epoch": 0.4294181191790021, "grad_norm": 0.5205578968343918, "learning_rate": 3.169991889699919e-05, "loss": 0.6355, "step": 14708 }, { "epoch": 0.42944731540684944, "grad_norm": 0.5298405403753614, "learning_rate": 3.169829683698297e-05, "loss": 0.641, "step": 14709 }, { "epoch": 0.4294765116346968, "grad_norm": 0.5794765488236436, "learning_rate": 3.169667477696675e-05, "loss": 0.6766, "step": 14710 }, { "epoch": 0.42950570786254416, "grad_norm": 0.5817449181459082, "learning_rate": 3.169505271695053e-05, "loss": 0.7151, "step": 14711 }, { "epoch": 0.4295349040903915, "grad_norm": 0.5769584856424711, "learning_rate": 3.169343065693431e-05, "loss": 0.69, "step": 14712 }, { "epoch": 0.4295641003182389, "grad_norm": 0.4878161403428703, "learning_rate": 3.1691808596918086e-05, "loss": 0.551, "step": 14713 }, { "epoch": 0.42959329654608625, "grad_norm": 0.48727008783205905, "learning_rate": 3.169018653690187e-05, "loss": 0.5788, "step": 14714 }, { "epoch": 0.4296224927739336, "grad_norm": 0.5089676396918512, "learning_rate": 3.168856447688564e-05, "loss": 0.582, "step": 14715 }, { "epoch": 0.42965168900178097, "grad_norm": 0.5744040919474421, "learning_rate": 3.1686942416869425e-05, "loss": 0.714, "step": 14716 }, { "epoch": 0.42968088522962833, "grad_norm": 0.5546921151688191, "learning_rate": 3.1685320356853206e-05, "loss": 0.6761, "step": 14717 }, { "epoch": 0.4297100814574757, "grad_norm": 0.6023141345693669, "learning_rate": 3.168369829683698e-05, "loss": 0.7413, "step": 14718 }, { "epoch": 0.42973927768532305, "grad_norm": 0.5742709199562123, "learning_rate": 3.168207623682076e-05, "loss": 0.6925, "step": 14719 }, { "epoch": 0.4297684739131704, "grad_norm": 0.5255966129159193, "learning_rate": 3.1680454176804545e-05, "loss": 0.6131, "step": 14720 }, { "epoch": 0.4297976701410178, "grad_norm": 0.5537972322837668, "learning_rate": 3.167883211678832e-05, "loss": 0.6643, "step": 14721 }, { "epoch": 0.42982686636886513, "grad_norm": 0.5389702099158498, "learning_rate": 3.16772100567721e-05, "loss": 0.6815, "step": 14722 }, { "epoch": 0.4298560625967125, "grad_norm": 0.48751778420756126, "learning_rate": 3.167558799675588e-05, "loss": 0.5238, "step": 14723 }, { "epoch": 0.42988525882455986, "grad_norm": 0.5862791981612966, "learning_rate": 3.1673965936739665e-05, "loss": 0.7184, "step": 14724 }, { "epoch": 0.4299144550524072, "grad_norm": 0.5288210850690805, "learning_rate": 3.167234387672344e-05, "loss": 0.6651, "step": 14725 }, { "epoch": 0.4299436512802546, "grad_norm": 0.5410432149356998, "learning_rate": 3.167072181670722e-05, "loss": 0.6613, "step": 14726 }, { "epoch": 0.42997284750810194, "grad_norm": 0.550930920802023, "learning_rate": 3.1669099756691004e-05, "loss": 0.6664, "step": 14727 }, { "epoch": 0.4300020437359493, "grad_norm": 0.5316349587811742, "learning_rate": 3.166747769667478e-05, "loss": 0.5924, "step": 14728 }, { "epoch": 0.43003123996379666, "grad_norm": 0.5005886163054488, "learning_rate": 3.166585563665856e-05, "loss": 0.5871, "step": 14729 }, { "epoch": 0.430060436191644, "grad_norm": 0.5845150754602519, "learning_rate": 3.1664233576642335e-05, "loss": 0.6767, "step": 14730 }, { "epoch": 0.4300896324194914, "grad_norm": 0.5006000481759589, "learning_rate": 3.166261151662612e-05, "loss": 0.5399, "step": 14731 }, { "epoch": 0.43011882864733875, "grad_norm": 0.5619816297966246, "learning_rate": 3.16609894566099e-05, "loss": 0.7065, "step": 14732 }, { "epoch": 0.4301480248751861, "grad_norm": 0.5096819659319397, "learning_rate": 3.1659367396593674e-05, "loss": 0.5864, "step": 14733 }, { "epoch": 0.43017722110303347, "grad_norm": 0.5070303761145104, "learning_rate": 3.1657745336577456e-05, "loss": 0.6582, "step": 14734 }, { "epoch": 0.43020641733088083, "grad_norm": 0.5273397603803213, "learning_rate": 3.165612327656123e-05, "loss": 0.6013, "step": 14735 }, { "epoch": 0.4302356135587282, "grad_norm": 0.5577363044548653, "learning_rate": 3.165450121654501e-05, "loss": 0.6639, "step": 14736 }, { "epoch": 0.43026480978657555, "grad_norm": 0.5549587327682437, "learning_rate": 3.1652879156528794e-05, "loss": 0.647, "step": 14737 }, { "epoch": 0.4302940060144229, "grad_norm": 0.5611675583610041, "learning_rate": 3.165125709651257e-05, "loss": 0.635, "step": 14738 }, { "epoch": 0.4303232022422703, "grad_norm": 0.535849178999849, "learning_rate": 3.164963503649635e-05, "loss": 0.629, "step": 14739 }, { "epoch": 0.43035239847011764, "grad_norm": 0.4840029440540419, "learning_rate": 3.1648012976480126e-05, "loss": 0.5663, "step": 14740 }, { "epoch": 0.430381594697965, "grad_norm": 0.5296597872655644, "learning_rate": 3.164639091646391e-05, "loss": 0.6154, "step": 14741 }, { "epoch": 0.43041079092581236, "grad_norm": 0.5497944308045003, "learning_rate": 3.164476885644769e-05, "loss": 0.6502, "step": 14742 }, { "epoch": 0.4304399871536597, "grad_norm": 0.5426917565230859, "learning_rate": 3.164314679643147e-05, "loss": 0.62, "step": 14743 }, { "epoch": 0.43046918338150714, "grad_norm": 0.5194803833966143, "learning_rate": 3.164152473641525e-05, "loss": 0.6112, "step": 14744 }, { "epoch": 0.4304983796093545, "grad_norm": 0.5239024628363799, "learning_rate": 3.163990267639903e-05, "loss": 0.6054, "step": 14745 }, { "epoch": 0.43052757583720186, "grad_norm": 0.5052576988896788, "learning_rate": 3.163828061638281e-05, "loss": 0.5809, "step": 14746 }, { "epoch": 0.4305567720650492, "grad_norm": 0.4997799418681703, "learning_rate": 3.163665855636659e-05, "loss": 0.6104, "step": 14747 }, { "epoch": 0.4305859682928966, "grad_norm": 0.5065532262435261, "learning_rate": 3.1635036496350366e-05, "loss": 0.5805, "step": 14748 }, { "epoch": 0.43061516452074394, "grad_norm": 0.5141158450604754, "learning_rate": 3.163341443633415e-05, "loss": 0.5829, "step": 14749 }, { "epoch": 0.4306443607485913, "grad_norm": 0.5147594476561236, "learning_rate": 3.163179237631792e-05, "loss": 0.6024, "step": 14750 }, { "epoch": 0.43067355697643867, "grad_norm": 0.5230956874165021, "learning_rate": 3.1630170316301705e-05, "loss": 0.5953, "step": 14751 }, { "epoch": 0.430702753204286, "grad_norm": 0.5204704998039559, "learning_rate": 3.162854825628549e-05, "loss": 0.6206, "step": 14752 }, { "epoch": 0.4307319494321334, "grad_norm": 0.5250742486878879, "learning_rate": 3.162692619626926e-05, "loss": 0.6477, "step": 14753 }, { "epoch": 0.43076114565998075, "grad_norm": 0.4716130101032386, "learning_rate": 3.1625304136253043e-05, "loss": 0.512, "step": 14754 }, { "epoch": 0.4307903418878281, "grad_norm": 0.5922041652589533, "learning_rate": 3.162368207623682e-05, "loss": 0.7203, "step": 14755 }, { "epoch": 0.43081953811567547, "grad_norm": 0.5383704089592813, "learning_rate": 3.16220600162206e-05, "loss": 0.6223, "step": 14756 }, { "epoch": 0.43084873434352283, "grad_norm": 0.5042678346839397, "learning_rate": 3.162043795620438e-05, "loss": 0.536, "step": 14757 }, { "epoch": 0.4308779305713702, "grad_norm": 0.5188501540204049, "learning_rate": 3.161881589618816e-05, "loss": 0.627, "step": 14758 }, { "epoch": 0.43090712679921755, "grad_norm": 0.5287690380469878, "learning_rate": 3.161719383617194e-05, "loss": 0.6044, "step": 14759 }, { "epoch": 0.4309363230270649, "grad_norm": 0.5198366769407332, "learning_rate": 3.1615571776155714e-05, "loss": 0.572, "step": 14760 }, { "epoch": 0.4309655192549123, "grad_norm": 0.5476517786836003, "learning_rate": 3.1613949716139495e-05, "loss": 0.6401, "step": 14761 }, { "epoch": 0.43099471548275964, "grad_norm": 0.5513766956211862, "learning_rate": 3.161232765612328e-05, "loss": 0.658, "step": 14762 }, { "epoch": 0.431023911710607, "grad_norm": 0.5664381651280304, "learning_rate": 3.161070559610706e-05, "loss": 0.7409, "step": 14763 }, { "epoch": 0.43105310793845436, "grad_norm": 0.5628133094896774, "learning_rate": 3.160908353609084e-05, "loss": 0.6466, "step": 14764 }, { "epoch": 0.4310823041663017, "grad_norm": 0.5292200941513446, "learning_rate": 3.1607461476074616e-05, "loss": 0.6058, "step": 14765 }, { "epoch": 0.4311115003941491, "grad_norm": 0.5243568964922228, "learning_rate": 3.16058394160584e-05, "loss": 0.6277, "step": 14766 }, { "epoch": 0.43114069662199644, "grad_norm": 0.4803399158604364, "learning_rate": 3.160421735604218e-05, "loss": 0.561, "step": 14767 }, { "epoch": 0.4311698928498438, "grad_norm": 0.533216287244944, "learning_rate": 3.1602595296025954e-05, "loss": 0.6337, "step": 14768 }, { "epoch": 0.43119908907769117, "grad_norm": 0.5662537944069609, "learning_rate": 3.1600973236009736e-05, "loss": 0.706, "step": 14769 }, { "epoch": 0.43122828530553853, "grad_norm": 0.4811493073196198, "learning_rate": 3.159935117599351e-05, "loss": 0.5419, "step": 14770 }, { "epoch": 0.4312574815333859, "grad_norm": 0.5354129855277644, "learning_rate": 3.159772911597729e-05, "loss": 0.65, "step": 14771 }, { "epoch": 0.43128667776123325, "grad_norm": 0.5438120518090531, "learning_rate": 3.1596107055961075e-05, "loss": 0.6951, "step": 14772 }, { "epoch": 0.4313158739890806, "grad_norm": 0.4978401171064719, "learning_rate": 3.159448499594485e-05, "loss": 0.5682, "step": 14773 }, { "epoch": 0.431345070216928, "grad_norm": 0.5946313894030425, "learning_rate": 3.159286293592863e-05, "loss": 0.7485, "step": 14774 }, { "epoch": 0.43137426644477533, "grad_norm": 0.5204961265407018, "learning_rate": 3.1591240875912406e-05, "loss": 0.4786, "step": 14775 }, { "epoch": 0.4314034626726227, "grad_norm": 0.5071284968243753, "learning_rate": 3.158961881589619e-05, "loss": 0.653, "step": 14776 }, { "epoch": 0.43143265890047006, "grad_norm": 0.5298848052639158, "learning_rate": 3.158799675587997e-05, "loss": 0.6281, "step": 14777 }, { "epoch": 0.4314618551283174, "grad_norm": 0.5217737752142768, "learning_rate": 3.1586374695863745e-05, "loss": 0.6002, "step": 14778 }, { "epoch": 0.4314910513561648, "grad_norm": 0.5607275248068231, "learning_rate": 3.1584752635847527e-05, "loss": 0.66, "step": 14779 }, { "epoch": 0.43152024758401214, "grad_norm": 0.5029562146373765, "learning_rate": 3.15831305758313e-05, "loss": 0.621, "step": 14780 }, { "epoch": 0.4315494438118595, "grad_norm": 0.5493571700832116, "learning_rate": 3.158150851581509e-05, "loss": 0.7188, "step": 14781 }, { "epoch": 0.43157864003970686, "grad_norm": 0.5511046062817319, "learning_rate": 3.1579886455798865e-05, "loss": 0.6836, "step": 14782 }, { "epoch": 0.4316078362675542, "grad_norm": 0.5562198836594026, "learning_rate": 3.157826439578265e-05, "loss": 0.6983, "step": 14783 }, { "epoch": 0.4316370324954016, "grad_norm": 1.122487758934778, "learning_rate": 3.157664233576643e-05, "loss": 0.6562, "step": 14784 }, { "epoch": 0.43166622872324895, "grad_norm": 0.48202331335747, "learning_rate": 3.1575020275750204e-05, "loss": 0.5167, "step": 14785 }, { "epoch": 0.4316954249510963, "grad_norm": 0.5350629898581308, "learning_rate": 3.1573398215733985e-05, "loss": 0.6147, "step": 14786 }, { "epoch": 0.43172462117894367, "grad_norm": 0.508569128700237, "learning_rate": 3.157177615571777e-05, "loss": 0.5622, "step": 14787 }, { "epoch": 0.43175381740679103, "grad_norm": 0.5136882951737507, "learning_rate": 3.157015409570154e-05, "loss": 0.6217, "step": 14788 }, { "epoch": 0.4317830136346384, "grad_norm": 0.5927994735514212, "learning_rate": 3.1568532035685324e-05, "loss": 0.7094, "step": 14789 }, { "epoch": 0.43181220986248575, "grad_norm": 0.5232757102381642, "learning_rate": 3.15669099756691e-05, "loss": 0.606, "step": 14790 }, { "epoch": 0.4318414060903331, "grad_norm": 0.5723449675244193, "learning_rate": 3.156528791565288e-05, "loss": 0.7522, "step": 14791 }, { "epoch": 0.4318706023181805, "grad_norm": 0.5141595824682852, "learning_rate": 3.156366585563666e-05, "loss": 0.6365, "step": 14792 }, { "epoch": 0.43189979854602784, "grad_norm": 0.5061140115688237, "learning_rate": 3.156204379562044e-05, "loss": 0.5611, "step": 14793 }, { "epoch": 0.4319289947738752, "grad_norm": 0.5263131848196709, "learning_rate": 3.156042173560422e-05, "loss": 0.6434, "step": 14794 }, { "epoch": 0.43195819100172256, "grad_norm": 0.549714921250071, "learning_rate": 3.1558799675587994e-05, "loss": 0.6688, "step": 14795 }, { "epoch": 0.4319873872295699, "grad_norm": 0.5441139809857742, "learning_rate": 3.1557177615571776e-05, "loss": 0.7249, "step": 14796 }, { "epoch": 0.4320165834574173, "grad_norm": 0.547527812252175, "learning_rate": 3.155555555555556e-05, "loss": 0.6456, "step": 14797 }, { "epoch": 0.43204577968526464, "grad_norm": 0.5171779171602902, "learning_rate": 3.155393349553933e-05, "loss": 0.6167, "step": 14798 }, { "epoch": 0.432074975913112, "grad_norm": 0.5144111010577149, "learning_rate": 3.155231143552312e-05, "loss": 0.6149, "step": 14799 }, { "epoch": 0.43210417214095936, "grad_norm": 0.5598242980127879, "learning_rate": 3.1550689375506896e-05, "loss": 0.7049, "step": 14800 }, { "epoch": 0.4321333683688067, "grad_norm": 0.5207223136703033, "learning_rate": 3.154906731549068e-05, "loss": 0.6352, "step": 14801 }, { "epoch": 0.4321625645966541, "grad_norm": 0.5206661696290629, "learning_rate": 3.154744525547445e-05, "loss": 0.6323, "step": 14802 }, { "epoch": 0.43219176082450145, "grad_norm": 0.6852516675797611, "learning_rate": 3.1545823195458235e-05, "loss": 0.5637, "step": 14803 }, { "epoch": 0.43222095705234886, "grad_norm": 0.5612807212688068, "learning_rate": 3.1544201135442016e-05, "loss": 0.6737, "step": 14804 }, { "epoch": 0.4322501532801962, "grad_norm": 0.588900757130669, "learning_rate": 3.154257907542579e-05, "loss": 0.7456, "step": 14805 }, { "epoch": 0.4322793495080436, "grad_norm": 0.5182967963315773, "learning_rate": 3.154095701540957e-05, "loss": 0.5748, "step": 14806 }, { "epoch": 0.43230854573589095, "grad_norm": 0.5101630794555642, "learning_rate": 3.153933495539335e-05, "loss": 0.6411, "step": 14807 }, { "epoch": 0.4323377419637383, "grad_norm": 0.5138953253176618, "learning_rate": 3.153771289537713e-05, "loss": 0.6098, "step": 14808 }, { "epoch": 0.43236693819158567, "grad_norm": 0.5111836694124683, "learning_rate": 3.153609083536091e-05, "loss": 0.5792, "step": 14809 }, { "epoch": 0.43239613441943303, "grad_norm": 0.5459471867580219, "learning_rate": 3.153446877534469e-05, "loss": 0.6483, "step": 14810 }, { "epoch": 0.4324253306472804, "grad_norm": 0.553718612464024, "learning_rate": 3.153284671532847e-05, "loss": 0.6435, "step": 14811 }, { "epoch": 0.43245452687512775, "grad_norm": 0.5314295570984694, "learning_rate": 3.153122465531225e-05, "loss": 0.5403, "step": 14812 }, { "epoch": 0.4324837231029751, "grad_norm": 0.5313556430766675, "learning_rate": 3.1529602595296025e-05, "loss": 0.6459, "step": 14813 }, { "epoch": 0.4325129193308225, "grad_norm": 0.5182285491179545, "learning_rate": 3.152798053527981e-05, "loss": 0.6219, "step": 14814 }, { "epoch": 0.43254211555866984, "grad_norm": 0.5668105544951195, "learning_rate": 3.152635847526358e-05, "loss": 0.6708, "step": 14815 }, { "epoch": 0.4325713117865172, "grad_norm": 0.526157735806909, "learning_rate": 3.1524736415247364e-05, "loss": 0.5644, "step": 14816 }, { "epoch": 0.43260050801436456, "grad_norm": 0.5655707919002918, "learning_rate": 3.1523114355231145e-05, "loss": 0.6514, "step": 14817 }, { "epoch": 0.4326297042422119, "grad_norm": 0.5543938987670539, "learning_rate": 3.152149229521493e-05, "loss": 0.6887, "step": 14818 }, { "epoch": 0.4326589004700593, "grad_norm": 0.49545397538378727, "learning_rate": 3.151987023519871e-05, "loss": 0.5321, "step": 14819 }, { "epoch": 0.43268809669790664, "grad_norm": 0.5509605424652814, "learning_rate": 3.1518248175182484e-05, "loss": 0.6259, "step": 14820 }, { "epoch": 0.432717292925754, "grad_norm": 0.5015958511618186, "learning_rate": 3.1516626115166266e-05, "loss": 0.6229, "step": 14821 }, { "epoch": 0.43274648915360137, "grad_norm": 0.5003531488262616, "learning_rate": 3.151500405515004e-05, "loss": 0.6097, "step": 14822 }, { "epoch": 0.4327756853814487, "grad_norm": 0.5256876860691087, "learning_rate": 3.151338199513382e-05, "loss": 0.6274, "step": 14823 }, { "epoch": 0.4328048816092961, "grad_norm": 0.5495959058831664, "learning_rate": 3.1511759935117604e-05, "loss": 0.6883, "step": 14824 }, { "epoch": 0.43283407783714345, "grad_norm": 0.507284082551755, "learning_rate": 3.151013787510138e-05, "loss": 0.6386, "step": 14825 }, { "epoch": 0.4328632740649908, "grad_norm": 0.5051661805952965, "learning_rate": 3.150851581508516e-05, "loss": 0.569, "step": 14826 }, { "epoch": 0.43289247029283817, "grad_norm": 0.5213272000873918, "learning_rate": 3.1506893755068936e-05, "loss": 0.5904, "step": 14827 }, { "epoch": 0.43292166652068553, "grad_norm": 0.4945082982326633, "learning_rate": 3.150527169505272e-05, "loss": 0.5369, "step": 14828 }, { "epoch": 0.4329508627485329, "grad_norm": 0.5002059535567349, "learning_rate": 3.15036496350365e-05, "loss": 0.5975, "step": 14829 }, { "epoch": 0.43298005897638026, "grad_norm": 0.518243795398974, "learning_rate": 3.1502027575020274e-05, "loss": 0.6363, "step": 14830 }, { "epoch": 0.4330092552042276, "grad_norm": 0.5267317513222892, "learning_rate": 3.1500405515004056e-05, "loss": 0.6622, "step": 14831 }, { "epoch": 0.433038451432075, "grad_norm": 0.5374190648378531, "learning_rate": 3.149878345498784e-05, "loss": 0.6216, "step": 14832 }, { "epoch": 0.43306764765992234, "grad_norm": 0.5670964231862397, "learning_rate": 3.149716139497161e-05, "loss": 0.6919, "step": 14833 }, { "epoch": 0.4330968438877697, "grad_norm": 0.5345483220668302, "learning_rate": 3.1495539334955395e-05, "loss": 0.6126, "step": 14834 }, { "epoch": 0.43312604011561706, "grad_norm": 0.5647021409555159, "learning_rate": 3.149391727493917e-05, "loss": 0.6464, "step": 14835 }, { "epoch": 0.4331552363434644, "grad_norm": 0.529198113495005, "learning_rate": 3.149229521492295e-05, "loss": 0.5885, "step": 14836 }, { "epoch": 0.4331844325713118, "grad_norm": 0.48291692423001326, "learning_rate": 3.149067315490673e-05, "loss": 0.5584, "step": 14837 }, { "epoch": 0.43321362879915915, "grad_norm": 0.48755278278315184, "learning_rate": 3.1489051094890515e-05, "loss": 0.5423, "step": 14838 }, { "epoch": 0.4332428250270065, "grad_norm": 0.4939509091109495, "learning_rate": 3.14874290348743e-05, "loss": 0.5564, "step": 14839 }, { "epoch": 0.43327202125485387, "grad_norm": 0.5491257497398263, "learning_rate": 3.148580697485807e-05, "loss": 0.6748, "step": 14840 }, { "epoch": 0.43330121748270123, "grad_norm": 0.5013267756981473, "learning_rate": 3.1484184914841853e-05, "loss": 0.5611, "step": 14841 }, { "epoch": 0.4333304137105486, "grad_norm": 0.5522996982935691, "learning_rate": 3.148256285482563e-05, "loss": 0.6611, "step": 14842 }, { "epoch": 0.43335960993839595, "grad_norm": 0.5212474242255477, "learning_rate": 3.148094079480941e-05, "loss": 0.6129, "step": 14843 }, { "epoch": 0.4333888061662433, "grad_norm": 0.6011509077735934, "learning_rate": 3.147931873479319e-05, "loss": 0.6926, "step": 14844 }, { "epoch": 0.4334180023940907, "grad_norm": 0.5327601393089298, "learning_rate": 3.147769667477697e-05, "loss": 0.5845, "step": 14845 }, { "epoch": 0.43344719862193803, "grad_norm": 0.548253325232302, "learning_rate": 3.147607461476075e-05, "loss": 0.6384, "step": 14846 }, { "epoch": 0.4334763948497854, "grad_norm": 0.5568302371965222, "learning_rate": 3.1474452554744524e-05, "loss": 0.628, "step": 14847 }, { "epoch": 0.43350559107763276, "grad_norm": 0.49085628936909304, "learning_rate": 3.1472830494728306e-05, "loss": 0.5488, "step": 14848 }, { "epoch": 0.4335347873054801, "grad_norm": 0.5664203187902458, "learning_rate": 3.147120843471209e-05, "loss": 0.6386, "step": 14849 }, { "epoch": 0.4335639835333275, "grad_norm": 0.5210230998461349, "learning_rate": 3.146958637469586e-05, "loss": 0.6125, "step": 14850 }, { "epoch": 0.43359317976117484, "grad_norm": 0.5073765525137199, "learning_rate": 3.1467964314679644e-05, "loss": 0.6094, "step": 14851 }, { "epoch": 0.4336223759890222, "grad_norm": 0.5511971103339961, "learning_rate": 3.146634225466342e-05, "loss": 0.6598, "step": 14852 }, { "epoch": 0.43365157221686956, "grad_norm": 0.5373764473857278, "learning_rate": 3.14647201946472e-05, "loss": 0.6861, "step": 14853 }, { "epoch": 0.4336807684447169, "grad_norm": 0.5056993781774045, "learning_rate": 3.146309813463098e-05, "loss": 0.6106, "step": 14854 }, { "epoch": 0.4337099646725643, "grad_norm": 0.5042617480296198, "learning_rate": 3.146147607461476e-05, "loss": 0.5543, "step": 14855 }, { "epoch": 0.43373916090041165, "grad_norm": 0.5206712619488383, "learning_rate": 3.1459854014598546e-05, "loss": 0.6118, "step": 14856 }, { "epoch": 0.433768357128259, "grad_norm": 0.5160470901180426, "learning_rate": 3.145823195458232e-05, "loss": 0.6294, "step": 14857 }, { "epoch": 0.43379755335610637, "grad_norm": 0.5621248327590577, "learning_rate": 3.14566098945661e-05, "loss": 0.7385, "step": 14858 }, { "epoch": 0.43382674958395373, "grad_norm": 0.5493008140937339, "learning_rate": 3.1454987834549885e-05, "loss": 0.6507, "step": 14859 }, { "epoch": 0.4338559458118011, "grad_norm": 0.48843151409787383, "learning_rate": 3.145336577453366e-05, "loss": 0.5678, "step": 14860 }, { "epoch": 0.43388514203964845, "grad_norm": 0.5190488756755115, "learning_rate": 3.145174371451744e-05, "loss": 0.5734, "step": 14861 }, { "epoch": 0.4339143382674958, "grad_norm": 0.5173116805133712, "learning_rate": 3.1450121654501216e-05, "loss": 0.5826, "step": 14862 }, { "epoch": 0.4339435344953432, "grad_norm": 0.5431944685793939, "learning_rate": 3.1448499594485e-05, "loss": 0.6779, "step": 14863 }, { "epoch": 0.43397273072319054, "grad_norm": 0.5030979141012157, "learning_rate": 3.144687753446878e-05, "loss": 0.5929, "step": 14864 }, { "epoch": 0.43400192695103795, "grad_norm": 0.480760736491253, "learning_rate": 3.1445255474452555e-05, "loss": 0.5125, "step": 14865 }, { "epoch": 0.4340311231788853, "grad_norm": 0.5563761494282924, "learning_rate": 3.1443633414436337e-05, "loss": 0.7239, "step": 14866 }, { "epoch": 0.4340603194067327, "grad_norm": 0.5509404861188584, "learning_rate": 3.144201135442011e-05, "loss": 0.6703, "step": 14867 }, { "epoch": 0.43408951563458004, "grad_norm": 0.5541735245734148, "learning_rate": 3.144038929440389e-05, "loss": 0.6056, "step": 14868 }, { "epoch": 0.4341187118624274, "grad_norm": 0.49365718767057765, "learning_rate": 3.1438767234387675e-05, "loss": 0.5497, "step": 14869 }, { "epoch": 0.43414790809027476, "grad_norm": 0.6355054886692079, "learning_rate": 3.143714517437145e-05, "loss": 0.6842, "step": 14870 }, { "epoch": 0.4341771043181221, "grad_norm": 0.5157582438777254, "learning_rate": 3.143552311435523e-05, "loss": 0.6014, "step": 14871 }, { "epoch": 0.4342063005459695, "grad_norm": 0.4932670219967173, "learning_rate": 3.143390105433901e-05, "loss": 0.5691, "step": 14872 }, { "epoch": 0.43423549677381684, "grad_norm": 0.562969325323633, "learning_rate": 3.143227899432279e-05, "loss": 0.7037, "step": 14873 }, { "epoch": 0.4342646930016642, "grad_norm": 0.5555289826987443, "learning_rate": 3.143065693430657e-05, "loss": 0.6697, "step": 14874 }, { "epoch": 0.43429388922951156, "grad_norm": 0.5258812364892832, "learning_rate": 3.142903487429035e-05, "loss": 0.5963, "step": 14875 }, { "epoch": 0.4343230854573589, "grad_norm": 0.5584322822698914, "learning_rate": 3.1427412814274134e-05, "loss": 0.6737, "step": 14876 }, { "epoch": 0.4343522816852063, "grad_norm": 0.5127926849780154, "learning_rate": 3.142579075425791e-05, "loss": 0.6102, "step": 14877 }, { "epoch": 0.43438147791305365, "grad_norm": 0.49717845955181633, "learning_rate": 3.142416869424169e-05, "loss": 0.5629, "step": 14878 }, { "epoch": 0.434410674140901, "grad_norm": 0.5198811776724453, "learning_rate": 3.142254663422547e-05, "loss": 0.6269, "step": 14879 }, { "epoch": 0.43443987036874837, "grad_norm": 0.49932270765296694, "learning_rate": 3.142092457420925e-05, "loss": 0.5462, "step": 14880 }, { "epoch": 0.43446906659659573, "grad_norm": 0.502357841662365, "learning_rate": 3.141930251419303e-05, "loss": 0.5692, "step": 14881 }, { "epoch": 0.4344982628244431, "grad_norm": 0.5329552434069986, "learning_rate": 3.1417680454176804e-05, "loss": 0.633, "step": 14882 }, { "epoch": 0.43452745905229045, "grad_norm": 0.6594708836044088, "learning_rate": 3.1416058394160586e-05, "loss": 0.5673, "step": 14883 }, { "epoch": 0.4345566552801378, "grad_norm": 0.5744227620485821, "learning_rate": 3.141443633414437e-05, "loss": 0.6397, "step": 14884 }, { "epoch": 0.4345858515079852, "grad_norm": 0.481964628340139, "learning_rate": 3.141281427412814e-05, "loss": 0.5181, "step": 14885 }, { "epoch": 0.43461504773583254, "grad_norm": 0.5699178106512408, "learning_rate": 3.1411192214111924e-05, "loss": 0.6854, "step": 14886 }, { "epoch": 0.4346442439636799, "grad_norm": 0.4801122784571505, "learning_rate": 3.14095701540957e-05, "loss": 0.4936, "step": 14887 }, { "epoch": 0.43467344019152726, "grad_norm": 0.5240109840440701, "learning_rate": 3.140794809407948e-05, "loss": 0.6316, "step": 14888 }, { "epoch": 0.4347026364193746, "grad_norm": 0.5426868952449875, "learning_rate": 3.140632603406326e-05, "loss": 0.678, "step": 14889 }, { "epoch": 0.434731832647222, "grad_norm": 0.5585412250570524, "learning_rate": 3.140470397404704e-05, "loss": 0.6508, "step": 14890 }, { "epoch": 0.43476102887506934, "grad_norm": 0.5175321719671082, "learning_rate": 3.140308191403082e-05, "loss": 0.5948, "step": 14891 }, { "epoch": 0.4347902251029167, "grad_norm": 0.5558539582316008, "learning_rate": 3.1401459854014595e-05, "loss": 0.6606, "step": 14892 }, { "epoch": 0.43481942133076407, "grad_norm": 0.5549619405943694, "learning_rate": 3.1399837793998376e-05, "loss": 0.7092, "step": 14893 }, { "epoch": 0.4348486175586114, "grad_norm": 0.48717171317972163, "learning_rate": 3.139821573398216e-05, "loss": 0.545, "step": 14894 }, { "epoch": 0.4348778137864588, "grad_norm": 0.5513830541577603, "learning_rate": 3.139659367396594e-05, "loss": 0.6836, "step": 14895 }, { "epoch": 0.43490701001430615, "grad_norm": 0.5195578885194189, "learning_rate": 3.139497161394972e-05, "loss": 0.5551, "step": 14896 }, { "epoch": 0.4349362062421535, "grad_norm": 0.548449092164658, "learning_rate": 3.13933495539335e-05, "loss": 0.6657, "step": 14897 }, { "epoch": 0.4349654024700009, "grad_norm": 0.5061250076959748, "learning_rate": 3.139172749391728e-05, "loss": 0.5721, "step": 14898 }, { "epoch": 0.43499459869784823, "grad_norm": 0.5060593467016095, "learning_rate": 3.139010543390106e-05, "loss": 0.5685, "step": 14899 }, { "epoch": 0.4350237949256956, "grad_norm": 0.49315368639787566, "learning_rate": 3.1388483373884835e-05, "loss": 0.5175, "step": 14900 }, { "epoch": 0.43505299115354296, "grad_norm": 0.544000391553862, "learning_rate": 3.138686131386862e-05, "loss": 0.6255, "step": 14901 }, { "epoch": 0.4350821873813903, "grad_norm": 0.570630605579413, "learning_rate": 3.138523925385239e-05, "loss": 0.6845, "step": 14902 }, { "epoch": 0.4351113836092377, "grad_norm": 0.5275913398278467, "learning_rate": 3.1383617193836174e-05, "loss": 0.6705, "step": 14903 }, { "epoch": 0.43514057983708504, "grad_norm": 0.4902238923120544, "learning_rate": 3.1381995133819955e-05, "loss": 0.5326, "step": 14904 }, { "epoch": 0.4351697760649324, "grad_norm": 0.5684249364219769, "learning_rate": 3.138037307380373e-05, "loss": 0.6551, "step": 14905 }, { "epoch": 0.43519897229277976, "grad_norm": 0.557309526530684, "learning_rate": 3.137875101378751e-05, "loss": 0.7084, "step": 14906 }, { "epoch": 0.4352281685206271, "grad_norm": 0.6207631940316746, "learning_rate": 3.137712895377129e-05, "loss": 0.6671, "step": 14907 }, { "epoch": 0.4352573647484745, "grad_norm": 0.5393366681260455, "learning_rate": 3.137550689375507e-05, "loss": 0.6707, "step": 14908 }, { "epoch": 0.43528656097632185, "grad_norm": 0.5423867395961783, "learning_rate": 3.137388483373885e-05, "loss": 0.6228, "step": 14909 }, { "epoch": 0.4353157572041692, "grad_norm": 0.5529784026912158, "learning_rate": 3.1372262773722626e-05, "loss": 0.6854, "step": 14910 }, { "epoch": 0.43534495343201657, "grad_norm": 0.5065410727596725, "learning_rate": 3.137064071370641e-05, "loss": 0.6167, "step": 14911 }, { "epoch": 0.43537414965986393, "grad_norm": 0.5039565955092533, "learning_rate": 3.136901865369018e-05, "loss": 0.5944, "step": 14912 }, { "epoch": 0.4354033458877113, "grad_norm": 0.5277197627883518, "learning_rate": 3.136739659367397e-05, "loss": 0.5604, "step": 14913 }, { "epoch": 0.43543254211555865, "grad_norm": 0.5022114015291845, "learning_rate": 3.1365774533657746e-05, "loss": 0.5583, "step": 14914 }, { "epoch": 0.435461738343406, "grad_norm": 0.5232428851873078, "learning_rate": 3.136415247364153e-05, "loss": 0.5661, "step": 14915 }, { "epoch": 0.4354909345712534, "grad_norm": 0.5429940874962116, "learning_rate": 3.136253041362531e-05, "loss": 0.6693, "step": 14916 }, { "epoch": 0.43552013079910074, "grad_norm": 0.4871085394958061, "learning_rate": 3.1360908353609084e-05, "loss": 0.5507, "step": 14917 }, { "epoch": 0.4355493270269481, "grad_norm": 0.5117662586251942, "learning_rate": 3.1359286293592866e-05, "loss": 0.5313, "step": 14918 }, { "epoch": 0.43557852325479546, "grad_norm": 0.5232810180409149, "learning_rate": 3.135766423357665e-05, "loss": 0.6067, "step": 14919 }, { "epoch": 0.4356077194826428, "grad_norm": 0.5545896004933392, "learning_rate": 3.135604217356042e-05, "loss": 0.5825, "step": 14920 }, { "epoch": 0.4356369157104902, "grad_norm": 0.5346677184038193, "learning_rate": 3.1354420113544205e-05, "loss": 0.6402, "step": 14921 }, { "epoch": 0.43566611193833754, "grad_norm": 0.5939237675070064, "learning_rate": 3.135279805352798e-05, "loss": 0.6988, "step": 14922 }, { "epoch": 0.4356953081661849, "grad_norm": 0.5499710974328017, "learning_rate": 3.135117599351176e-05, "loss": 0.6709, "step": 14923 }, { "epoch": 0.43572450439403226, "grad_norm": 0.563288417422812, "learning_rate": 3.134955393349554e-05, "loss": 0.6251, "step": 14924 }, { "epoch": 0.4357537006218797, "grad_norm": 0.5202807953786214, "learning_rate": 3.134793187347932e-05, "loss": 0.614, "step": 14925 }, { "epoch": 0.43578289684972704, "grad_norm": 0.5006253401801622, "learning_rate": 3.13463098134631e-05, "loss": 0.5645, "step": 14926 }, { "epoch": 0.4358120930775744, "grad_norm": 0.5382656483313815, "learning_rate": 3.1344687753446875e-05, "loss": 0.619, "step": 14927 }, { "epoch": 0.43584128930542176, "grad_norm": 0.5627610774063303, "learning_rate": 3.134306569343066e-05, "loss": 0.6928, "step": 14928 }, { "epoch": 0.4358704855332691, "grad_norm": 0.5474813765977424, "learning_rate": 3.134144363341444e-05, "loss": 0.6324, "step": 14929 }, { "epoch": 0.4358996817611165, "grad_norm": 0.5009523097072952, "learning_rate": 3.1339821573398213e-05, "loss": 0.5035, "step": 14930 }, { "epoch": 0.43592887798896385, "grad_norm": 0.5326365829581046, "learning_rate": 3.1338199513381995e-05, "loss": 0.633, "step": 14931 }, { "epoch": 0.4359580742168112, "grad_norm": 0.5472650616410265, "learning_rate": 3.133657745336578e-05, "loss": 0.5749, "step": 14932 }, { "epoch": 0.43598727044465857, "grad_norm": 0.5097218657775116, "learning_rate": 3.133495539334956e-05, "loss": 0.5808, "step": 14933 }, { "epoch": 0.43601646667250593, "grad_norm": 0.5821966924406404, "learning_rate": 3.1333333333333334e-05, "loss": 0.6965, "step": 14934 }, { "epoch": 0.4360456629003533, "grad_norm": 0.5221148145905338, "learning_rate": 3.1331711273317116e-05, "loss": 0.6166, "step": 14935 }, { "epoch": 0.43607485912820065, "grad_norm": 0.5160137545152553, "learning_rate": 3.13300892133009e-05, "loss": 0.5817, "step": 14936 }, { "epoch": 0.436104055356048, "grad_norm": 0.4945703820167794, "learning_rate": 3.132846715328467e-05, "loss": 0.5597, "step": 14937 }, { "epoch": 0.4361332515838954, "grad_norm": 0.4620816133284844, "learning_rate": 3.1326845093268454e-05, "loss": 0.5115, "step": 14938 }, { "epoch": 0.43616244781174274, "grad_norm": 0.5228780202189649, "learning_rate": 3.132522303325223e-05, "loss": 0.6241, "step": 14939 }, { "epoch": 0.4361916440395901, "grad_norm": 0.506342795084016, "learning_rate": 3.132360097323601e-05, "loss": 0.5634, "step": 14940 }, { "epoch": 0.43622084026743746, "grad_norm": 0.5199284762296602, "learning_rate": 3.132197891321979e-05, "loss": 0.5497, "step": 14941 }, { "epoch": 0.4362500364952848, "grad_norm": 0.4968259123754, "learning_rate": 3.132035685320357e-05, "loss": 0.548, "step": 14942 }, { "epoch": 0.4362792327231322, "grad_norm": 0.5506561135480771, "learning_rate": 3.131873479318735e-05, "loss": 0.659, "step": 14943 }, { "epoch": 0.43630842895097954, "grad_norm": 0.5505591335784694, "learning_rate": 3.131711273317113e-05, "loss": 0.6031, "step": 14944 }, { "epoch": 0.4363376251788269, "grad_norm": 0.5495723191869882, "learning_rate": 3.1315490673154906e-05, "loss": 0.6796, "step": 14945 }, { "epoch": 0.43636682140667427, "grad_norm": 0.5063669397042092, "learning_rate": 3.131386861313869e-05, "loss": 0.5807, "step": 14946 }, { "epoch": 0.4363960176345216, "grad_norm": 0.5582833543739265, "learning_rate": 3.131224655312246e-05, "loss": 0.6782, "step": 14947 }, { "epoch": 0.436425213862369, "grad_norm": 0.5335613035087754, "learning_rate": 3.1310624493106245e-05, "loss": 0.6411, "step": 14948 }, { "epoch": 0.43645441009021635, "grad_norm": 0.5292855466835297, "learning_rate": 3.1309002433090026e-05, "loss": 0.6196, "step": 14949 }, { "epoch": 0.4364836063180637, "grad_norm": 0.4972683182535914, "learning_rate": 3.130738037307381e-05, "loss": 0.5779, "step": 14950 }, { "epoch": 0.43651280254591107, "grad_norm": 0.5501834129689567, "learning_rate": 3.130575831305759e-05, "loss": 0.7047, "step": 14951 }, { "epoch": 0.43654199877375843, "grad_norm": 0.5164268961816936, "learning_rate": 3.1304136253041365e-05, "loss": 0.5929, "step": 14952 }, { "epoch": 0.4365711950016058, "grad_norm": 0.5150643523207774, "learning_rate": 3.1302514193025147e-05, "loss": 0.6193, "step": 14953 }, { "epoch": 0.43660039122945316, "grad_norm": 0.523241249666554, "learning_rate": 3.130089213300892e-05, "loss": 0.6175, "step": 14954 }, { "epoch": 0.4366295874573005, "grad_norm": 0.5124645251946536, "learning_rate": 3.12992700729927e-05, "loss": 0.5718, "step": 14955 }, { "epoch": 0.4366587836851479, "grad_norm": 0.5223621027745136, "learning_rate": 3.1297648012976485e-05, "loss": 0.6228, "step": 14956 }, { "epoch": 0.43668797991299524, "grad_norm": 0.560927042377925, "learning_rate": 3.129602595296026e-05, "loss": 0.6932, "step": 14957 }, { "epoch": 0.4367171761408426, "grad_norm": 0.5791381242760535, "learning_rate": 3.129440389294404e-05, "loss": 0.6998, "step": 14958 }, { "epoch": 0.43674637236868996, "grad_norm": 0.5255577712250898, "learning_rate": 3.129278183292782e-05, "loss": 0.581, "step": 14959 }, { "epoch": 0.4367755685965373, "grad_norm": 0.47609979547667514, "learning_rate": 3.12911597729116e-05, "loss": 0.5532, "step": 14960 }, { "epoch": 0.4368047648243847, "grad_norm": 0.5164860844967186, "learning_rate": 3.128953771289538e-05, "loss": 0.6405, "step": 14961 }, { "epoch": 0.43683396105223204, "grad_norm": 0.5696443448029839, "learning_rate": 3.1287915652879155e-05, "loss": 0.6618, "step": 14962 }, { "epoch": 0.4368631572800794, "grad_norm": 0.49278885088499674, "learning_rate": 3.128629359286294e-05, "loss": 0.5624, "step": 14963 }, { "epoch": 0.43689235350792677, "grad_norm": 0.5091978222309578, "learning_rate": 3.128467153284672e-05, "loss": 0.5725, "step": 14964 }, { "epoch": 0.43692154973577413, "grad_norm": 0.4973794104777664, "learning_rate": 3.1283049472830494e-05, "loss": 0.5831, "step": 14965 }, { "epoch": 0.4369507459636215, "grad_norm": 0.489980364066581, "learning_rate": 3.1281427412814276e-05, "loss": 0.6197, "step": 14966 }, { "epoch": 0.43697994219146885, "grad_norm": 0.5543807283951144, "learning_rate": 3.127980535279805e-05, "loss": 0.6772, "step": 14967 }, { "epoch": 0.4370091384193162, "grad_norm": 0.5912396834088404, "learning_rate": 3.127818329278183e-05, "loss": 0.7465, "step": 14968 }, { "epoch": 0.4370383346471636, "grad_norm": 0.5689631672465844, "learning_rate": 3.1276561232765614e-05, "loss": 0.7152, "step": 14969 }, { "epoch": 0.43706753087501093, "grad_norm": 0.5397303004480801, "learning_rate": 3.1274939172749396e-05, "loss": 0.6443, "step": 14970 }, { "epoch": 0.4370967271028583, "grad_norm": 0.5387140593586016, "learning_rate": 3.127331711273318e-05, "loss": 0.6257, "step": 14971 }, { "epoch": 0.43712592333070566, "grad_norm": 0.5249064798256764, "learning_rate": 3.127169505271695e-05, "loss": 0.6194, "step": 14972 }, { "epoch": 0.437155119558553, "grad_norm": 0.5625535339874296, "learning_rate": 3.1270072992700734e-05, "loss": 0.6463, "step": 14973 }, { "epoch": 0.4371843157864004, "grad_norm": 0.5185856109284906, "learning_rate": 3.126845093268451e-05, "loss": 0.6055, "step": 14974 }, { "epoch": 0.43721351201424774, "grad_norm": 0.5527180783618549, "learning_rate": 3.126682887266829e-05, "loss": 0.6459, "step": 14975 }, { "epoch": 0.4372427082420951, "grad_norm": 0.5152909402852094, "learning_rate": 3.126520681265207e-05, "loss": 0.6525, "step": 14976 }, { "epoch": 0.43727190446994246, "grad_norm": 0.5044907151238197, "learning_rate": 3.126358475263585e-05, "loss": 0.5912, "step": 14977 }, { "epoch": 0.4373011006977898, "grad_norm": 0.545234789805976, "learning_rate": 3.126196269261963e-05, "loss": 0.6184, "step": 14978 }, { "epoch": 0.4373302969256372, "grad_norm": 0.5128709516322536, "learning_rate": 3.1260340632603405e-05, "loss": 0.5701, "step": 14979 }, { "epoch": 0.43735949315348455, "grad_norm": 0.5287492374811801, "learning_rate": 3.1258718572587186e-05, "loss": 0.5996, "step": 14980 }, { "epoch": 0.4373886893813319, "grad_norm": 0.523511047929479, "learning_rate": 3.125709651257097e-05, "loss": 0.6365, "step": 14981 }, { "epoch": 0.43741788560917927, "grad_norm": 0.5542804597191128, "learning_rate": 3.125547445255474e-05, "loss": 0.5831, "step": 14982 }, { "epoch": 0.43744708183702663, "grad_norm": 0.5650665131973391, "learning_rate": 3.1253852392538525e-05, "loss": 0.7383, "step": 14983 }, { "epoch": 0.437476278064874, "grad_norm": 0.5315077023281869, "learning_rate": 3.12522303325223e-05, "loss": 0.6256, "step": 14984 }, { "epoch": 0.4375054742927214, "grad_norm": 0.5103759815831937, "learning_rate": 3.125060827250608e-05, "loss": 0.6054, "step": 14985 }, { "epoch": 0.43753467052056877, "grad_norm": 0.5134339216421764, "learning_rate": 3.1248986212489863e-05, "loss": 0.5751, "step": 14986 }, { "epoch": 0.43756386674841613, "grad_norm": 0.5471102566690763, "learning_rate": 3.124736415247364e-05, "loss": 0.6945, "step": 14987 }, { "epoch": 0.4375930629762635, "grad_norm": 0.5766709830665248, "learning_rate": 3.124574209245743e-05, "loss": 0.7268, "step": 14988 }, { "epoch": 0.43762225920411085, "grad_norm": 0.5587986661216756, "learning_rate": 3.12441200324412e-05, "loss": 0.6107, "step": 14989 }, { "epoch": 0.4376514554319582, "grad_norm": 0.6100172174091848, "learning_rate": 3.1242497972424984e-05, "loss": 0.6648, "step": 14990 }, { "epoch": 0.4376806516598056, "grad_norm": 0.5146005309570982, "learning_rate": 3.1240875912408765e-05, "loss": 0.5941, "step": 14991 }, { "epoch": 0.43770984788765294, "grad_norm": 0.5018672967130167, "learning_rate": 3.123925385239254e-05, "loss": 0.5878, "step": 14992 }, { "epoch": 0.4377390441155003, "grad_norm": 0.5165916851677924, "learning_rate": 3.123763179237632e-05, "loss": 0.5924, "step": 14993 }, { "epoch": 0.43776824034334766, "grad_norm": 0.5131823780620934, "learning_rate": 3.12360097323601e-05, "loss": 0.5866, "step": 14994 }, { "epoch": 0.437797436571195, "grad_norm": 0.5190731877412963, "learning_rate": 3.123438767234388e-05, "loss": 0.6069, "step": 14995 }, { "epoch": 0.4378266327990424, "grad_norm": 0.5328889679309039, "learning_rate": 3.123276561232766e-05, "loss": 0.6918, "step": 14996 }, { "epoch": 0.43785582902688974, "grad_norm": 0.5347484595252815, "learning_rate": 3.1231143552311436e-05, "loss": 0.5869, "step": 14997 }, { "epoch": 0.4378850252547371, "grad_norm": 0.5162351580177574, "learning_rate": 3.122952149229522e-05, "loss": 0.5995, "step": 14998 }, { "epoch": 0.43791422148258446, "grad_norm": 0.48468032076244993, "learning_rate": 3.122789943227899e-05, "loss": 0.5352, "step": 14999 }, { "epoch": 0.4379434177104318, "grad_norm": 0.5437417400921092, "learning_rate": 3.1226277372262774e-05, "loss": 0.6607, "step": 15000 }, { "epoch": 0.4379726139382792, "grad_norm": 0.5446289277710121, "learning_rate": 3.1224655312246556e-05, "loss": 0.6514, "step": 15001 }, { "epoch": 0.43800181016612655, "grad_norm": 0.5192361373129769, "learning_rate": 3.122303325223033e-05, "loss": 0.6217, "step": 15002 }, { "epoch": 0.4380310063939739, "grad_norm": 0.5186824693341772, "learning_rate": 3.122141119221411e-05, "loss": 0.6088, "step": 15003 }, { "epoch": 0.43806020262182127, "grad_norm": 0.529009984212116, "learning_rate": 3.121978913219789e-05, "loss": 0.6426, "step": 15004 }, { "epoch": 0.43808939884966863, "grad_norm": 0.5841777175677241, "learning_rate": 3.121816707218167e-05, "loss": 0.6327, "step": 15005 }, { "epoch": 0.438118595077516, "grad_norm": 0.5391135062840241, "learning_rate": 3.121654501216545e-05, "loss": 0.6984, "step": 15006 }, { "epoch": 0.43814779130536335, "grad_norm": 0.5526908620538565, "learning_rate": 3.121492295214923e-05, "loss": 0.7616, "step": 15007 }, { "epoch": 0.4381769875332107, "grad_norm": 0.5528468843705833, "learning_rate": 3.1213300892133015e-05, "loss": 0.7063, "step": 15008 }, { "epoch": 0.4382061837610581, "grad_norm": 0.5603243119146981, "learning_rate": 3.121167883211679e-05, "loss": 0.6688, "step": 15009 }, { "epoch": 0.43823537998890544, "grad_norm": 0.5492361101806347, "learning_rate": 3.121005677210057e-05, "loss": 0.65, "step": 15010 }, { "epoch": 0.4382645762167528, "grad_norm": 0.4760906090725165, "learning_rate": 3.120843471208435e-05, "loss": 0.5502, "step": 15011 }, { "epoch": 0.43829377244460016, "grad_norm": 0.44427126954348367, "learning_rate": 3.120681265206813e-05, "loss": 0.4554, "step": 15012 }, { "epoch": 0.4383229686724475, "grad_norm": 0.5186194197763024, "learning_rate": 3.120519059205191e-05, "loss": 0.5832, "step": 15013 }, { "epoch": 0.4383521649002949, "grad_norm": 0.5288309532597898, "learning_rate": 3.1203568532035685e-05, "loss": 0.6127, "step": 15014 }, { "epoch": 0.43838136112814224, "grad_norm": 0.5449507648598761, "learning_rate": 3.120194647201947e-05, "loss": 0.7213, "step": 15015 }, { "epoch": 0.4384105573559896, "grad_norm": 0.5106915146744376, "learning_rate": 3.120032441200325e-05, "loss": 0.5735, "step": 15016 }, { "epoch": 0.43843975358383697, "grad_norm": 0.46105849375871916, "learning_rate": 3.1198702351987023e-05, "loss": 0.5017, "step": 15017 }, { "epoch": 0.4384689498116843, "grad_norm": 0.5649939035260275, "learning_rate": 3.1197080291970805e-05, "loss": 0.697, "step": 15018 }, { "epoch": 0.4384981460395317, "grad_norm": 0.5398299392323327, "learning_rate": 3.119545823195458e-05, "loss": 0.6532, "step": 15019 }, { "epoch": 0.43852734226737905, "grad_norm": 0.5196445174258703, "learning_rate": 3.119383617193836e-05, "loss": 0.6312, "step": 15020 }, { "epoch": 0.4385565384952264, "grad_norm": 0.5080081179850486, "learning_rate": 3.1192214111922144e-05, "loss": 0.5737, "step": 15021 }, { "epoch": 0.4385857347230738, "grad_norm": 0.5113593612402514, "learning_rate": 3.119059205190592e-05, "loss": 0.6261, "step": 15022 }, { "epoch": 0.43861493095092113, "grad_norm": 0.5209145815540175, "learning_rate": 3.11889699918897e-05, "loss": 0.6095, "step": 15023 }, { "epoch": 0.4386441271787685, "grad_norm": 0.5027193415874109, "learning_rate": 3.1187347931873475e-05, "loss": 0.5775, "step": 15024 }, { "epoch": 0.43867332340661586, "grad_norm": 0.5408585131211335, "learning_rate": 3.118572587185726e-05, "loss": 0.6394, "step": 15025 }, { "epoch": 0.4387025196344632, "grad_norm": 0.5196897654031302, "learning_rate": 3.118410381184104e-05, "loss": 0.5954, "step": 15026 }, { "epoch": 0.4387317158623106, "grad_norm": 0.5107424167645274, "learning_rate": 3.118248175182482e-05, "loss": 0.5948, "step": 15027 }, { "epoch": 0.43876091209015794, "grad_norm": 0.5355132427232744, "learning_rate": 3.11808596918086e-05, "loss": 0.6183, "step": 15028 }, { "epoch": 0.4387901083180053, "grad_norm": 0.529674716134762, "learning_rate": 3.117923763179238e-05, "loss": 0.6025, "step": 15029 }, { "epoch": 0.43881930454585266, "grad_norm": 0.44011614748971417, "learning_rate": 3.117761557177616e-05, "loss": 0.4355, "step": 15030 }, { "epoch": 0.4388485007737, "grad_norm": 0.5367698949360254, "learning_rate": 3.117599351175994e-05, "loss": 0.6413, "step": 15031 }, { "epoch": 0.4388776970015474, "grad_norm": 0.5409098346265804, "learning_rate": 3.1174371451743716e-05, "loss": 0.6276, "step": 15032 }, { "epoch": 0.43890689322939475, "grad_norm": 0.5370202296409746, "learning_rate": 3.11727493917275e-05, "loss": 0.6046, "step": 15033 }, { "epoch": 0.4389360894572421, "grad_norm": 0.5133127151327419, "learning_rate": 3.117112733171127e-05, "loss": 0.5977, "step": 15034 }, { "epoch": 0.43896528568508947, "grad_norm": 0.5271415117887978, "learning_rate": 3.1169505271695055e-05, "loss": 0.6298, "step": 15035 }, { "epoch": 0.43899448191293683, "grad_norm": 0.5243422015005712, "learning_rate": 3.1167883211678836e-05, "loss": 0.5963, "step": 15036 }, { "epoch": 0.4390236781407842, "grad_norm": 0.551851816915486, "learning_rate": 3.116626115166261e-05, "loss": 0.6828, "step": 15037 }, { "epoch": 0.43905287436863155, "grad_norm": 0.5121358141064283, "learning_rate": 3.116463909164639e-05, "loss": 0.6233, "step": 15038 }, { "epoch": 0.4390820705964789, "grad_norm": 0.508486633909724, "learning_rate": 3.116301703163017e-05, "loss": 0.6315, "step": 15039 }, { "epoch": 0.4391112668243263, "grad_norm": 0.5356114329615043, "learning_rate": 3.116139497161395e-05, "loss": 0.6293, "step": 15040 }, { "epoch": 0.43914046305217364, "grad_norm": 0.5426067561681405, "learning_rate": 3.115977291159773e-05, "loss": 0.6718, "step": 15041 }, { "epoch": 0.439169659280021, "grad_norm": 0.5025322289573029, "learning_rate": 3.1158150851581507e-05, "loss": 0.538, "step": 15042 }, { "epoch": 0.43919885550786836, "grad_norm": 0.5229002244449572, "learning_rate": 3.115652879156529e-05, "loss": 0.6255, "step": 15043 }, { "epoch": 0.4392280517357157, "grad_norm": 0.5379305908782714, "learning_rate": 3.115490673154906e-05, "loss": 0.6202, "step": 15044 }, { "epoch": 0.4392572479635631, "grad_norm": 0.5634388289774598, "learning_rate": 3.115328467153285e-05, "loss": 0.7091, "step": 15045 }, { "epoch": 0.4392864441914105, "grad_norm": 0.5571402030947947, "learning_rate": 3.115166261151663e-05, "loss": 0.6804, "step": 15046 }, { "epoch": 0.43931564041925786, "grad_norm": 0.5389332937627019, "learning_rate": 3.115004055150041e-05, "loss": 0.6279, "step": 15047 }, { "epoch": 0.4393448366471052, "grad_norm": 0.6088315487724721, "learning_rate": 3.114841849148419e-05, "loss": 0.7367, "step": 15048 }, { "epoch": 0.4393740328749526, "grad_norm": 0.5667485142499643, "learning_rate": 3.1146796431467965e-05, "loss": 0.6737, "step": 15049 }, { "epoch": 0.43940322910279994, "grad_norm": 0.5543708275067607, "learning_rate": 3.114517437145175e-05, "loss": 0.6636, "step": 15050 }, { "epoch": 0.4394324253306473, "grad_norm": 0.5158836929080852, "learning_rate": 3.114355231143552e-05, "loss": 0.558, "step": 15051 }, { "epoch": 0.43946162155849466, "grad_norm": 0.5493620793995854, "learning_rate": 3.1141930251419304e-05, "loss": 0.6398, "step": 15052 }, { "epoch": 0.439490817786342, "grad_norm": 0.6539598300507224, "learning_rate": 3.1140308191403086e-05, "loss": 0.6669, "step": 15053 }, { "epoch": 0.4395200140141894, "grad_norm": 0.49949657298947053, "learning_rate": 3.113868613138686e-05, "loss": 0.5411, "step": 15054 }, { "epoch": 0.43954921024203675, "grad_norm": 0.55435948709399, "learning_rate": 3.113706407137064e-05, "loss": 0.6893, "step": 15055 }, { "epoch": 0.4395784064698841, "grad_norm": 0.5751843313818297, "learning_rate": 3.1135442011354424e-05, "loss": 0.6596, "step": 15056 }, { "epoch": 0.43960760269773147, "grad_norm": 0.5020702259845926, "learning_rate": 3.11338199513382e-05, "loss": 0.5996, "step": 15057 }, { "epoch": 0.43963679892557883, "grad_norm": 0.5076812797960853, "learning_rate": 3.113219789132198e-05, "loss": 0.5397, "step": 15058 }, { "epoch": 0.4396659951534262, "grad_norm": 0.5416465882704348, "learning_rate": 3.1130575831305756e-05, "loss": 0.6616, "step": 15059 }, { "epoch": 0.43969519138127355, "grad_norm": 0.5294564967537475, "learning_rate": 3.112895377128954e-05, "loss": 0.6034, "step": 15060 }, { "epoch": 0.4397243876091209, "grad_norm": 0.500686846824627, "learning_rate": 3.112733171127332e-05, "loss": 0.5931, "step": 15061 }, { "epoch": 0.4397535838369683, "grad_norm": 0.5075857030573538, "learning_rate": 3.1125709651257094e-05, "loss": 0.5857, "step": 15062 }, { "epoch": 0.43978278006481564, "grad_norm": 0.5255465594314881, "learning_rate": 3.1124087591240876e-05, "loss": 0.6164, "step": 15063 }, { "epoch": 0.439811976292663, "grad_norm": 0.5192861618248968, "learning_rate": 3.112246553122466e-05, "loss": 0.5963, "step": 15064 }, { "epoch": 0.43984117252051036, "grad_norm": 0.547328076148771, "learning_rate": 3.112084347120844e-05, "loss": 0.5942, "step": 15065 }, { "epoch": 0.4398703687483577, "grad_norm": 0.5303400077880934, "learning_rate": 3.1119221411192215e-05, "loss": 0.6335, "step": 15066 }, { "epoch": 0.4398995649762051, "grad_norm": 0.45912942412867774, "learning_rate": 3.1117599351175996e-05, "loss": 0.4846, "step": 15067 }, { "epoch": 0.43992876120405244, "grad_norm": 0.5376931745838701, "learning_rate": 3.111597729115978e-05, "loss": 0.6423, "step": 15068 }, { "epoch": 0.4399579574318998, "grad_norm": 0.6537858216475569, "learning_rate": 3.111435523114355e-05, "loss": 0.7516, "step": 15069 }, { "epoch": 0.43998715365974717, "grad_norm": 0.5745425437850106, "learning_rate": 3.1112733171127335e-05, "loss": 0.6459, "step": 15070 }, { "epoch": 0.4400163498875945, "grad_norm": 0.4953284906150465, "learning_rate": 3.111111111111111e-05, "loss": 0.56, "step": 15071 }, { "epoch": 0.4400455461154419, "grad_norm": 0.5395242974307516, "learning_rate": 3.110948905109489e-05, "loss": 0.6734, "step": 15072 }, { "epoch": 0.44007474234328925, "grad_norm": 0.5317833098394822, "learning_rate": 3.1107866991078673e-05, "loss": 0.6291, "step": 15073 }, { "epoch": 0.4401039385711366, "grad_norm": 0.49820918717085216, "learning_rate": 3.110624493106245e-05, "loss": 0.558, "step": 15074 }, { "epoch": 0.44013313479898397, "grad_norm": 0.4972580750594936, "learning_rate": 3.110462287104623e-05, "loss": 0.5545, "step": 15075 }, { "epoch": 0.44016233102683133, "grad_norm": 0.5755024398540565, "learning_rate": 3.110300081103001e-05, "loss": 0.6965, "step": 15076 }, { "epoch": 0.4401915272546787, "grad_norm": 0.5669370230886062, "learning_rate": 3.110137875101379e-05, "loss": 0.679, "step": 15077 }, { "epoch": 0.44022072348252606, "grad_norm": 0.472763498352708, "learning_rate": 3.109975669099757e-05, "loss": 0.5074, "step": 15078 }, { "epoch": 0.4402499197103734, "grad_norm": 0.7559748669096595, "learning_rate": 3.1098134630981344e-05, "loss": 0.7503, "step": 15079 }, { "epoch": 0.4402791159382208, "grad_norm": 0.5485900115030842, "learning_rate": 3.1096512570965125e-05, "loss": 0.6413, "step": 15080 }, { "epoch": 0.44030831216606814, "grad_norm": 0.5775291395602479, "learning_rate": 3.109489051094891e-05, "loss": 0.7097, "step": 15081 }, { "epoch": 0.4403375083939155, "grad_norm": 0.5175818920982085, "learning_rate": 3.109326845093268e-05, "loss": 0.6259, "step": 15082 }, { "epoch": 0.44036670462176286, "grad_norm": 0.529898548974424, "learning_rate": 3.109164639091647e-05, "loss": 0.6374, "step": 15083 }, { "epoch": 0.4403959008496102, "grad_norm": 0.5414007578384857, "learning_rate": 3.1090024330900246e-05, "loss": 0.6821, "step": 15084 }, { "epoch": 0.4404250970774576, "grad_norm": 0.5356457595353554, "learning_rate": 3.108840227088403e-05, "loss": 0.6144, "step": 15085 }, { "epoch": 0.44045429330530494, "grad_norm": 0.5142762258372688, "learning_rate": 3.10867802108678e-05, "loss": 0.6105, "step": 15086 }, { "epoch": 0.4404834895331523, "grad_norm": 0.5613899125208957, "learning_rate": 3.1085158150851584e-05, "loss": 0.6749, "step": 15087 }, { "epoch": 0.44051268576099967, "grad_norm": 0.4993746188934732, "learning_rate": 3.1083536090835366e-05, "loss": 0.5865, "step": 15088 }, { "epoch": 0.44054188198884703, "grad_norm": 0.5209502180794556, "learning_rate": 3.108191403081914e-05, "loss": 0.6333, "step": 15089 }, { "epoch": 0.4405710782166944, "grad_norm": 0.5439699592442635, "learning_rate": 3.108029197080292e-05, "loss": 0.6254, "step": 15090 }, { "epoch": 0.44060027444454175, "grad_norm": 0.5416728794546607, "learning_rate": 3.10786699107867e-05, "loss": 0.6666, "step": 15091 }, { "epoch": 0.4406294706723891, "grad_norm": 0.5500044847490497, "learning_rate": 3.107704785077048e-05, "loss": 0.6999, "step": 15092 }, { "epoch": 0.4406586669002365, "grad_norm": 0.5364239640988913, "learning_rate": 3.107542579075426e-05, "loss": 0.6287, "step": 15093 }, { "epoch": 0.44068786312808383, "grad_norm": 0.5521531558652677, "learning_rate": 3.1073803730738036e-05, "loss": 0.6363, "step": 15094 }, { "epoch": 0.4407170593559312, "grad_norm": 0.5378502897312193, "learning_rate": 3.107218167072182e-05, "loss": 0.6685, "step": 15095 }, { "epoch": 0.44074625558377856, "grad_norm": 0.6264136202304192, "learning_rate": 3.107055961070559e-05, "loss": 0.7385, "step": 15096 }, { "epoch": 0.4407754518116259, "grad_norm": 0.48714900315427984, "learning_rate": 3.1068937550689375e-05, "loss": 0.5645, "step": 15097 }, { "epoch": 0.4408046480394733, "grad_norm": 0.5355388532668254, "learning_rate": 3.1067315490673156e-05, "loss": 0.6379, "step": 15098 }, { "epoch": 0.44083384426732064, "grad_norm": 0.5392802250844724, "learning_rate": 3.106569343065693e-05, "loss": 0.6345, "step": 15099 }, { "epoch": 0.440863040495168, "grad_norm": 0.4943286115333, "learning_rate": 3.106407137064071e-05, "loss": 0.5523, "step": 15100 }, { "epoch": 0.44089223672301536, "grad_norm": 0.5110500183736609, "learning_rate": 3.1062449310624495e-05, "loss": 0.5582, "step": 15101 }, { "epoch": 0.4409214329508627, "grad_norm": 0.5351541765601718, "learning_rate": 3.106082725060828e-05, "loss": 0.6131, "step": 15102 }, { "epoch": 0.4409506291787101, "grad_norm": 0.5903273557535939, "learning_rate": 3.105920519059206e-05, "loss": 0.781, "step": 15103 }, { "epoch": 0.44097982540655745, "grad_norm": 0.530998986228085, "learning_rate": 3.1057583130575834e-05, "loss": 0.6402, "step": 15104 }, { "epoch": 0.4410090216344048, "grad_norm": 0.49905165280681174, "learning_rate": 3.1055961070559615e-05, "loss": 0.558, "step": 15105 }, { "epoch": 0.4410382178622522, "grad_norm": 0.5030698038607566, "learning_rate": 3.105433901054339e-05, "loss": 0.6019, "step": 15106 }, { "epoch": 0.4410674140900996, "grad_norm": 0.5721881902298384, "learning_rate": 3.105271695052717e-05, "loss": 0.6238, "step": 15107 }, { "epoch": 0.44109661031794695, "grad_norm": 0.5346659669737963, "learning_rate": 3.1051094890510954e-05, "loss": 0.6575, "step": 15108 }, { "epoch": 0.4411258065457943, "grad_norm": 0.5711903468959769, "learning_rate": 3.104947283049473e-05, "loss": 0.639, "step": 15109 }, { "epoch": 0.44115500277364167, "grad_norm": 0.4861024988268285, "learning_rate": 3.104785077047851e-05, "loss": 0.5549, "step": 15110 }, { "epoch": 0.44118419900148903, "grad_norm": 0.5375598504375397, "learning_rate": 3.1046228710462286e-05, "loss": 0.5887, "step": 15111 }, { "epoch": 0.4412133952293364, "grad_norm": 0.5260794003464893, "learning_rate": 3.104460665044607e-05, "loss": 0.6431, "step": 15112 }, { "epoch": 0.44124259145718375, "grad_norm": 0.5105124371667927, "learning_rate": 3.104298459042985e-05, "loss": 0.5836, "step": 15113 }, { "epoch": 0.4412717876850311, "grad_norm": 0.516324800586502, "learning_rate": 3.1041362530413624e-05, "loss": 0.5853, "step": 15114 }, { "epoch": 0.4413009839128785, "grad_norm": 0.5198401419013065, "learning_rate": 3.1039740470397406e-05, "loss": 0.5985, "step": 15115 }, { "epoch": 0.44133018014072584, "grad_norm": 0.5444551733389369, "learning_rate": 3.103811841038118e-05, "loss": 0.6426, "step": 15116 }, { "epoch": 0.4413593763685732, "grad_norm": 0.5509574424532716, "learning_rate": 3.103649635036496e-05, "loss": 0.6512, "step": 15117 }, { "epoch": 0.44138857259642056, "grad_norm": 0.5046823350376051, "learning_rate": 3.1034874290348744e-05, "loss": 0.5695, "step": 15118 }, { "epoch": 0.4414177688242679, "grad_norm": 0.5018003298417246, "learning_rate": 3.103325223033252e-05, "loss": 0.5457, "step": 15119 }, { "epoch": 0.4414469650521153, "grad_norm": 0.5090319328196895, "learning_rate": 3.103163017031631e-05, "loss": 0.587, "step": 15120 }, { "epoch": 0.44147616127996264, "grad_norm": 0.5397096804547786, "learning_rate": 3.103000811030008e-05, "loss": 0.6729, "step": 15121 }, { "epoch": 0.44150535750781, "grad_norm": 0.5268232280799285, "learning_rate": 3.1028386050283865e-05, "loss": 0.6154, "step": 15122 }, { "epoch": 0.44153455373565736, "grad_norm": 0.5377292028688369, "learning_rate": 3.1026763990267646e-05, "loss": 0.6331, "step": 15123 }, { "epoch": 0.4415637499635047, "grad_norm": 0.7567666360626675, "learning_rate": 3.102514193025142e-05, "loss": 0.7584, "step": 15124 }, { "epoch": 0.4415929461913521, "grad_norm": 0.5103691420108041, "learning_rate": 3.10235198702352e-05, "loss": 0.5682, "step": 15125 }, { "epoch": 0.44162214241919945, "grad_norm": 0.5103490525245792, "learning_rate": 3.102189781021898e-05, "loss": 0.5713, "step": 15126 }, { "epoch": 0.4416513386470468, "grad_norm": 0.508311685836326, "learning_rate": 3.102027575020276e-05, "loss": 0.589, "step": 15127 }, { "epoch": 0.44168053487489417, "grad_norm": 0.5434358441871785, "learning_rate": 3.101865369018654e-05, "loss": 0.6505, "step": 15128 }, { "epoch": 0.44170973110274153, "grad_norm": 0.47710010106336276, "learning_rate": 3.1017031630170317e-05, "loss": 0.5277, "step": 15129 }, { "epoch": 0.4417389273305889, "grad_norm": 0.5187339307440781, "learning_rate": 3.10154095701541e-05, "loss": 0.6089, "step": 15130 }, { "epoch": 0.44176812355843625, "grad_norm": 0.5191726497195178, "learning_rate": 3.101378751013787e-05, "loss": 0.6086, "step": 15131 }, { "epoch": 0.4417973197862836, "grad_norm": 0.5466292565279068, "learning_rate": 3.1012165450121655e-05, "loss": 0.63, "step": 15132 }, { "epoch": 0.441826516014131, "grad_norm": 0.4889465032110933, "learning_rate": 3.101054339010544e-05, "loss": 0.546, "step": 15133 }, { "epoch": 0.44185571224197834, "grad_norm": 0.5196668414497599, "learning_rate": 3.100892133008921e-05, "loss": 0.601, "step": 15134 }, { "epoch": 0.4418849084698257, "grad_norm": 0.5146377389115537, "learning_rate": 3.1007299270072994e-05, "loss": 0.5915, "step": 15135 }, { "epoch": 0.44191410469767306, "grad_norm": 0.5408453930425974, "learning_rate": 3.100567721005677e-05, "loss": 0.6295, "step": 15136 }, { "epoch": 0.4419433009255204, "grad_norm": 0.5660816950125442, "learning_rate": 3.100405515004055e-05, "loss": 0.6863, "step": 15137 }, { "epoch": 0.4419724971533678, "grad_norm": 0.5322746101357541, "learning_rate": 3.100243309002433e-05, "loss": 0.6008, "step": 15138 }, { "epoch": 0.44200169338121514, "grad_norm": 0.5331805074046052, "learning_rate": 3.1000811030008114e-05, "loss": 0.6473, "step": 15139 }, { "epoch": 0.4420308896090625, "grad_norm": 0.5548431132800852, "learning_rate": 3.0999188969991896e-05, "loss": 0.6878, "step": 15140 }, { "epoch": 0.44206008583690987, "grad_norm": 0.5947218653064479, "learning_rate": 3.099756690997567e-05, "loss": 0.6533, "step": 15141 }, { "epoch": 0.4420892820647572, "grad_norm": 0.5131475243148779, "learning_rate": 3.099594484995945e-05, "loss": 0.6069, "step": 15142 }, { "epoch": 0.4421184782926046, "grad_norm": 0.5107989210407439, "learning_rate": 3.0994322789943234e-05, "loss": 0.6138, "step": 15143 }, { "epoch": 0.44214767452045195, "grad_norm": 0.5416653950293329, "learning_rate": 3.099270072992701e-05, "loss": 0.6157, "step": 15144 }, { "epoch": 0.4421768707482993, "grad_norm": 0.5632700514098553, "learning_rate": 3.099107866991079e-05, "loss": 0.6904, "step": 15145 }, { "epoch": 0.44220606697614667, "grad_norm": 0.5115022135101108, "learning_rate": 3.0989456609894566e-05, "loss": 0.5662, "step": 15146 }, { "epoch": 0.44223526320399403, "grad_norm": 0.5113968665537405, "learning_rate": 3.098783454987835e-05, "loss": 0.6032, "step": 15147 }, { "epoch": 0.4422644594318414, "grad_norm": 0.5451980363264086, "learning_rate": 3.098621248986213e-05, "loss": 0.7337, "step": 15148 }, { "epoch": 0.44229365565968876, "grad_norm": 0.5447719206818887, "learning_rate": 3.0984590429845904e-05, "loss": 0.6887, "step": 15149 }, { "epoch": 0.4423228518875361, "grad_norm": 0.5291192349167264, "learning_rate": 3.0982968369829686e-05, "loss": 0.5982, "step": 15150 }, { "epoch": 0.4423520481153835, "grad_norm": 0.7843189109709322, "learning_rate": 3.098134630981346e-05, "loss": 0.5523, "step": 15151 }, { "epoch": 0.44238124434323084, "grad_norm": 0.5429640830944802, "learning_rate": 3.097972424979724e-05, "loss": 0.6309, "step": 15152 }, { "epoch": 0.4424104405710782, "grad_norm": 0.5474035351668624, "learning_rate": 3.0978102189781025e-05, "loss": 0.6289, "step": 15153 }, { "epoch": 0.44243963679892556, "grad_norm": 0.5135999026816565, "learning_rate": 3.09764801297648e-05, "loss": 0.6127, "step": 15154 }, { "epoch": 0.4424688330267729, "grad_norm": 0.507627904260971, "learning_rate": 3.097485806974858e-05, "loss": 0.6219, "step": 15155 }, { "epoch": 0.4424980292546203, "grad_norm": 0.5170095291395432, "learning_rate": 3.0973236009732356e-05, "loss": 0.6017, "step": 15156 }, { "epoch": 0.44252722548246765, "grad_norm": 0.5319363401673706, "learning_rate": 3.097161394971614e-05, "loss": 0.6136, "step": 15157 }, { "epoch": 0.442556421710315, "grad_norm": 0.5085868652131881, "learning_rate": 3.096999188969992e-05, "loss": 0.5884, "step": 15158 }, { "epoch": 0.44258561793816237, "grad_norm": 0.5232995485422317, "learning_rate": 3.09683698296837e-05, "loss": 0.5915, "step": 15159 }, { "epoch": 0.44261481416600973, "grad_norm": 0.5086680556809889, "learning_rate": 3.0966747769667483e-05, "loss": 0.5912, "step": 15160 }, { "epoch": 0.4426440103938571, "grad_norm": 0.5522409905974648, "learning_rate": 3.096512570965126e-05, "loss": 0.7319, "step": 15161 }, { "epoch": 0.44267320662170445, "grad_norm": 0.5166149211865425, "learning_rate": 3.096350364963504e-05, "loss": 0.6203, "step": 15162 }, { "epoch": 0.4427024028495518, "grad_norm": 0.5113034575917115, "learning_rate": 3.096188158961882e-05, "loss": 0.6081, "step": 15163 }, { "epoch": 0.4427315990773992, "grad_norm": 0.582552000330561, "learning_rate": 3.09602595296026e-05, "loss": 0.7005, "step": 15164 }, { "epoch": 0.44276079530524654, "grad_norm": 0.5659590478010681, "learning_rate": 3.095863746958638e-05, "loss": 0.7036, "step": 15165 }, { "epoch": 0.44278999153309395, "grad_norm": 0.5595977796517624, "learning_rate": 3.0957015409570154e-05, "loss": 0.659, "step": 15166 }, { "epoch": 0.4428191877609413, "grad_norm": 0.5163853200777974, "learning_rate": 3.0955393349553935e-05, "loss": 0.6093, "step": 15167 }, { "epoch": 0.4428483839887887, "grad_norm": 0.5677123374860503, "learning_rate": 3.095377128953772e-05, "loss": 0.7062, "step": 15168 }, { "epoch": 0.44287758021663604, "grad_norm": 0.5369177928199388, "learning_rate": 3.095214922952149e-05, "loss": 0.6065, "step": 15169 }, { "epoch": 0.4429067764444834, "grad_norm": 0.5317954240080245, "learning_rate": 3.0950527169505274e-05, "loss": 0.5898, "step": 15170 }, { "epoch": 0.44293597267233076, "grad_norm": 0.49996155133374287, "learning_rate": 3.094890510948905e-05, "loss": 0.5899, "step": 15171 }, { "epoch": 0.4429651689001781, "grad_norm": 0.517385068508746, "learning_rate": 3.094728304947283e-05, "loss": 0.6036, "step": 15172 }, { "epoch": 0.4429943651280255, "grad_norm": 0.5504623606280962, "learning_rate": 3.094566098945661e-05, "loss": 0.5692, "step": 15173 }, { "epoch": 0.44302356135587284, "grad_norm": 0.5374242200882515, "learning_rate": 3.094403892944039e-05, "loss": 0.6374, "step": 15174 }, { "epoch": 0.4430527575837202, "grad_norm": 0.5178451400289512, "learning_rate": 3.094241686942417e-05, "loss": 0.6131, "step": 15175 }, { "epoch": 0.44308195381156756, "grad_norm": 0.5097605433421024, "learning_rate": 3.0940794809407944e-05, "loss": 0.5357, "step": 15176 }, { "epoch": 0.4431111500394149, "grad_norm": 0.543377660449767, "learning_rate": 3.093917274939173e-05, "loss": 0.7023, "step": 15177 }, { "epoch": 0.4431403462672623, "grad_norm": 0.5614689906858682, "learning_rate": 3.093755068937551e-05, "loss": 0.6553, "step": 15178 }, { "epoch": 0.44316954249510965, "grad_norm": 0.5475395583780879, "learning_rate": 3.093592862935929e-05, "loss": 0.6297, "step": 15179 }, { "epoch": 0.443198738722957, "grad_norm": 0.47344052437369777, "learning_rate": 3.093430656934307e-05, "loss": 0.4795, "step": 15180 }, { "epoch": 0.44322793495080437, "grad_norm": 0.5286626697485646, "learning_rate": 3.0932684509326846e-05, "loss": 0.6263, "step": 15181 }, { "epoch": 0.44325713117865173, "grad_norm": 0.5069534351508372, "learning_rate": 3.093106244931063e-05, "loss": 0.5857, "step": 15182 }, { "epoch": 0.4432863274064991, "grad_norm": 0.5224073642349734, "learning_rate": 3.09294403892944e-05, "loss": 0.6036, "step": 15183 }, { "epoch": 0.44331552363434645, "grad_norm": 0.5537439374333644, "learning_rate": 3.0927818329278185e-05, "loss": 0.6644, "step": 15184 }, { "epoch": 0.4433447198621938, "grad_norm": 0.5345218368725906, "learning_rate": 3.0926196269261966e-05, "loss": 0.6444, "step": 15185 }, { "epoch": 0.4433739160900412, "grad_norm": 0.5279169301538711, "learning_rate": 3.092457420924574e-05, "loss": 0.6417, "step": 15186 }, { "epoch": 0.44340311231788854, "grad_norm": 0.5804023098072354, "learning_rate": 3.092295214922952e-05, "loss": 0.6412, "step": 15187 }, { "epoch": 0.4434323085457359, "grad_norm": 0.5841725964136474, "learning_rate": 3.0921330089213305e-05, "loss": 0.7259, "step": 15188 }, { "epoch": 0.44346150477358326, "grad_norm": 0.5204083624236077, "learning_rate": 3.091970802919708e-05, "loss": 0.6284, "step": 15189 }, { "epoch": 0.4434907010014306, "grad_norm": 0.5388184301949284, "learning_rate": 3.091808596918086e-05, "loss": 0.6389, "step": 15190 }, { "epoch": 0.443519897229278, "grad_norm": 0.53787928027905, "learning_rate": 3.091646390916464e-05, "loss": 0.6451, "step": 15191 }, { "epoch": 0.44354909345712534, "grad_norm": 0.5277952953887477, "learning_rate": 3.091484184914842e-05, "loss": 0.6793, "step": 15192 }, { "epoch": 0.4435782896849727, "grad_norm": 0.5286038016761127, "learning_rate": 3.09132197891322e-05, "loss": 0.6185, "step": 15193 }, { "epoch": 0.44360748591282007, "grad_norm": 0.5670183391048024, "learning_rate": 3.0911597729115975e-05, "loss": 0.6358, "step": 15194 }, { "epoch": 0.4436366821406674, "grad_norm": 0.5191799221180345, "learning_rate": 3.090997566909976e-05, "loss": 0.5977, "step": 15195 }, { "epoch": 0.4436658783685148, "grad_norm": 0.529453170060627, "learning_rate": 3.090835360908354e-05, "loss": 0.5981, "step": 15196 }, { "epoch": 0.44369507459636215, "grad_norm": 0.4871888635254749, "learning_rate": 3.090673154906732e-05, "loss": 0.5791, "step": 15197 }, { "epoch": 0.4437242708242095, "grad_norm": 0.5133636389269438, "learning_rate": 3.0905109489051096e-05, "loss": 0.6122, "step": 15198 }, { "epoch": 0.44375346705205687, "grad_norm": 0.5151399439391613, "learning_rate": 3.090348742903488e-05, "loss": 0.6014, "step": 15199 }, { "epoch": 0.44378266327990423, "grad_norm": 0.5438978815423038, "learning_rate": 3.090186536901866e-05, "loss": 0.6094, "step": 15200 }, { "epoch": 0.4438118595077516, "grad_norm": 0.5048511792614128, "learning_rate": 3.0900243309002434e-05, "loss": 0.5535, "step": 15201 }, { "epoch": 0.44384105573559895, "grad_norm": 0.5628214268492785, "learning_rate": 3.0898621248986216e-05, "loss": 0.699, "step": 15202 }, { "epoch": 0.4438702519634463, "grad_norm": 0.5494264038456631, "learning_rate": 3.089699918896999e-05, "loss": 0.654, "step": 15203 }, { "epoch": 0.4438994481912937, "grad_norm": 0.4855815776985649, "learning_rate": 3.089537712895377e-05, "loss": 0.5369, "step": 15204 }, { "epoch": 0.44392864441914104, "grad_norm": 0.5017340483816233, "learning_rate": 3.0893755068937554e-05, "loss": 0.5913, "step": 15205 }, { "epoch": 0.4439578406469884, "grad_norm": 0.5537802732214648, "learning_rate": 3.089213300892133e-05, "loss": 0.673, "step": 15206 }, { "epoch": 0.44398703687483576, "grad_norm": 0.512270631743689, "learning_rate": 3.089051094890511e-05, "loss": 0.5924, "step": 15207 }, { "epoch": 0.4440162331026831, "grad_norm": 0.4888310376346495, "learning_rate": 3.088888888888889e-05, "loss": 0.5771, "step": 15208 }, { "epoch": 0.4440454293305305, "grad_norm": 0.5149132328062582, "learning_rate": 3.088726682887267e-05, "loss": 0.6065, "step": 15209 }, { "epoch": 0.44407462555837784, "grad_norm": 0.519591408433606, "learning_rate": 3.088564476885645e-05, "loss": 0.5334, "step": 15210 }, { "epoch": 0.4441038217862252, "grad_norm": 0.5608691693470756, "learning_rate": 3.0884022708840225e-05, "loss": 0.6883, "step": 15211 }, { "epoch": 0.44413301801407257, "grad_norm": 0.5324683607930968, "learning_rate": 3.0882400648824006e-05, "loss": 0.6093, "step": 15212 }, { "epoch": 0.44416221424191993, "grad_norm": 0.4973692602860875, "learning_rate": 3.088077858880779e-05, "loss": 0.5847, "step": 15213 }, { "epoch": 0.4441914104697673, "grad_norm": 0.5712896784062687, "learning_rate": 3.087915652879156e-05, "loss": 0.7301, "step": 15214 }, { "epoch": 0.44422060669761465, "grad_norm": 0.5232544415832165, "learning_rate": 3.087753446877535e-05, "loss": 0.6477, "step": 15215 }, { "epoch": 0.444249802925462, "grad_norm": 0.5156012694733746, "learning_rate": 3.0875912408759127e-05, "loss": 0.585, "step": 15216 }, { "epoch": 0.4442789991533094, "grad_norm": 0.5389220369832292, "learning_rate": 3.087429034874291e-05, "loss": 0.6503, "step": 15217 }, { "epoch": 0.44430819538115673, "grad_norm": 0.4750988283916065, "learning_rate": 3.087266828872668e-05, "loss": 0.5403, "step": 15218 }, { "epoch": 0.4443373916090041, "grad_norm": 0.5388635376088832, "learning_rate": 3.0871046228710465e-05, "loss": 0.671, "step": 15219 }, { "epoch": 0.44436658783685146, "grad_norm": 0.5284503462826191, "learning_rate": 3.086942416869425e-05, "loss": 0.583, "step": 15220 }, { "epoch": 0.4443957840646988, "grad_norm": 0.5042108365651505, "learning_rate": 3.086780210867802e-05, "loss": 0.5914, "step": 15221 }, { "epoch": 0.4444249802925462, "grad_norm": 0.5161478286822089, "learning_rate": 3.0866180048661804e-05, "loss": 0.5687, "step": 15222 }, { "epoch": 0.44445417652039354, "grad_norm": 0.4554970217529675, "learning_rate": 3.086455798864558e-05, "loss": 0.4758, "step": 15223 }, { "epoch": 0.4444833727482409, "grad_norm": 0.5675827349077659, "learning_rate": 3.086293592862936e-05, "loss": 0.6644, "step": 15224 }, { "epoch": 0.44451256897608826, "grad_norm": 0.5689946741270359, "learning_rate": 3.086131386861314e-05, "loss": 0.6801, "step": 15225 }, { "epoch": 0.4445417652039356, "grad_norm": 0.5089872994341699, "learning_rate": 3.085969180859692e-05, "loss": 0.601, "step": 15226 }, { "epoch": 0.44457096143178304, "grad_norm": 0.556098117320879, "learning_rate": 3.08580697485807e-05, "loss": 0.6785, "step": 15227 }, { "epoch": 0.4446001576596304, "grad_norm": 0.5503662165260043, "learning_rate": 3.0856447688564474e-05, "loss": 0.6351, "step": 15228 }, { "epoch": 0.44462935388747776, "grad_norm": 0.5177662000622613, "learning_rate": 3.0854825628548256e-05, "loss": 0.6333, "step": 15229 }, { "epoch": 0.4446585501153251, "grad_norm": 0.6168454580194763, "learning_rate": 3.085320356853204e-05, "loss": 0.7857, "step": 15230 }, { "epoch": 0.4446877463431725, "grad_norm": 0.4728112253601349, "learning_rate": 3.085158150851581e-05, "loss": 0.5357, "step": 15231 }, { "epoch": 0.44471694257101985, "grad_norm": 0.5157586986480462, "learning_rate": 3.0849959448499594e-05, "loss": 0.5984, "step": 15232 }, { "epoch": 0.4447461387988672, "grad_norm": 0.5326662668496355, "learning_rate": 3.0848337388483376e-05, "loss": 0.6245, "step": 15233 }, { "epoch": 0.44477533502671457, "grad_norm": 0.4828769991691902, "learning_rate": 3.084671532846716e-05, "loss": 0.5405, "step": 15234 }, { "epoch": 0.44480453125456193, "grad_norm": 0.5024738828245844, "learning_rate": 3.084509326845094e-05, "loss": 0.5625, "step": 15235 }, { "epoch": 0.4448337274824093, "grad_norm": 0.5534039730525919, "learning_rate": 3.0843471208434714e-05, "loss": 0.5939, "step": 15236 }, { "epoch": 0.44486292371025665, "grad_norm": 0.4948499380589109, "learning_rate": 3.0841849148418496e-05, "loss": 0.5841, "step": 15237 }, { "epoch": 0.444892119938104, "grad_norm": 0.5172228017726623, "learning_rate": 3.084022708840227e-05, "loss": 0.588, "step": 15238 }, { "epoch": 0.4449213161659514, "grad_norm": 0.526385371190323, "learning_rate": 3.083860502838605e-05, "loss": 0.5645, "step": 15239 }, { "epoch": 0.44495051239379874, "grad_norm": 0.5334872835019686, "learning_rate": 3.0836982968369835e-05, "loss": 0.6229, "step": 15240 }, { "epoch": 0.4449797086216461, "grad_norm": 0.513871356735103, "learning_rate": 3.083536090835361e-05, "loss": 0.6329, "step": 15241 }, { "epoch": 0.44500890484949346, "grad_norm": 0.5919884507411601, "learning_rate": 3.083373884833739e-05, "loss": 0.72, "step": 15242 }, { "epoch": 0.4450381010773408, "grad_norm": 0.5262284291599796, "learning_rate": 3.0832116788321166e-05, "loss": 0.6058, "step": 15243 }, { "epoch": 0.4450672973051882, "grad_norm": 0.5686243900881902, "learning_rate": 3.083049472830495e-05, "loss": 0.7039, "step": 15244 }, { "epoch": 0.44509649353303554, "grad_norm": 0.5178609690111665, "learning_rate": 3.082887266828873e-05, "loss": 0.5942, "step": 15245 }, { "epoch": 0.4451256897608829, "grad_norm": 0.5320475995239766, "learning_rate": 3.0827250608272505e-05, "loss": 0.6662, "step": 15246 }, { "epoch": 0.44515488598873026, "grad_norm": 0.5490504580633976, "learning_rate": 3.082562854825629e-05, "loss": 0.5942, "step": 15247 }, { "epoch": 0.4451840822165776, "grad_norm": 0.5066256319052471, "learning_rate": 3.082400648824006e-05, "loss": 0.5915, "step": 15248 }, { "epoch": 0.445213278444425, "grad_norm": 0.5626878052655714, "learning_rate": 3.0822384428223843e-05, "loss": 0.671, "step": 15249 }, { "epoch": 0.44524247467227235, "grad_norm": 0.5222809889296898, "learning_rate": 3.0820762368207625e-05, "loss": 0.6047, "step": 15250 }, { "epoch": 0.4452716709001197, "grad_norm": 0.5441620209758903, "learning_rate": 3.08191403081914e-05, "loss": 0.6641, "step": 15251 }, { "epoch": 0.44530086712796707, "grad_norm": 0.5430127913176301, "learning_rate": 3.081751824817518e-05, "loss": 0.6182, "step": 15252 }, { "epoch": 0.44533006335581443, "grad_norm": 0.567432254501602, "learning_rate": 3.0815896188158964e-05, "loss": 0.6612, "step": 15253 }, { "epoch": 0.4453592595836618, "grad_norm": 0.5138868574809563, "learning_rate": 3.0814274128142745e-05, "loss": 0.5847, "step": 15254 }, { "epoch": 0.44538845581150915, "grad_norm": 0.507577575821726, "learning_rate": 3.081265206812653e-05, "loss": 0.5777, "step": 15255 }, { "epoch": 0.4454176520393565, "grad_norm": 0.5238846774326922, "learning_rate": 3.08110300081103e-05, "loss": 0.5998, "step": 15256 }, { "epoch": 0.4454468482672039, "grad_norm": 0.5318364107574165, "learning_rate": 3.0809407948094084e-05, "loss": 0.6232, "step": 15257 }, { "epoch": 0.44547604449505124, "grad_norm": 0.534071193557907, "learning_rate": 3.080778588807786e-05, "loss": 0.6221, "step": 15258 }, { "epoch": 0.4455052407228986, "grad_norm": 0.5538060177677293, "learning_rate": 3.080616382806164e-05, "loss": 0.7137, "step": 15259 }, { "epoch": 0.44553443695074596, "grad_norm": 0.5135073483520827, "learning_rate": 3.080454176804542e-05, "loss": 0.6292, "step": 15260 }, { "epoch": 0.4455636331785933, "grad_norm": 0.509235363797784, "learning_rate": 3.08029197080292e-05, "loss": 0.6242, "step": 15261 }, { "epoch": 0.4455928294064407, "grad_norm": 0.529285920149808, "learning_rate": 3.080129764801298e-05, "loss": 0.6427, "step": 15262 }, { "epoch": 0.44562202563428804, "grad_norm": 0.5365020638524349, "learning_rate": 3.0799675587996754e-05, "loss": 0.6784, "step": 15263 }, { "epoch": 0.4456512218621354, "grad_norm": 0.5272057408740438, "learning_rate": 3.0798053527980536e-05, "loss": 0.6042, "step": 15264 }, { "epoch": 0.44568041808998277, "grad_norm": 0.46200433772624677, "learning_rate": 3.079643146796432e-05, "loss": 0.5055, "step": 15265 }, { "epoch": 0.4457096143178301, "grad_norm": 0.48973589805103396, "learning_rate": 3.079480940794809e-05, "loss": 0.57, "step": 15266 }, { "epoch": 0.4457388105456775, "grad_norm": 0.5152106128494168, "learning_rate": 3.0793187347931874e-05, "loss": 0.5847, "step": 15267 }, { "epoch": 0.44576800677352485, "grad_norm": 0.5152433921262866, "learning_rate": 3.079156528791565e-05, "loss": 0.598, "step": 15268 }, { "epoch": 0.4457972030013722, "grad_norm": 0.5137767272275099, "learning_rate": 3.078994322789943e-05, "loss": 0.6134, "step": 15269 }, { "epoch": 0.44582639922921957, "grad_norm": 0.5291586474725596, "learning_rate": 3.078832116788321e-05, "loss": 0.6412, "step": 15270 }, { "epoch": 0.44585559545706693, "grad_norm": 0.5305380619743509, "learning_rate": 3.0786699107866995e-05, "loss": 0.6442, "step": 15271 }, { "epoch": 0.4458847916849143, "grad_norm": 0.5049906748359905, "learning_rate": 3.0785077047850777e-05, "loss": 0.5756, "step": 15272 }, { "epoch": 0.44591398791276166, "grad_norm": 0.5115592883133534, "learning_rate": 3.078345498783455e-05, "loss": 0.6229, "step": 15273 }, { "epoch": 0.445943184140609, "grad_norm": 0.5215010374181666, "learning_rate": 3.078183292781833e-05, "loss": 0.5839, "step": 15274 }, { "epoch": 0.4459723803684564, "grad_norm": 0.5228737658443199, "learning_rate": 3.0780210867802115e-05, "loss": 0.5674, "step": 15275 }, { "epoch": 0.44600157659630374, "grad_norm": 0.5389427121635397, "learning_rate": 3.077858880778589e-05, "loss": 0.6168, "step": 15276 }, { "epoch": 0.4460307728241511, "grad_norm": 0.549383809361182, "learning_rate": 3.077696674776967e-05, "loss": 0.6615, "step": 15277 }, { "epoch": 0.44605996905199846, "grad_norm": 0.522376257398284, "learning_rate": 3.077534468775345e-05, "loss": 0.5873, "step": 15278 }, { "epoch": 0.4460891652798458, "grad_norm": 0.574332543326171, "learning_rate": 3.077372262773723e-05, "loss": 0.6422, "step": 15279 }, { "epoch": 0.4461183615076932, "grad_norm": 0.5429442118152448, "learning_rate": 3.077210056772101e-05, "loss": 0.5961, "step": 15280 }, { "epoch": 0.44614755773554055, "grad_norm": 0.5670010491625729, "learning_rate": 3.0770478507704785e-05, "loss": 0.6188, "step": 15281 }, { "epoch": 0.4461767539633879, "grad_norm": 0.6209062452107889, "learning_rate": 3.076885644768857e-05, "loss": 0.7761, "step": 15282 }, { "epoch": 0.44620595019123527, "grad_norm": 0.5324943464785062, "learning_rate": 3.076723438767234e-05, "loss": 0.615, "step": 15283 }, { "epoch": 0.44623514641908263, "grad_norm": 0.5792679524190791, "learning_rate": 3.0765612327656124e-05, "loss": 0.7131, "step": 15284 }, { "epoch": 0.44626434264693, "grad_norm": 0.5273473047610099, "learning_rate": 3.0763990267639906e-05, "loss": 0.6355, "step": 15285 }, { "epoch": 0.44629353887477735, "grad_norm": 0.5062361727692086, "learning_rate": 3.076236820762368e-05, "loss": 0.5846, "step": 15286 }, { "epoch": 0.44632273510262477, "grad_norm": 0.553195379053866, "learning_rate": 3.076074614760746e-05, "loss": 0.6688, "step": 15287 }, { "epoch": 0.44635193133047213, "grad_norm": 0.5263449509595781, "learning_rate": 3.075912408759124e-05, "loss": 0.6145, "step": 15288 }, { "epoch": 0.4463811275583195, "grad_norm": 0.5323006485644642, "learning_rate": 3.075750202757502e-05, "loss": 0.6383, "step": 15289 }, { "epoch": 0.44641032378616685, "grad_norm": 0.49505955100908977, "learning_rate": 3.07558799675588e-05, "loss": 0.5885, "step": 15290 }, { "epoch": 0.4464395200140142, "grad_norm": 0.514508690145459, "learning_rate": 3.075425790754258e-05, "loss": 0.5635, "step": 15291 }, { "epoch": 0.4464687162418616, "grad_norm": 0.49998353554080177, "learning_rate": 3.0752635847526364e-05, "loss": 0.5841, "step": 15292 }, { "epoch": 0.44649791246970894, "grad_norm": 0.527257811732165, "learning_rate": 3.075101378751014e-05, "loss": 0.6669, "step": 15293 }, { "epoch": 0.4465271086975563, "grad_norm": 0.5508183381650825, "learning_rate": 3.074939172749392e-05, "loss": 0.6939, "step": 15294 }, { "epoch": 0.44655630492540366, "grad_norm": 0.5246524066215373, "learning_rate": 3.0747769667477696e-05, "loss": 0.6429, "step": 15295 }, { "epoch": 0.446585501153251, "grad_norm": 0.552454898020482, "learning_rate": 3.074614760746148e-05, "loss": 0.6449, "step": 15296 }, { "epoch": 0.4466146973810984, "grad_norm": 0.5782112426868264, "learning_rate": 3.074452554744526e-05, "loss": 0.6905, "step": 15297 }, { "epoch": 0.44664389360894574, "grad_norm": 0.5119915544718779, "learning_rate": 3.0742903487429035e-05, "loss": 0.6024, "step": 15298 }, { "epoch": 0.4466730898367931, "grad_norm": 0.5062065024517376, "learning_rate": 3.0741281427412816e-05, "loss": 0.6073, "step": 15299 }, { "epoch": 0.44670228606464046, "grad_norm": 0.4905470638557246, "learning_rate": 3.07396593673966e-05, "loss": 0.5653, "step": 15300 }, { "epoch": 0.4467314822924878, "grad_norm": 0.49249398548210005, "learning_rate": 3.073803730738037e-05, "loss": 0.5752, "step": 15301 }, { "epoch": 0.4467606785203352, "grad_norm": 0.5113196744759647, "learning_rate": 3.0736415247364155e-05, "loss": 0.5347, "step": 15302 }, { "epoch": 0.44678987474818255, "grad_norm": 0.5356637525832447, "learning_rate": 3.073479318734793e-05, "loss": 0.6223, "step": 15303 }, { "epoch": 0.4468190709760299, "grad_norm": 0.5009636503232429, "learning_rate": 3.073317112733171e-05, "loss": 0.5886, "step": 15304 }, { "epoch": 0.44684826720387727, "grad_norm": 0.5944048264475048, "learning_rate": 3.073154906731549e-05, "loss": 0.7059, "step": 15305 }, { "epoch": 0.44687746343172463, "grad_norm": 0.5392547244030069, "learning_rate": 3.072992700729927e-05, "loss": 0.6104, "step": 15306 }, { "epoch": 0.446906659659572, "grad_norm": 0.5010619135268322, "learning_rate": 3.072830494728305e-05, "loss": 0.5885, "step": 15307 }, { "epoch": 0.44693585588741935, "grad_norm": 0.515163615151494, "learning_rate": 3.0726682887266825e-05, "loss": 0.6463, "step": 15308 }, { "epoch": 0.4469650521152667, "grad_norm": 0.5424633991907393, "learning_rate": 3.0725060827250614e-05, "loss": 0.6857, "step": 15309 }, { "epoch": 0.4469942483431141, "grad_norm": 0.6120965745117639, "learning_rate": 3.072343876723439e-05, "loss": 0.661, "step": 15310 }, { "epoch": 0.44702344457096144, "grad_norm": 0.519064252205571, "learning_rate": 3.072181670721817e-05, "loss": 0.5732, "step": 15311 }, { "epoch": 0.4470526407988088, "grad_norm": 0.5697383699712165, "learning_rate": 3.072019464720195e-05, "loss": 0.7236, "step": 15312 }, { "epoch": 0.44708183702665616, "grad_norm": 0.5196015521863813, "learning_rate": 3.071857258718573e-05, "loss": 0.6269, "step": 15313 }, { "epoch": 0.4471110332545035, "grad_norm": 0.540148578277992, "learning_rate": 3.071695052716951e-05, "loss": 0.6449, "step": 15314 }, { "epoch": 0.4471402294823509, "grad_norm": 0.48482850056574356, "learning_rate": 3.0715328467153284e-05, "loss": 0.531, "step": 15315 }, { "epoch": 0.44716942571019824, "grad_norm": 0.5132838455634441, "learning_rate": 3.0713706407137066e-05, "loss": 0.6057, "step": 15316 }, { "epoch": 0.4471986219380456, "grad_norm": 0.5380662065923454, "learning_rate": 3.071208434712085e-05, "loss": 0.6727, "step": 15317 }, { "epoch": 0.44722781816589297, "grad_norm": 0.5129855925477981, "learning_rate": 3.071046228710462e-05, "loss": 0.5635, "step": 15318 }, { "epoch": 0.4472570143937403, "grad_norm": 0.49245099943323156, "learning_rate": 3.0708840227088404e-05, "loss": 0.5682, "step": 15319 }, { "epoch": 0.4472862106215877, "grad_norm": 0.5151751197468776, "learning_rate": 3.0707218167072186e-05, "loss": 0.5844, "step": 15320 }, { "epoch": 0.44731540684943505, "grad_norm": 0.5055963206446641, "learning_rate": 3.070559610705596e-05, "loss": 0.542, "step": 15321 }, { "epoch": 0.4473446030772824, "grad_norm": 0.47759937676023617, "learning_rate": 3.070397404703974e-05, "loss": 0.5106, "step": 15322 }, { "epoch": 0.44737379930512977, "grad_norm": 0.5470068617559913, "learning_rate": 3.070235198702352e-05, "loss": 0.6328, "step": 15323 }, { "epoch": 0.44740299553297713, "grad_norm": 0.5100394866132208, "learning_rate": 3.07007299270073e-05, "loss": 0.5641, "step": 15324 }, { "epoch": 0.4474321917608245, "grad_norm": 0.5344166829580236, "learning_rate": 3.069910786699108e-05, "loss": 0.6423, "step": 15325 }, { "epoch": 0.44746138798867185, "grad_norm": 0.5134030987662485, "learning_rate": 3.0697485806974856e-05, "loss": 0.5767, "step": 15326 }, { "epoch": 0.4474905842165192, "grad_norm": 0.5303418160049402, "learning_rate": 3.069586374695864e-05, "loss": 0.5868, "step": 15327 }, { "epoch": 0.4475197804443666, "grad_norm": 0.5644104742759191, "learning_rate": 3.069424168694242e-05, "loss": 0.6589, "step": 15328 }, { "epoch": 0.44754897667221394, "grad_norm": 0.525539846001305, "learning_rate": 3.06926196269262e-05, "loss": 0.6496, "step": 15329 }, { "epoch": 0.4475781729000613, "grad_norm": 0.5598045233908067, "learning_rate": 3.0690997566909976e-05, "loss": 0.678, "step": 15330 }, { "epoch": 0.44760736912790866, "grad_norm": 0.5676317393031568, "learning_rate": 3.068937550689376e-05, "loss": 0.66, "step": 15331 }, { "epoch": 0.447636565355756, "grad_norm": 0.5225059506822872, "learning_rate": 3.068775344687754e-05, "loss": 0.58, "step": 15332 }, { "epoch": 0.4476657615836034, "grad_norm": 0.5380262832414731, "learning_rate": 3.0686131386861315e-05, "loss": 0.6246, "step": 15333 }, { "epoch": 0.44769495781145074, "grad_norm": 0.5305762998512032, "learning_rate": 3.06845093268451e-05, "loss": 0.638, "step": 15334 }, { "epoch": 0.4477241540392981, "grad_norm": 0.524176727981259, "learning_rate": 3.068288726682887e-05, "loss": 0.6286, "step": 15335 }, { "epoch": 0.44775335026714547, "grad_norm": 0.556411860789882, "learning_rate": 3.0681265206812653e-05, "loss": 0.642, "step": 15336 }, { "epoch": 0.44778254649499283, "grad_norm": 0.5492140754888002, "learning_rate": 3.0679643146796435e-05, "loss": 0.6472, "step": 15337 }, { "epoch": 0.4478117427228402, "grad_norm": 0.49933045592497666, "learning_rate": 3.067802108678021e-05, "loss": 0.5947, "step": 15338 }, { "epoch": 0.44784093895068755, "grad_norm": 0.5257592206338414, "learning_rate": 3.067639902676399e-05, "loss": 0.67, "step": 15339 }, { "epoch": 0.4478701351785349, "grad_norm": 0.5317622941751188, "learning_rate": 3.067477696674777e-05, "loss": 0.6141, "step": 15340 }, { "epoch": 0.4478993314063823, "grad_norm": 0.5545333911865252, "learning_rate": 3.067315490673155e-05, "loss": 0.686, "step": 15341 }, { "epoch": 0.44792852763422963, "grad_norm": 0.447459436732582, "learning_rate": 3.067153284671533e-05, "loss": 0.4859, "step": 15342 }, { "epoch": 0.447957723862077, "grad_norm": 0.5359984204204884, "learning_rate": 3.0669910786699105e-05, "loss": 0.6279, "step": 15343 }, { "epoch": 0.44798692008992436, "grad_norm": 0.5509270691839058, "learning_rate": 3.066828872668289e-05, "loss": 0.6631, "step": 15344 }, { "epoch": 0.4480161163177717, "grad_norm": 0.5393965633517008, "learning_rate": 3.066666666666667e-05, "loss": 0.6409, "step": 15345 }, { "epoch": 0.4480453125456191, "grad_norm": 0.557068623079643, "learning_rate": 3.0665044606650444e-05, "loss": 0.6655, "step": 15346 }, { "epoch": 0.4480745087734665, "grad_norm": 0.4981152984132322, "learning_rate": 3.066342254663423e-05, "loss": 0.5665, "step": 15347 }, { "epoch": 0.44810370500131386, "grad_norm": 0.5392957207245105, "learning_rate": 3.066180048661801e-05, "loss": 0.6685, "step": 15348 }, { "epoch": 0.4481329012291612, "grad_norm": 0.5044938110502359, "learning_rate": 3.066017842660179e-05, "loss": 0.6026, "step": 15349 }, { "epoch": 0.4481620974570086, "grad_norm": 0.5268780810308881, "learning_rate": 3.0658556366585564e-05, "loss": 0.5893, "step": 15350 }, { "epoch": 0.44819129368485594, "grad_norm": 0.5818452998445796, "learning_rate": 3.0656934306569346e-05, "loss": 0.7101, "step": 15351 }, { "epoch": 0.4482204899127033, "grad_norm": 0.6110981997785407, "learning_rate": 3.065531224655313e-05, "loss": 0.7197, "step": 15352 }, { "epoch": 0.44824968614055066, "grad_norm": 0.5304726476029133, "learning_rate": 3.06536901865369e-05, "loss": 0.6226, "step": 15353 }, { "epoch": 0.448278882368398, "grad_norm": 0.5031050135300581, "learning_rate": 3.0652068126520684e-05, "loss": 0.5656, "step": 15354 }, { "epoch": 0.4483080785962454, "grad_norm": 0.5102390772411332, "learning_rate": 3.065044606650446e-05, "loss": 0.608, "step": 15355 }, { "epoch": 0.44833727482409275, "grad_norm": 0.5228354180362685, "learning_rate": 3.064882400648824e-05, "loss": 0.5827, "step": 15356 }, { "epoch": 0.4483664710519401, "grad_norm": 0.7155752919669672, "learning_rate": 3.064720194647202e-05, "loss": 0.7359, "step": 15357 }, { "epoch": 0.44839566727978747, "grad_norm": 0.5009750577049579, "learning_rate": 3.06455798864558e-05, "loss": 0.5458, "step": 15358 }, { "epoch": 0.44842486350763483, "grad_norm": 0.5100875578004406, "learning_rate": 3.064395782643958e-05, "loss": 0.5685, "step": 15359 }, { "epoch": 0.4484540597354822, "grad_norm": 0.5457398617924356, "learning_rate": 3.0642335766423355e-05, "loss": 0.7029, "step": 15360 }, { "epoch": 0.44848325596332955, "grad_norm": 0.5339198851657359, "learning_rate": 3.0640713706407136e-05, "loss": 0.6055, "step": 15361 }, { "epoch": 0.4485124521911769, "grad_norm": 0.5406451809207592, "learning_rate": 3.063909164639092e-05, "loss": 0.6389, "step": 15362 }, { "epoch": 0.4485416484190243, "grad_norm": 0.5317511941439487, "learning_rate": 3.063746958637469e-05, "loss": 0.6587, "step": 15363 }, { "epoch": 0.44857084464687164, "grad_norm": 0.5127509720191297, "learning_rate": 3.0635847526358475e-05, "loss": 0.6272, "step": 15364 }, { "epoch": 0.448600040874719, "grad_norm": 0.48067994648487194, "learning_rate": 3.063422546634226e-05, "loss": 0.5289, "step": 15365 }, { "epoch": 0.44862923710256636, "grad_norm": 0.5276030106675328, "learning_rate": 3.063260340632604e-05, "loss": 0.6185, "step": 15366 }, { "epoch": 0.4486584333304137, "grad_norm": 0.5220194388273713, "learning_rate": 3.063098134630982e-05, "loss": 0.6487, "step": 15367 }, { "epoch": 0.4486876295582611, "grad_norm": 0.5087519808829453, "learning_rate": 3.0629359286293595e-05, "loss": 0.5718, "step": 15368 }, { "epoch": 0.44871682578610844, "grad_norm": 0.58573072618279, "learning_rate": 3.062773722627738e-05, "loss": 0.6498, "step": 15369 }, { "epoch": 0.4487460220139558, "grad_norm": 0.5728708705460663, "learning_rate": 3.062611516626115e-05, "loss": 0.6271, "step": 15370 }, { "epoch": 0.44877521824180316, "grad_norm": 0.5694168480623976, "learning_rate": 3.0624493106244934e-05, "loss": 0.6425, "step": 15371 }, { "epoch": 0.4488044144696505, "grad_norm": 0.5226973440200068, "learning_rate": 3.0622871046228716e-05, "loss": 0.6228, "step": 15372 }, { "epoch": 0.4488336106974979, "grad_norm": 0.507914165138553, "learning_rate": 3.062124898621249e-05, "loss": 0.6036, "step": 15373 }, { "epoch": 0.44886280692534525, "grad_norm": 0.5301830107522718, "learning_rate": 3.061962692619627e-05, "loss": 0.6133, "step": 15374 }, { "epoch": 0.4488920031531926, "grad_norm": 0.6780919622972609, "learning_rate": 3.061800486618005e-05, "loss": 0.6676, "step": 15375 }, { "epoch": 0.44892119938103997, "grad_norm": 0.5575321504085793, "learning_rate": 3.061638280616383e-05, "loss": 0.6593, "step": 15376 }, { "epoch": 0.44895039560888733, "grad_norm": 0.529716630465558, "learning_rate": 3.061476074614761e-05, "loss": 0.6456, "step": 15377 }, { "epoch": 0.4489795918367347, "grad_norm": 0.5167861642271647, "learning_rate": 3.0613138686131386e-05, "loss": 0.5649, "step": 15378 }, { "epoch": 0.44900878806458205, "grad_norm": 0.516849615313634, "learning_rate": 3.061151662611517e-05, "loss": 0.5612, "step": 15379 }, { "epoch": 0.4490379842924294, "grad_norm": 0.5561815729040389, "learning_rate": 3.060989456609894e-05, "loss": 0.6361, "step": 15380 }, { "epoch": 0.4490671805202768, "grad_norm": 0.5109781598142763, "learning_rate": 3.0608272506082724e-05, "loss": 0.5997, "step": 15381 }, { "epoch": 0.44909637674812414, "grad_norm": 0.5605524440430651, "learning_rate": 3.0606650446066506e-05, "loss": 0.6875, "step": 15382 }, { "epoch": 0.4491255729759715, "grad_norm": 0.5133187351293609, "learning_rate": 3.060502838605028e-05, "loss": 0.5866, "step": 15383 }, { "epoch": 0.44915476920381886, "grad_norm": 0.5173850288759881, "learning_rate": 3.060340632603406e-05, "loss": 0.6008, "step": 15384 }, { "epoch": 0.4491839654316662, "grad_norm": 0.530049883783639, "learning_rate": 3.0601784266017845e-05, "loss": 0.6179, "step": 15385 }, { "epoch": 0.4492131616595136, "grad_norm": 0.5255514450993891, "learning_rate": 3.0600162206001626e-05, "loss": 0.6287, "step": 15386 }, { "epoch": 0.44924235788736094, "grad_norm": 0.5214327354988, "learning_rate": 3.059854014598541e-05, "loss": 0.6115, "step": 15387 }, { "epoch": 0.4492715541152083, "grad_norm": 0.5327870728630082, "learning_rate": 3.059691808596918e-05, "loss": 0.661, "step": 15388 }, { "epoch": 0.44930075034305567, "grad_norm": 0.5070097103091011, "learning_rate": 3.0595296025952965e-05, "loss": 0.5926, "step": 15389 }, { "epoch": 0.449329946570903, "grad_norm": 0.5126230539735008, "learning_rate": 3.059367396593674e-05, "loss": 0.6259, "step": 15390 }, { "epoch": 0.4493591427987504, "grad_norm": 0.572090454613831, "learning_rate": 3.059205190592052e-05, "loss": 0.7011, "step": 15391 }, { "epoch": 0.44938833902659775, "grad_norm": 0.522473755043709, "learning_rate": 3.05904298459043e-05, "loss": 0.6252, "step": 15392 }, { "epoch": 0.4494175352544451, "grad_norm": 0.5178661188579282, "learning_rate": 3.058880778588808e-05, "loss": 0.5827, "step": 15393 }, { "epoch": 0.44944673148229247, "grad_norm": 0.5429696807594423, "learning_rate": 3.058718572587186e-05, "loss": 0.6141, "step": 15394 }, { "epoch": 0.44947592771013983, "grad_norm": 0.49703835209192443, "learning_rate": 3.0585563665855635e-05, "loss": 0.6122, "step": 15395 }, { "epoch": 0.4495051239379872, "grad_norm": 0.532137399567488, "learning_rate": 3.058394160583942e-05, "loss": 0.6454, "step": 15396 }, { "epoch": 0.44953432016583456, "grad_norm": 0.5492847166060517, "learning_rate": 3.05823195458232e-05, "loss": 0.6805, "step": 15397 }, { "epoch": 0.4495635163936819, "grad_norm": 0.623196354676867, "learning_rate": 3.0580697485806974e-05, "loss": 0.6075, "step": 15398 }, { "epoch": 0.4495927126215293, "grad_norm": 0.5045416994511261, "learning_rate": 3.0579075425790755e-05, "loss": 0.5798, "step": 15399 }, { "epoch": 0.44962190884937664, "grad_norm": 0.4889855468047426, "learning_rate": 3.057745336577453e-05, "loss": 0.5675, "step": 15400 }, { "epoch": 0.449651105077224, "grad_norm": 0.5120357403354749, "learning_rate": 3.057583130575831e-05, "loss": 0.5974, "step": 15401 }, { "epoch": 0.44968030130507136, "grad_norm": 0.5102127642530677, "learning_rate": 3.0574209245742094e-05, "loss": 0.5902, "step": 15402 }, { "epoch": 0.4497094975329187, "grad_norm": 0.546700572593655, "learning_rate": 3.057258718572587e-05, "loss": 0.6624, "step": 15403 }, { "epoch": 0.4497386937607661, "grad_norm": 0.5503695592642014, "learning_rate": 3.057096512570966e-05, "loss": 0.649, "step": 15404 }, { "epoch": 0.44976788998861345, "grad_norm": 0.5170713750682268, "learning_rate": 3.056934306569343e-05, "loss": 0.5855, "step": 15405 }, { "epoch": 0.4497970862164608, "grad_norm": 0.5271852994205515, "learning_rate": 3.0567721005677214e-05, "loss": 0.6209, "step": 15406 }, { "epoch": 0.4498262824443082, "grad_norm": 0.5248421861663183, "learning_rate": 3.0566098945660996e-05, "loss": 0.6252, "step": 15407 }, { "epoch": 0.4498554786721556, "grad_norm": 0.5632791640132551, "learning_rate": 3.056447688564477e-05, "loss": 0.7057, "step": 15408 }, { "epoch": 0.44988467490000295, "grad_norm": 0.523222112117083, "learning_rate": 3.056285482562855e-05, "loss": 0.5956, "step": 15409 }, { "epoch": 0.4499138711278503, "grad_norm": 0.5634882625573591, "learning_rate": 3.056123276561233e-05, "loss": 0.6742, "step": 15410 }, { "epoch": 0.44994306735569767, "grad_norm": 0.5070081068918469, "learning_rate": 3.055961070559611e-05, "loss": 0.6275, "step": 15411 }, { "epoch": 0.44997226358354503, "grad_norm": 0.5369149233021752, "learning_rate": 3.055798864557989e-05, "loss": 0.6464, "step": 15412 }, { "epoch": 0.4500014598113924, "grad_norm": 0.4887925168499884, "learning_rate": 3.0556366585563666e-05, "loss": 0.5197, "step": 15413 }, { "epoch": 0.45003065603923975, "grad_norm": 0.5289242353015645, "learning_rate": 3.055474452554745e-05, "loss": 0.6691, "step": 15414 }, { "epoch": 0.4500598522670871, "grad_norm": 0.5271490660299579, "learning_rate": 3.055312246553122e-05, "loss": 0.6824, "step": 15415 }, { "epoch": 0.4500890484949345, "grad_norm": 0.5466415711930599, "learning_rate": 3.0551500405515005e-05, "loss": 0.679, "step": 15416 }, { "epoch": 0.45011824472278184, "grad_norm": 0.5032990132486715, "learning_rate": 3.0549878345498786e-05, "loss": 0.5663, "step": 15417 }, { "epoch": 0.4501474409506292, "grad_norm": 0.5214260480673466, "learning_rate": 3.054825628548256e-05, "loss": 0.6475, "step": 15418 }, { "epoch": 0.45017663717847656, "grad_norm": 0.5545933936659323, "learning_rate": 3.054663422546634e-05, "loss": 0.6365, "step": 15419 }, { "epoch": 0.4502058334063239, "grad_norm": 0.5238051094567109, "learning_rate": 3.054501216545012e-05, "loss": 0.587, "step": 15420 }, { "epoch": 0.4502350296341713, "grad_norm": 0.5194917658245765, "learning_rate": 3.05433901054339e-05, "loss": 0.6442, "step": 15421 }, { "epoch": 0.45026422586201864, "grad_norm": 0.5335891093551055, "learning_rate": 3.054176804541768e-05, "loss": 0.645, "step": 15422 }, { "epoch": 0.450293422089866, "grad_norm": 0.5905284458476682, "learning_rate": 3.0540145985401463e-05, "loss": 0.7667, "step": 15423 }, { "epoch": 0.45032261831771336, "grad_norm": 0.5342664764545341, "learning_rate": 3.0538523925385245e-05, "loss": 0.6337, "step": 15424 }, { "epoch": 0.4503518145455607, "grad_norm": 0.536577630376344, "learning_rate": 3.053690186536902e-05, "loss": 0.6114, "step": 15425 }, { "epoch": 0.4503810107734081, "grad_norm": 0.5215846029952845, "learning_rate": 3.05352798053528e-05, "loss": 0.6028, "step": 15426 }, { "epoch": 0.45041020700125545, "grad_norm": 0.5834486078768314, "learning_rate": 3.053365774533658e-05, "loss": 0.7679, "step": 15427 }, { "epoch": 0.4504394032291028, "grad_norm": 0.5247456524691121, "learning_rate": 3.053203568532036e-05, "loss": 0.5802, "step": 15428 }, { "epoch": 0.45046859945695017, "grad_norm": 0.5152794053940641, "learning_rate": 3.053041362530414e-05, "loss": 0.5782, "step": 15429 }, { "epoch": 0.45049779568479753, "grad_norm": 0.5543165897291699, "learning_rate": 3.0528791565287915e-05, "loss": 0.6777, "step": 15430 }, { "epoch": 0.4505269919126449, "grad_norm": 0.5323886309807123, "learning_rate": 3.05271695052717e-05, "loss": 0.6188, "step": 15431 }, { "epoch": 0.45055618814049225, "grad_norm": 0.5167580549301254, "learning_rate": 3.052554744525548e-05, "loss": 0.5698, "step": 15432 }, { "epoch": 0.4505853843683396, "grad_norm": 0.532870775202875, "learning_rate": 3.0523925385239254e-05, "loss": 0.6337, "step": 15433 }, { "epoch": 0.450614580596187, "grad_norm": 0.5755386505337503, "learning_rate": 3.0522303325223036e-05, "loss": 0.7266, "step": 15434 }, { "epoch": 0.45064377682403434, "grad_norm": 0.5073804178393445, "learning_rate": 3.052068126520681e-05, "loss": 0.5742, "step": 15435 }, { "epoch": 0.4506729730518817, "grad_norm": 0.49767565405876246, "learning_rate": 3.051905920519059e-05, "loss": 0.5698, "step": 15436 }, { "epoch": 0.45070216927972906, "grad_norm": 0.4995853603422714, "learning_rate": 3.051743714517437e-05, "loss": 0.5821, "step": 15437 }, { "epoch": 0.4507313655075764, "grad_norm": 0.5608062241302102, "learning_rate": 3.051581508515815e-05, "loss": 0.657, "step": 15438 }, { "epoch": 0.4507605617354238, "grad_norm": 0.49877833191175747, "learning_rate": 3.051419302514193e-05, "loss": 0.5442, "step": 15439 }, { "epoch": 0.45078975796327114, "grad_norm": 0.5740581015888158, "learning_rate": 3.051257096512571e-05, "loss": 0.5487, "step": 15440 }, { "epoch": 0.4508189541911185, "grad_norm": 0.5150147843572271, "learning_rate": 3.0510948905109494e-05, "loss": 0.564, "step": 15441 }, { "epoch": 0.45084815041896587, "grad_norm": 0.5398179465574028, "learning_rate": 3.0509326845093273e-05, "loss": 0.6655, "step": 15442 }, { "epoch": 0.4508773466468132, "grad_norm": 0.5089839546890845, "learning_rate": 3.050770478507705e-05, "loss": 0.5616, "step": 15443 }, { "epoch": 0.4509065428746606, "grad_norm": 0.5185479456000166, "learning_rate": 3.050608272506083e-05, "loss": 0.5771, "step": 15444 }, { "epoch": 0.45093573910250795, "grad_norm": 0.5085067139464653, "learning_rate": 3.0504460665044608e-05, "loss": 0.5713, "step": 15445 }, { "epoch": 0.4509649353303553, "grad_norm": 0.5113665794487612, "learning_rate": 3.050283860502839e-05, "loss": 0.6096, "step": 15446 }, { "epoch": 0.45099413155820267, "grad_norm": 0.531032038408446, "learning_rate": 3.0501216545012168e-05, "loss": 0.5898, "step": 15447 }, { "epoch": 0.45102332778605003, "grad_norm": 0.5316174868334889, "learning_rate": 3.0499594484995946e-05, "loss": 0.662, "step": 15448 }, { "epoch": 0.4510525240138974, "grad_norm": 0.5244911539671369, "learning_rate": 3.0497972424979725e-05, "loss": 0.6372, "step": 15449 }, { "epoch": 0.45108172024174475, "grad_norm": 0.5229503988094626, "learning_rate": 3.0496350364963507e-05, "loss": 0.6365, "step": 15450 }, { "epoch": 0.4511109164695921, "grad_norm": 0.5195104175660846, "learning_rate": 3.0494728304947285e-05, "loss": 0.5612, "step": 15451 }, { "epoch": 0.4511401126974395, "grad_norm": 0.5073927589921232, "learning_rate": 3.0493106244931063e-05, "loss": 0.621, "step": 15452 }, { "epoch": 0.45116930892528684, "grad_norm": 0.5841248472338354, "learning_rate": 3.0491484184914842e-05, "loss": 0.6848, "step": 15453 }, { "epoch": 0.4511985051531342, "grad_norm": 0.5574361173640557, "learning_rate": 3.048986212489862e-05, "loss": 0.5265, "step": 15454 }, { "epoch": 0.45122770138098156, "grad_norm": 0.5445720139957573, "learning_rate": 3.0488240064882402e-05, "loss": 0.6251, "step": 15455 }, { "epoch": 0.4512568976088289, "grad_norm": 0.5196738579885298, "learning_rate": 3.048661800486618e-05, "loss": 0.6263, "step": 15456 }, { "epoch": 0.4512860938366763, "grad_norm": 0.5385428310635872, "learning_rate": 3.048499594484996e-05, "loss": 0.6708, "step": 15457 }, { "epoch": 0.45131529006452364, "grad_norm": 0.5299570047433425, "learning_rate": 3.0483373884833737e-05, "loss": 0.6441, "step": 15458 }, { "epoch": 0.451344486292371, "grad_norm": 0.6478830875794376, "learning_rate": 3.048175182481752e-05, "loss": 0.7184, "step": 15459 }, { "epoch": 0.45137368252021837, "grad_norm": 0.4976370382740573, "learning_rate": 3.04801297648013e-05, "loss": 0.5853, "step": 15460 }, { "epoch": 0.45140287874806573, "grad_norm": 0.5250076220288671, "learning_rate": 3.0478507704785082e-05, "loss": 0.6267, "step": 15461 }, { "epoch": 0.4514320749759131, "grad_norm": 0.5196626377077719, "learning_rate": 3.047688564476886e-05, "loss": 0.5679, "step": 15462 }, { "epoch": 0.45146127120376045, "grad_norm": 0.4609553486147537, "learning_rate": 3.047526358475264e-05, "loss": 0.4998, "step": 15463 }, { "epoch": 0.4514904674316078, "grad_norm": 0.5672736653487499, "learning_rate": 3.0473641524736417e-05, "loss": 0.7439, "step": 15464 }, { "epoch": 0.4515196636594552, "grad_norm": 0.5234982961760916, "learning_rate": 3.0472019464720196e-05, "loss": 0.5998, "step": 15465 }, { "epoch": 0.45154885988730253, "grad_norm": 0.5420457710428929, "learning_rate": 3.0470397404703978e-05, "loss": 0.5996, "step": 15466 }, { "epoch": 0.4515780561151499, "grad_norm": 0.6051978368835738, "learning_rate": 3.0468775344687756e-05, "loss": 0.6132, "step": 15467 }, { "epoch": 0.4516072523429973, "grad_norm": 0.5105724499299854, "learning_rate": 3.0467153284671534e-05, "loss": 0.5422, "step": 15468 }, { "epoch": 0.4516364485708447, "grad_norm": 0.5482095735363237, "learning_rate": 3.0465531224655313e-05, "loss": 0.6428, "step": 15469 }, { "epoch": 0.45166564479869203, "grad_norm": 0.48945058802441965, "learning_rate": 3.0463909164639094e-05, "loss": 0.5182, "step": 15470 }, { "epoch": 0.4516948410265394, "grad_norm": 0.5182859977344932, "learning_rate": 3.0462287104622873e-05, "loss": 0.6267, "step": 15471 }, { "epoch": 0.45172403725438676, "grad_norm": 0.46296923540688123, "learning_rate": 3.046066504460665e-05, "loss": 0.4867, "step": 15472 }, { "epoch": 0.4517532334822341, "grad_norm": 0.5227096804617146, "learning_rate": 3.045904298459043e-05, "loss": 0.5876, "step": 15473 }, { "epoch": 0.4517824297100815, "grad_norm": 0.5036462784013666, "learning_rate": 3.0457420924574208e-05, "loss": 0.5839, "step": 15474 }, { "epoch": 0.45181162593792884, "grad_norm": 0.5931412245426483, "learning_rate": 3.045579886455799e-05, "loss": 0.7059, "step": 15475 }, { "epoch": 0.4518408221657762, "grad_norm": 0.5233341075112434, "learning_rate": 3.0454176804541768e-05, "loss": 0.643, "step": 15476 }, { "epoch": 0.45187001839362356, "grad_norm": 0.5781411743296042, "learning_rate": 3.0452554744525546e-05, "loss": 0.7318, "step": 15477 }, { "epoch": 0.4518992146214709, "grad_norm": 0.5473992484422446, "learning_rate": 3.0450932684509325e-05, "loss": 0.6783, "step": 15478 }, { "epoch": 0.4519284108493183, "grad_norm": 0.47348141512625747, "learning_rate": 3.044931062449311e-05, "loss": 0.509, "step": 15479 }, { "epoch": 0.45195760707716565, "grad_norm": 0.5174006464720109, "learning_rate": 3.044768856447689e-05, "loss": 0.6623, "step": 15480 }, { "epoch": 0.451986803305013, "grad_norm": 0.47244580276681264, "learning_rate": 3.044606650446067e-05, "loss": 0.549, "step": 15481 }, { "epoch": 0.45201599953286037, "grad_norm": 0.5394930427809916, "learning_rate": 3.044444444444445e-05, "loss": 0.6406, "step": 15482 }, { "epoch": 0.45204519576070773, "grad_norm": 0.5502842627726945, "learning_rate": 3.0442822384428227e-05, "loss": 0.678, "step": 15483 }, { "epoch": 0.4520743919885551, "grad_norm": 0.5066803667627926, "learning_rate": 3.0441200324412005e-05, "loss": 0.5998, "step": 15484 }, { "epoch": 0.45210358821640245, "grad_norm": 0.52977282943177, "learning_rate": 3.0439578264395784e-05, "loss": 0.6318, "step": 15485 }, { "epoch": 0.4521327844442498, "grad_norm": 0.4670675860017815, "learning_rate": 3.0437956204379565e-05, "loss": 0.5281, "step": 15486 }, { "epoch": 0.4521619806720972, "grad_norm": 0.5022241223891857, "learning_rate": 3.0436334144363344e-05, "loss": 0.5785, "step": 15487 }, { "epoch": 0.45219117689994454, "grad_norm": 0.5226077530749723, "learning_rate": 3.0434712084347122e-05, "loss": 0.6058, "step": 15488 }, { "epoch": 0.4522203731277919, "grad_norm": 0.5427733303856391, "learning_rate": 3.04330900243309e-05, "loss": 0.6398, "step": 15489 }, { "epoch": 0.45224956935563926, "grad_norm": 0.48852291085301075, "learning_rate": 3.043146796431468e-05, "loss": 0.5644, "step": 15490 }, { "epoch": 0.4522787655834866, "grad_norm": 0.5092155658517475, "learning_rate": 3.042984590429846e-05, "loss": 0.5344, "step": 15491 }, { "epoch": 0.452307961811334, "grad_norm": 0.5320419034693888, "learning_rate": 3.042822384428224e-05, "loss": 0.5976, "step": 15492 }, { "epoch": 0.45233715803918134, "grad_norm": 0.527353955963784, "learning_rate": 3.0426601784266017e-05, "loss": 0.6365, "step": 15493 }, { "epoch": 0.4523663542670287, "grad_norm": 0.5129944906294049, "learning_rate": 3.0424979724249796e-05, "loss": 0.6043, "step": 15494 }, { "epoch": 0.45239555049487606, "grad_norm": 0.5431276401186549, "learning_rate": 3.0423357664233578e-05, "loss": 0.6044, "step": 15495 }, { "epoch": 0.4524247467227234, "grad_norm": 0.513527145924174, "learning_rate": 3.0421735604217356e-05, "loss": 0.5898, "step": 15496 }, { "epoch": 0.4524539429505708, "grad_norm": 0.5121990614999893, "learning_rate": 3.0420113544201134e-05, "loss": 0.6282, "step": 15497 }, { "epoch": 0.45248313917841815, "grad_norm": 0.4903238906978512, "learning_rate": 3.041849148418492e-05, "loss": 0.5496, "step": 15498 }, { "epoch": 0.4525123354062655, "grad_norm": 0.5700183450882167, "learning_rate": 3.0416869424168698e-05, "loss": 0.6698, "step": 15499 }, { "epoch": 0.45254153163411287, "grad_norm": 0.5552406286563408, "learning_rate": 3.0415247364152476e-05, "loss": 0.6615, "step": 15500 }, { "epoch": 0.45257072786196023, "grad_norm": 0.5892017200525667, "learning_rate": 3.0413625304136255e-05, "loss": 0.6688, "step": 15501 }, { "epoch": 0.4525999240898076, "grad_norm": 0.559005245748083, "learning_rate": 3.0412003244120036e-05, "loss": 0.6547, "step": 15502 }, { "epoch": 0.45262912031765495, "grad_norm": 0.6560938447820021, "learning_rate": 3.0410381184103815e-05, "loss": 0.7506, "step": 15503 }, { "epoch": 0.4526583165455023, "grad_norm": 0.5441240436553912, "learning_rate": 3.0408759124087593e-05, "loss": 0.7242, "step": 15504 }, { "epoch": 0.4526875127733497, "grad_norm": 0.5234771831792766, "learning_rate": 3.040713706407137e-05, "loss": 0.5862, "step": 15505 }, { "epoch": 0.45271670900119704, "grad_norm": 0.5235545503123983, "learning_rate": 3.0405515004055153e-05, "loss": 0.642, "step": 15506 }, { "epoch": 0.4527459052290444, "grad_norm": 0.5128136261557148, "learning_rate": 3.040389294403893e-05, "loss": 0.6005, "step": 15507 }, { "epoch": 0.45277510145689176, "grad_norm": 0.48718741351870115, "learning_rate": 3.040227088402271e-05, "loss": 0.5319, "step": 15508 }, { "epoch": 0.4528042976847391, "grad_norm": 0.5675485604652933, "learning_rate": 3.0400648824006488e-05, "loss": 0.7069, "step": 15509 }, { "epoch": 0.4528334939125865, "grad_norm": 0.5649819956457875, "learning_rate": 3.0399026763990267e-05, "loss": 0.6739, "step": 15510 }, { "epoch": 0.45286269014043384, "grad_norm": 0.5212667813296615, "learning_rate": 3.039740470397405e-05, "loss": 0.6086, "step": 15511 }, { "epoch": 0.4528918863682812, "grad_norm": 0.5395775213877653, "learning_rate": 3.0395782643957827e-05, "loss": 0.6422, "step": 15512 }, { "epoch": 0.45292108259612857, "grad_norm": 0.5346974942631244, "learning_rate": 3.0394160583941605e-05, "loss": 0.6404, "step": 15513 }, { "epoch": 0.4529502788239759, "grad_norm": 0.48313118049291576, "learning_rate": 3.0392538523925384e-05, "loss": 0.5359, "step": 15514 }, { "epoch": 0.4529794750518233, "grad_norm": 0.5316548558805161, "learning_rate": 3.0390916463909165e-05, "loss": 0.5599, "step": 15515 }, { "epoch": 0.45300867127967065, "grad_norm": 0.529845761028966, "learning_rate": 3.0389294403892944e-05, "loss": 0.6529, "step": 15516 }, { "epoch": 0.453037867507518, "grad_norm": 0.5547825691895387, "learning_rate": 3.038767234387673e-05, "loss": 0.6258, "step": 15517 }, { "epoch": 0.45306706373536537, "grad_norm": 0.5255457959484798, "learning_rate": 3.0386050283860507e-05, "loss": 0.6074, "step": 15518 }, { "epoch": 0.45309625996321273, "grad_norm": 0.5174382658786407, "learning_rate": 3.0384428223844286e-05, "loss": 0.627, "step": 15519 }, { "epoch": 0.4531254561910601, "grad_norm": 0.5367186185495402, "learning_rate": 3.0382806163828064e-05, "loss": 0.6269, "step": 15520 }, { "epoch": 0.45315465241890746, "grad_norm": 0.5139968755771513, "learning_rate": 3.0381184103811842e-05, "loss": 0.6162, "step": 15521 }, { "epoch": 0.4531838486467548, "grad_norm": 0.5445126729281686, "learning_rate": 3.0379562043795624e-05, "loss": 0.5859, "step": 15522 }, { "epoch": 0.4532130448746022, "grad_norm": 0.5354403729064591, "learning_rate": 3.0377939983779402e-05, "loss": 0.6092, "step": 15523 }, { "epoch": 0.45324224110244954, "grad_norm": 0.5379652798860072, "learning_rate": 3.037631792376318e-05, "loss": 0.6042, "step": 15524 }, { "epoch": 0.4532714373302969, "grad_norm": 0.5670815491719466, "learning_rate": 3.037469586374696e-05, "loss": 0.6892, "step": 15525 }, { "epoch": 0.45330063355814426, "grad_norm": 0.5144928575445186, "learning_rate": 3.037307380373074e-05, "loss": 0.5751, "step": 15526 }, { "epoch": 0.4533298297859916, "grad_norm": 0.514137263704939, "learning_rate": 3.037145174371452e-05, "loss": 0.6027, "step": 15527 }, { "epoch": 0.45335902601383904, "grad_norm": 0.5481459934765025, "learning_rate": 3.0369829683698298e-05, "loss": 0.6274, "step": 15528 }, { "epoch": 0.4533882222416864, "grad_norm": 0.5306219621904597, "learning_rate": 3.0368207623682076e-05, "loss": 0.6524, "step": 15529 }, { "epoch": 0.45341741846953376, "grad_norm": 0.5456958830973079, "learning_rate": 3.0366585563665854e-05, "loss": 0.6055, "step": 15530 }, { "epoch": 0.4534466146973811, "grad_norm": 0.5429842015418332, "learning_rate": 3.0364963503649636e-05, "loss": 0.6138, "step": 15531 }, { "epoch": 0.4534758109252285, "grad_norm": 0.5650553024583012, "learning_rate": 3.0363341443633415e-05, "loss": 0.6075, "step": 15532 }, { "epoch": 0.45350500715307585, "grad_norm": 0.5790904953597716, "learning_rate": 3.0361719383617193e-05, "loss": 0.7118, "step": 15533 }, { "epoch": 0.4535342033809232, "grad_norm": 0.5100606970874687, "learning_rate": 3.036009732360097e-05, "loss": 0.6257, "step": 15534 }, { "epoch": 0.45356339960877057, "grad_norm": 0.5167847946487693, "learning_rate": 3.035847526358475e-05, "loss": 0.5845, "step": 15535 }, { "epoch": 0.45359259583661793, "grad_norm": 0.49753804582054667, "learning_rate": 3.0356853203568535e-05, "loss": 0.5567, "step": 15536 }, { "epoch": 0.4536217920644653, "grad_norm": 0.48732980929638564, "learning_rate": 3.0355231143552317e-05, "loss": 0.5413, "step": 15537 }, { "epoch": 0.45365098829231265, "grad_norm": 0.6116490048051051, "learning_rate": 3.0353609083536095e-05, "loss": 0.7294, "step": 15538 }, { "epoch": 0.45368018452016, "grad_norm": 0.5747934575607933, "learning_rate": 3.0351987023519873e-05, "loss": 0.7133, "step": 15539 }, { "epoch": 0.4537093807480074, "grad_norm": 0.5914979172944872, "learning_rate": 3.0350364963503652e-05, "loss": 0.683, "step": 15540 }, { "epoch": 0.45373857697585473, "grad_norm": 0.5304845634533116, "learning_rate": 3.034874290348743e-05, "loss": 0.5823, "step": 15541 }, { "epoch": 0.4537677732037021, "grad_norm": 0.5475187142810864, "learning_rate": 3.0347120843471212e-05, "loss": 0.6599, "step": 15542 }, { "epoch": 0.45379696943154946, "grad_norm": 0.5323835377731375, "learning_rate": 3.034549878345499e-05, "loss": 0.6017, "step": 15543 }, { "epoch": 0.4538261656593968, "grad_norm": 0.5283699601489852, "learning_rate": 3.034387672343877e-05, "loss": 0.5962, "step": 15544 }, { "epoch": 0.4538553618872442, "grad_norm": 0.509731135518833, "learning_rate": 3.0342254663422547e-05, "loss": 0.6126, "step": 15545 }, { "epoch": 0.45388455811509154, "grad_norm": 0.4856782013978273, "learning_rate": 3.0340632603406325e-05, "loss": 0.5291, "step": 15546 }, { "epoch": 0.4539137543429389, "grad_norm": 0.5608278014682335, "learning_rate": 3.0339010543390107e-05, "loss": 0.662, "step": 15547 }, { "epoch": 0.45394295057078626, "grad_norm": 0.5348086375078703, "learning_rate": 3.0337388483373886e-05, "loss": 0.661, "step": 15548 }, { "epoch": 0.4539721467986336, "grad_norm": 0.5297068115795366, "learning_rate": 3.0335766423357664e-05, "loss": 0.6037, "step": 15549 }, { "epoch": 0.454001343026481, "grad_norm": 0.5312646765971534, "learning_rate": 3.0334144363341442e-05, "loss": 0.6288, "step": 15550 }, { "epoch": 0.45403053925432835, "grad_norm": 0.517739371856993, "learning_rate": 3.0332522303325224e-05, "loss": 0.6212, "step": 15551 }, { "epoch": 0.4540597354821757, "grad_norm": 0.5415460272588437, "learning_rate": 3.0330900243309002e-05, "loss": 0.6395, "step": 15552 }, { "epoch": 0.45408893171002307, "grad_norm": 0.48987088704660553, "learning_rate": 3.032927818329278e-05, "loss": 0.5462, "step": 15553 }, { "epoch": 0.45411812793787043, "grad_norm": 0.5712650548400993, "learning_rate": 3.032765612327656e-05, "loss": 0.6604, "step": 15554 }, { "epoch": 0.4541473241657178, "grad_norm": 0.5354108441204519, "learning_rate": 3.0326034063260344e-05, "loss": 0.6325, "step": 15555 }, { "epoch": 0.45417652039356515, "grad_norm": 0.5120503274151479, "learning_rate": 3.0324412003244123e-05, "loss": 0.6169, "step": 15556 }, { "epoch": 0.4542057166214125, "grad_norm": 0.5633985424481369, "learning_rate": 3.0322789943227904e-05, "loss": 0.6719, "step": 15557 }, { "epoch": 0.4542349128492599, "grad_norm": 0.5650098037993724, "learning_rate": 3.0321167883211683e-05, "loss": 0.6939, "step": 15558 }, { "epoch": 0.45426410907710724, "grad_norm": 0.5307414758085379, "learning_rate": 3.031954582319546e-05, "loss": 0.636, "step": 15559 }, { "epoch": 0.4542933053049546, "grad_norm": 0.5179227274838097, "learning_rate": 3.031792376317924e-05, "loss": 0.5672, "step": 15560 }, { "epoch": 0.45432250153280196, "grad_norm": 0.49548696845816187, "learning_rate": 3.0316301703163018e-05, "loss": 0.5134, "step": 15561 }, { "epoch": 0.4543516977606493, "grad_norm": 0.5551092413338968, "learning_rate": 3.03146796431468e-05, "loss": 0.7087, "step": 15562 }, { "epoch": 0.4543808939884967, "grad_norm": 0.5076657451285186, "learning_rate": 3.0313057583130578e-05, "loss": 0.5766, "step": 15563 }, { "epoch": 0.45441009021634404, "grad_norm": 0.5617260263301233, "learning_rate": 3.0311435523114356e-05, "loss": 0.6781, "step": 15564 }, { "epoch": 0.4544392864441914, "grad_norm": 0.5269847067671364, "learning_rate": 3.0309813463098135e-05, "loss": 0.5767, "step": 15565 }, { "epoch": 0.45446848267203876, "grad_norm": 0.5612069887888382, "learning_rate": 3.0308191403081913e-05, "loss": 0.6806, "step": 15566 }, { "epoch": 0.4544976788998861, "grad_norm": 0.5776607355575969, "learning_rate": 3.0306569343065695e-05, "loss": 0.7019, "step": 15567 }, { "epoch": 0.4545268751277335, "grad_norm": 0.5166698258340164, "learning_rate": 3.0304947283049473e-05, "loss": 0.5826, "step": 15568 }, { "epoch": 0.45455607135558085, "grad_norm": 0.528508304155178, "learning_rate": 3.0303325223033252e-05, "loss": 0.6451, "step": 15569 }, { "epoch": 0.4545852675834282, "grad_norm": 0.49388108974063966, "learning_rate": 3.030170316301703e-05, "loss": 0.588, "step": 15570 }, { "epoch": 0.45461446381127557, "grad_norm": 0.5260339999136281, "learning_rate": 3.0300081103000812e-05, "loss": 0.6122, "step": 15571 }, { "epoch": 0.45464366003912293, "grad_norm": 0.4813872771054043, "learning_rate": 3.029845904298459e-05, "loss": 0.5321, "step": 15572 }, { "epoch": 0.4546728562669703, "grad_norm": 0.5174710300739169, "learning_rate": 3.0296836982968375e-05, "loss": 0.6221, "step": 15573 }, { "epoch": 0.45470205249481765, "grad_norm": 0.49664396996779464, "learning_rate": 3.0295214922952154e-05, "loss": 0.5602, "step": 15574 }, { "epoch": 0.454731248722665, "grad_norm": 0.5400784501446001, "learning_rate": 3.0293592862935932e-05, "loss": 0.6815, "step": 15575 }, { "epoch": 0.4547604449505124, "grad_norm": 0.48799632391587505, "learning_rate": 3.029197080291971e-05, "loss": 0.535, "step": 15576 }, { "epoch": 0.45478964117835974, "grad_norm": 0.5149435100685733, "learning_rate": 3.029034874290349e-05, "loss": 0.5938, "step": 15577 }, { "epoch": 0.4548188374062071, "grad_norm": 0.5557219996529356, "learning_rate": 3.028872668288727e-05, "loss": 0.6701, "step": 15578 }, { "epoch": 0.45484803363405446, "grad_norm": 0.8196418320708788, "learning_rate": 3.028710462287105e-05, "loss": 0.8246, "step": 15579 }, { "epoch": 0.4548772298619018, "grad_norm": 0.5075528258827712, "learning_rate": 3.0285482562854827e-05, "loss": 0.5383, "step": 15580 }, { "epoch": 0.4549064260897492, "grad_norm": 0.5676342237640191, "learning_rate": 3.0283860502838606e-05, "loss": 0.7177, "step": 15581 }, { "epoch": 0.45493562231759654, "grad_norm": 0.4982918057810108, "learning_rate": 3.0282238442822388e-05, "loss": 0.5832, "step": 15582 }, { "epoch": 0.4549648185454439, "grad_norm": 0.5299253031813659, "learning_rate": 3.0280616382806166e-05, "loss": 0.6363, "step": 15583 }, { "epoch": 0.45499401477329127, "grad_norm": 0.533370791745651, "learning_rate": 3.0278994322789944e-05, "loss": 0.6501, "step": 15584 }, { "epoch": 0.4550232110011386, "grad_norm": 0.5045439083126034, "learning_rate": 3.0277372262773723e-05, "loss": 0.5942, "step": 15585 }, { "epoch": 0.455052407228986, "grad_norm": 0.5816054913162246, "learning_rate": 3.02757502027575e-05, "loss": 0.7622, "step": 15586 }, { "epoch": 0.45508160345683335, "grad_norm": 0.5224363943867751, "learning_rate": 3.0274128142741283e-05, "loss": 0.5675, "step": 15587 }, { "epoch": 0.45511079968468077, "grad_norm": 0.5307778680556288, "learning_rate": 3.027250608272506e-05, "loss": 0.588, "step": 15588 }, { "epoch": 0.45513999591252813, "grad_norm": 0.5395207112152192, "learning_rate": 3.027088402270884e-05, "loss": 0.6068, "step": 15589 }, { "epoch": 0.4551691921403755, "grad_norm": 0.5410989343749959, "learning_rate": 3.0269261962692618e-05, "loss": 0.6343, "step": 15590 }, { "epoch": 0.45519838836822285, "grad_norm": 0.49609495706306345, "learning_rate": 3.0267639902676396e-05, "loss": 0.5995, "step": 15591 }, { "epoch": 0.4552275845960702, "grad_norm": 0.5321467766093433, "learning_rate": 3.026601784266018e-05, "loss": 0.6208, "step": 15592 }, { "epoch": 0.4552567808239176, "grad_norm": 0.5602191328936162, "learning_rate": 3.0264395782643963e-05, "loss": 0.6692, "step": 15593 }, { "epoch": 0.45528597705176493, "grad_norm": 0.4864332415991085, "learning_rate": 3.026277372262774e-05, "loss": 0.531, "step": 15594 }, { "epoch": 0.4553151732796123, "grad_norm": 0.5525189532352498, "learning_rate": 3.026115166261152e-05, "loss": 0.6667, "step": 15595 }, { "epoch": 0.45534436950745966, "grad_norm": 0.47413140945995813, "learning_rate": 3.0259529602595298e-05, "loss": 0.5288, "step": 15596 }, { "epoch": 0.455373565735307, "grad_norm": 0.5335477321948204, "learning_rate": 3.0257907542579077e-05, "loss": 0.6669, "step": 15597 }, { "epoch": 0.4554027619631544, "grad_norm": 0.5192565817318574, "learning_rate": 3.025628548256286e-05, "loss": 0.6058, "step": 15598 }, { "epoch": 0.45543195819100174, "grad_norm": 0.5196515665946981, "learning_rate": 3.0254663422546637e-05, "loss": 0.598, "step": 15599 }, { "epoch": 0.4554611544188491, "grad_norm": 0.5525100802048684, "learning_rate": 3.0253041362530415e-05, "loss": 0.6935, "step": 15600 }, { "epoch": 0.45549035064669646, "grad_norm": 0.5518628069355906, "learning_rate": 3.0251419302514194e-05, "loss": 0.6418, "step": 15601 }, { "epoch": 0.4555195468745438, "grad_norm": 0.5125478985306036, "learning_rate": 3.0249797242497975e-05, "loss": 0.6007, "step": 15602 }, { "epoch": 0.4555487431023912, "grad_norm": 0.5270758845595117, "learning_rate": 3.0248175182481754e-05, "loss": 0.601, "step": 15603 }, { "epoch": 0.45557793933023855, "grad_norm": 0.5899045061268784, "learning_rate": 3.0246553122465532e-05, "loss": 0.6446, "step": 15604 }, { "epoch": 0.4556071355580859, "grad_norm": 0.5377983523010648, "learning_rate": 3.024493106244931e-05, "loss": 0.6183, "step": 15605 }, { "epoch": 0.45563633178593327, "grad_norm": 0.5415293957352315, "learning_rate": 3.024330900243309e-05, "loss": 0.6579, "step": 15606 }, { "epoch": 0.45566552801378063, "grad_norm": 0.5420796317085032, "learning_rate": 3.024168694241687e-05, "loss": 0.6749, "step": 15607 }, { "epoch": 0.455694724241628, "grad_norm": 0.592942661705228, "learning_rate": 3.024006488240065e-05, "loss": 0.6789, "step": 15608 }, { "epoch": 0.45572392046947535, "grad_norm": 0.5399982978921263, "learning_rate": 3.0238442822384427e-05, "loss": 0.6125, "step": 15609 }, { "epoch": 0.4557531166973227, "grad_norm": 0.4879234594324607, "learning_rate": 3.0236820762368206e-05, "loss": 0.5559, "step": 15610 }, { "epoch": 0.4557823129251701, "grad_norm": 0.54436726396283, "learning_rate": 3.023519870235199e-05, "loss": 0.6485, "step": 15611 }, { "epoch": 0.45581150915301744, "grad_norm": 0.5503054845347293, "learning_rate": 3.023357664233577e-05, "loss": 0.6155, "step": 15612 }, { "epoch": 0.4558407053808648, "grad_norm": 0.5467267846974827, "learning_rate": 3.023195458231955e-05, "loss": 0.6454, "step": 15613 }, { "epoch": 0.45586990160871216, "grad_norm": 0.5224949722525315, "learning_rate": 3.023033252230333e-05, "loss": 0.582, "step": 15614 }, { "epoch": 0.4558990978365595, "grad_norm": 0.5537910074572178, "learning_rate": 3.0228710462287108e-05, "loss": 0.6993, "step": 15615 }, { "epoch": 0.4559282940644069, "grad_norm": 0.5403513155052129, "learning_rate": 3.0227088402270886e-05, "loss": 0.6238, "step": 15616 }, { "epoch": 0.45595749029225424, "grad_norm": 0.5468072324985681, "learning_rate": 3.0225466342254664e-05, "loss": 0.6653, "step": 15617 }, { "epoch": 0.4559866865201016, "grad_norm": 0.534643797291919, "learning_rate": 3.0223844282238446e-05, "loss": 0.6074, "step": 15618 }, { "epoch": 0.45601588274794896, "grad_norm": 0.57917448428429, "learning_rate": 3.0222222222222225e-05, "loss": 0.7208, "step": 15619 }, { "epoch": 0.4560450789757963, "grad_norm": 0.5310564445901832, "learning_rate": 3.0220600162206003e-05, "loss": 0.6226, "step": 15620 }, { "epoch": 0.4560742752036437, "grad_norm": 0.5116036599592027, "learning_rate": 3.021897810218978e-05, "loss": 0.5582, "step": 15621 }, { "epoch": 0.45610347143149105, "grad_norm": 0.5372298100964462, "learning_rate": 3.021735604217356e-05, "loss": 0.5744, "step": 15622 }, { "epoch": 0.4561326676593384, "grad_norm": 0.5304555843660966, "learning_rate": 3.021573398215734e-05, "loss": 0.6224, "step": 15623 }, { "epoch": 0.45616186388718577, "grad_norm": 0.5496852642782912, "learning_rate": 3.021411192214112e-05, "loss": 0.6436, "step": 15624 }, { "epoch": 0.45619106011503313, "grad_norm": 0.5314917814587742, "learning_rate": 3.0212489862124898e-05, "loss": 0.6414, "step": 15625 }, { "epoch": 0.4562202563428805, "grad_norm": 0.486581326949464, "learning_rate": 3.0210867802108677e-05, "loss": 0.5471, "step": 15626 }, { "epoch": 0.45624945257072785, "grad_norm": 0.5085364220352624, "learning_rate": 3.020924574209246e-05, "loss": 0.6314, "step": 15627 }, { "epoch": 0.4562786487985752, "grad_norm": 0.5353162230837183, "learning_rate": 3.0207623682076237e-05, "loss": 0.6642, "step": 15628 }, { "epoch": 0.4563078450264226, "grad_norm": 0.5493911395845478, "learning_rate": 3.0206001622060015e-05, "loss": 0.6258, "step": 15629 }, { "epoch": 0.45633704125426994, "grad_norm": 0.5515336928746604, "learning_rate": 3.02043795620438e-05, "loss": 0.6132, "step": 15630 }, { "epoch": 0.4563662374821173, "grad_norm": 0.5466423846694718, "learning_rate": 3.020275750202758e-05, "loss": 0.6682, "step": 15631 }, { "epoch": 0.45639543370996466, "grad_norm": 0.5010076034621627, "learning_rate": 3.0201135442011357e-05, "loss": 0.5855, "step": 15632 }, { "epoch": 0.456424629937812, "grad_norm": 0.5317261996664898, "learning_rate": 3.0199513381995135e-05, "loss": 0.5791, "step": 15633 }, { "epoch": 0.4564538261656594, "grad_norm": 0.496589286374361, "learning_rate": 3.0197891321978917e-05, "loss": 0.5486, "step": 15634 }, { "epoch": 0.45648302239350674, "grad_norm": 0.5851942135822165, "learning_rate": 3.0196269261962696e-05, "loss": 0.7345, "step": 15635 }, { "epoch": 0.4565122186213541, "grad_norm": 0.526571763568227, "learning_rate": 3.0194647201946474e-05, "loss": 0.6358, "step": 15636 }, { "epoch": 0.45654141484920147, "grad_norm": 0.5203755243882505, "learning_rate": 3.0193025141930252e-05, "loss": 0.6108, "step": 15637 }, { "epoch": 0.4565706110770488, "grad_norm": 0.4968224496684354, "learning_rate": 3.0191403081914034e-05, "loss": 0.5672, "step": 15638 }, { "epoch": 0.4565998073048962, "grad_norm": 0.4838715808397207, "learning_rate": 3.0189781021897812e-05, "loss": 0.5494, "step": 15639 }, { "epoch": 0.45662900353274355, "grad_norm": 0.5589114953408627, "learning_rate": 3.018815896188159e-05, "loss": 0.6968, "step": 15640 }, { "epoch": 0.4566581997605909, "grad_norm": 0.5185506128181167, "learning_rate": 3.018653690186537e-05, "loss": 0.6, "step": 15641 }, { "epoch": 0.45668739598843827, "grad_norm": 0.5017727101223036, "learning_rate": 3.0184914841849148e-05, "loss": 0.5973, "step": 15642 }, { "epoch": 0.45671659221628563, "grad_norm": 0.5084520827371859, "learning_rate": 3.018329278183293e-05, "loss": 0.6003, "step": 15643 }, { "epoch": 0.456745788444133, "grad_norm": 0.5839172731035592, "learning_rate": 3.0181670721816708e-05, "loss": 0.7143, "step": 15644 }, { "epoch": 0.45677498467198036, "grad_norm": 0.49702372898028796, "learning_rate": 3.0180048661800486e-05, "loss": 0.5517, "step": 15645 }, { "epoch": 0.4568041808998277, "grad_norm": 0.5228676979849122, "learning_rate": 3.0178426601784264e-05, "loss": 0.5761, "step": 15646 }, { "epoch": 0.4568333771276751, "grad_norm": 0.5238227210695284, "learning_rate": 3.0176804541768046e-05, "loss": 0.618, "step": 15647 }, { "epoch": 0.45686257335552244, "grad_norm": 0.48737782120901013, "learning_rate": 3.0175182481751825e-05, "loss": 0.5352, "step": 15648 }, { "epoch": 0.45689176958336986, "grad_norm": 0.5990757464306752, "learning_rate": 3.017356042173561e-05, "loss": 0.6152, "step": 15649 }, { "epoch": 0.4569209658112172, "grad_norm": 0.5172405372664317, "learning_rate": 3.0171938361719388e-05, "loss": 0.5836, "step": 15650 }, { "epoch": 0.4569501620390646, "grad_norm": 0.52865923250044, "learning_rate": 3.0170316301703166e-05, "loss": 0.6249, "step": 15651 }, { "epoch": 0.45697935826691194, "grad_norm": 0.5084243141193376, "learning_rate": 3.0168694241686945e-05, "loss": 0.5714, "step": 15652 }, { "epoch": 0.4570085544947593, "grad_norm": 0.5181951079855684, "learning_rate": 3.0167072181670723e-05, "loss": 0.532, "step": 15653 }, { "epoch": 0.45703775072260666, "grad_norm": 0.5203802107622905, "learning_rate": 3.0165450121654505e-05, "loss": 0.632, "step": 15654 }, { "epoch": 0.457066946950454, "grad_norm": 0.5645881472075933, "learning_rate": 3.0163828061638283e-05, "loss": 0.6716, "step": 15655 }, { "epoch": 0.4570961431783014, "grad_norm": 0.5813276180211397, "learning_rate": 3.0162206001622062e-05, "loss": 0.7831, "step": 15656 }, { "epoch": 0.45712533940614875, "grad_norm": 0.5528742777808319, "learning_rate": 3.016058394160584e-05, "loss": 0.6205, "step": 15657 }, { "epoch": 0.4571545356339961, "grad_norm": 0.4899959833036137, "learning_rate": 3.0158961881589622e-05, "loss": 0.5577, "step": 15658 }, { "epoch": 0.45718373186184347, "grad_norm": 0.5282637966149727, "learning_rate": 3.01573398215734e-05, "loss": 0.5946, "step": 15659 }, { "epoch": 0.45721292808969083, "grad_norm": 0.5245363502677285, "learning_rate": 3.015571776155718e-05, "loss": 0.6093, "step": 15660 }, { "epoch": 0.4572421243175382, "grad_norm": 0.5175463539733127, "learning_rate": 3.0154095701540957e-05, "loss": 0.574, "step": 15661 }, { "epoch": 0.45727132054538555, "grad_norm": 0.5150448734792966, "learning_rate": 3.0152473641524735e-05, "loss": 0.6075, "step": 15662 }, { "epoch": 0.4573005167732329, "grad_norm": 0.5171830883855792, "learning_rate": 3.0150851581508517e-05, "loss": 0.5745, "step": 15663 }, { "epoch": 0.4573297130010803, "grad_norm": 0.5032558737229978, "learning_rate": 3.0149229521492295e-05, "loss": 0.5651, "step": 15664 }, { "epoch": 0.45735890922892763, "grad_norm": 0.5356605150301499, "learning_rate": 3.0147607461476074e-05, "loss": 0.6192, "step": 15665 }, { "epoch": 0.457388105456775, "grad_norm": 0.5510984943110471, "learning_rate": 3.0145985401459852e-05, "loss": 0.679, "step": 15666 }, { "epoch": 0.45741730168462236, "grad_norm": 0.530278483347089, "learning_rate": 3.014436334144363e-05, "loss": 0.63, "step": 15667 }, { "epoch": 0.4574464979124697, "grad_norm": 0.5658986403392106, "learning_rate": 3.0142741281427416e-05, "loss": 0.7247, "step": 15668 }, { "epoch": 0.4574756941403171, "grad_norm": 0.5256112659622056, "learning_rate": 3.0141119221411198e-05, "loss": 0.5958, "step": 15669 }, { "epoch": 0.45750489036816444, "grad_norm": 0.47124034812173216, "learning_rate": 3.0139497161394976e-05, "loss": 0.5324, "step": 15670 }, { "epoch": 0.4575340865960118, "grad_norm": 0.5152171394356552, "learning_rate": 3.0137875101378754e-05, "loss": 0.6288, "step": 15671 }, { "epoch": 0.45756328282385916, "grad_norm": 0.47135269470671587, "learning_rate": 3.0136253041362533e-05, "loss": 0.5264, "step": 15672 }, { "epoch": 0.4575924790517065, "grad_norm": 0.5754590463063673, "learning_rate": 3.013463098134631e-05, "loss": 0.6614, "step": 15673 }, { "epoch": 0.4576216752795539, "grad_norm": 0.49776312387388716, "learning_rate": 3.0133008921330093e-05, "loss": 0.5743, "step": 15674 }, { "epoch": 0.45765087150740125, "grad_norm": 0.4962078015341578, "learning_rate": 3.013138686131387e-05, "loss": 0.5499, "step": 15675 }, { "epoch": 0.4576800677352486, "grad_norm": 0.5156328033306319, "learning_rate": 3.012976480129765e-05, "loss": 0.5613, "step": 15676 }, { "epoch": 0.45770926396309597, "grad_norm": 0.5073087700193508, "learning_rate": 3.0128142741281428e-05, "loss": 0.6076, "step": 15677 }, { "epoch": 0.45773846019094333, "grad_norm": 0.5059240515820926, "learning_rate": 3.0126520681265206e-05, "loss": 0.5756, "step": 15678 }, { "epoch": 0.4577676564187907, "grad_norm": 0.5225403129036169, "learning_rate": 3.0124898621248988e-05, "loss": 0.5993, "step": 15679 }, { "epoch": 0.45779685264663805, "grad_norm": 0.536399665393151, "learning_rate": 3.0123276561232766e-05, "loss": 0.628, "step": 15680 }, { "epoch": 0.4578260488744854, "grad_norm": 0.4860979390185846, "learning_rate": 3.0121654501216545e-05, "loss": 0.5074, "step": 15681 }, { "epoch": 0.4578552451023328, "grad_norm": 0.4993071187041998, "learning_rate": 3.0120032441200323e-05, "loss": 0.5807, "step": 15682 }, { "epoch": 0.45788444133018014, "grad_norm": 0.5488897361238378, "learning_rate": 3.0118410381184105e-05, "loss": 0.6268, "step": 15683 }, { "epoch": 0.4579136375580275, "grad_norm": 0.5203119183191188, "learning_rate": 3.0116788321167883e-05, "loss": 0.6371, "step": 15684 }, { "epoch": 0.45794283378587486, "grad_norm": 0.5477680215373109, "learning_rate": 3.011516626115166e-05, "loss": 0.6291, "step": 15685 }, { "epoch": 0.4579720300137222, "grad_norm": 0.5478316278155546, "learning_rate": 3.011354420113544e-05, "loss": 0.6469, "step": 15686 }, { "epoch": 0.4580012262415696, "grad_norm": 0.558579600841784, "learning_rate": 3.0111922141119225e-05, "loss": 0.6495, "step": 15687 }, { "epoch": 0.45803042246941694, "grad_norm": 0.5039639206952748, "learning_rate": 3.0110300081103004e-05, "loss": 0.5575, "step": 15688 }, { "epoch": 0.4580596186972643, "grad_norm": 0.49028432967953667, "learning_rate": 3.0108678021086782e-05, "loss": 0.5612, "step": 15689 }, { "epoch": 0.45808881492511166, "grad_norm": 0.5178771510625475, "learning_rate": 3.0107055961070564e-05, "loss": 0.5741, "step": 15690 }, { "epoch": 0.458118011152959, "grad_norm": 0.5025922937458763, "learning_rate": 3.0105433901054342e-05, "loss": 0.565, "step": 15691 }, { "epoch": 0.4581472073808064, "grad_norm": 0.5074882760411643, "learning_rate": 3.010381184103812e-05, "loss": 0.5733, "step": 15692 }, { "epoch": 0.45817640360865375, "grad_norm": 0.5059301618710488, "learning_rate": 3.01021897810219e-05, "loss": 0.5972, "step": 15693 }, { "epoch": 0.4582055998365011, "grad_norm": 0.5311442944443728, "learning_rate": 3.010056772100568e-05, "loss": 0.6727, "step": 15694 }, { "epoch": 0.45823479606434847, "grad_norm": 0.5600136802485629, "learning_rate": 3.009894566098946e-05, "loss": 0.7039, "step": 15695 }, { "epoch": 0.45826399229219583, "grad_norm": 0.5797827644183389, "learning_rate": 3.0097323600973237e-05, "loss": 0.6863, "step": 15696 }, { "epoch": 0.4582931885200432, "grad_norm": 0.5454884035609324, "learning_rate": 3.0095701540957016e-05, "loss": 0.677, "step": 15697 }, { "epoch": 0.45832238474789055, "grad_norm": 0.5429911702878002, "learning_rate": 3.0094079480940794e-05, "loss": 0.6277, "step": 15698 }, { "epoch": 0.4583515809757379, "grad_norm": 0.5501869559115404, "learning_rate": 3.0092457420924576e-05, "loss": 0.6727, "step": 15699 }, { "epoch": 0.4583807772035853, "grad_norm": 0.5460359267850403, "learning_rate": 3.0090835360908354e-05, "loss": 0.6343, "step": 15700 }, { "epoch": 0.45840997343143264, "grad_norm": 0.5118773170023221, "learning_rate": 3.0089213300892133e-05, "loss": 0.6253, "step": 15701 }, { "epoch": 0.45843916965928, "grad_norm": 0.5104152783691339, "learning_rate": 3.008759124087591e-05, "loss": 0.5843, "step": 15702 }, { "epoch": 0.45846836588712736, "grad_norm": 0.5044754711220512, "learning_rate": 3.0085969180859693e-05, "loss": 0.5793, "step": 15703 }, { "epoch": 0.4584975621149747, "grad_norm": 0.48966484192349197, "learning_rate": 3.008434712084347e-05, "loss": 0.5391, "step": 15704 }, { "epoch": 0.4585267583428221, "grad_norm": 0.5300044796976745, "learning_rate": 3.008272506082725e-05, "loss": 0.6412, "step": 15705 }, { "epoch": 0.45855595457066944, "grad_norm": 0.4835424380283631, "learning_rate": 3.0081103000811035e-05, "loss": 0.5109, "step": 15706 }, { "epoch": 0.4585851507985168, "grad_norm": 0.5779564204301059, "learning_rate": 3.0079480940794813e-05, "loss": 0.6836, "step": 15707 }, { "epoch": 0.45861434702636417, "grad_norm": 0.4852913536165042, "learning_rate": 3.007785888077859e-05, "loss": 0.5478, "step": 15708 }, { "epoch": 0.4586435432542116, "grad_norm": 0.5299102997867329, "learning_rate": 3.007623682076237e-05, "loss": 0.6038, "step": 15709 }, { "epoch": 0.45867273948205894, "grad_norm": 0.5504738710297977, "learning_rate": 3.007461476074615e-05, "loss": 0.6761, "step": 15710 }, { "epoch": 0.4587019357099063, "grad_norm": 0.4702763399331356, "learning_rate": 3.007299270072993e-05, "loss": 0.5049, "step": 15711 }, { "epoch": 0.45873113193775367, "grad_norm": 0.5526064108808475, "learning_rate": 3.0071370640713708e-05, "loss": 0.6673, "step": 15712 }, { "epoch": 0.45876032816560103, "grad_norm": 0.5505414292463097, "learning_rate": 3.0069748580697487e-05, "loss": 0.6458, "step": 15713 }, { "epoch": 0.4587895243934484, "grad_norm": 0.5260188684631116, "learning_rate": 3.006812652068127e-05, "loss": 0.6327, "step": 15714 }, { "epoch": 0.45881872062129575, "grad_norm": 0.552293129599663, "learning_rate": 3.0066504460665047e-05, "loss": 0.6879, "step": 15715 }, { "epoch": 0.4588479168491431, "grad_norm": 0.625802997713798, "learning_rate": 3.0064882400648825e-05, "loss": 0.7033, "step": 15716 }, { "epoch": 0.4588771130769905, "grad_norm": 0.5284841161400005, "learning_rate": 3.0063260340632604e-05, "loss": 0.6302, "step": 15717 }, { "epoch": 0.45890630930483783, "grad_norm": 0.5355460774939196, "learning_rate": 3.0061638280616382e-05, "loss": 0.6611, "step": 15718 }, { "epoch": 0.4589355055326852, "grad_norm": 0.5486857067276973, "learning_rate": 3.0060016220600164e-05, "loss": 0.6957, "step": 15719 }, { "epoch": 0.45896470176053256, "grad_norm": 0.5321657812546401, "learning_rate": 3.0058394160583942e-05, "loss": 0.6089, "step": 15720 }, { "epoch": 0.4589938979883799, "grad_norm": 0.5557498888648638, "learning_rate": 3.005677210056772e-05, "loss": 0.6695, "step": 15721 }, { "epoch": 0.4590230942162273, "grad_norm": 0.5809135356882685, "learning_rate": 3.00551500405515e-05, "loss": 0.722, "step": 15722 }, { "epoch": 0.45905229044407464, "grad_norm": 0.49456578990662003, "learning_rate": 3.0053527980535277e-05, "loss": 0.5693, "step": 15723 }, { "epoch": 0.459081486671922, "grad_norm": 0.5259781101778763, "learning_rate": 3.0051905920519062e-05, "loss": 0.6203, "step": 15724 }, { "epoch": 0.45911068289976936, "grad_norm": 0.5004577793300542, "learning_rate": 3.0050283860502844e-05, "loss": 0.5833, "step": 15725 }, { "epoch": 0.4591398791276167, "grad_norm": 0.5111889151738889, "learning_rate": 3.0048661800486622e-05, "loss": 0.5576, "step": 15726 }, { "epoch": 0.4591690753554641, "grad_norm": 0.5388078947560941, "learning_rate": 3.00470397404704e-05, "loss": 0.6439, "step": 15727 }, { "epoch": 0.45919827158331145, "grad_norm": 0.5232937752898776, "learning_rate": 3.004541768045418e-05, "loss": 0.6367, "step": 15728 }, { "epoch": 0.4592274678111588, "grad_norm": 0.506092280682668, "learning_rate": 3.0043795620437958e-05, "loss": 0.5788, "step": 15729 }, { "epoch": 0.45925666403900617, "grad_norm": 0.5074603014717217, "learning_rate": 3.004217356042174e-05, "loss": 0.5705, "step": 15730 }, { "epoch": 0.45928586026685353, "grad_norm": 0.486686837499276, "learning_rate": 3.0040551500405518e-05, "loss": 0.5723, "step": 15731 }, { "epoch": 0.4593150564947009, "grad_norm": 0.5700706018940822, "learning_rate": 3.0038929440389296e-05, "loss": 0.695, "step": 15732 }, { "epoch": 0.45934425272254825, "grad_norm": 0.4953898145841567, "learning_rate": 3.0037307380373074e-05, "loss": 0.5568, "step": 15733 }, { "epoch": 0.4593734489503956, "grad_norm": 0.48029757900433884, "learning_rate": 3.0035685320356853e-05, "loss": 0.5435, "step": 15734 }, { "epoch": 0.459402645178243, "grad_norm": 0.5780712845221097, "learning_rate": 3.0034063260340635e-05, "loss": 0.683, "step": 15735 }, { "epoch": 0.45943184140609034, "grad_norm": 0.5641273794923168, "learning_rate": 3.0032441200324413e-05, "loss": 0.7332, "step": 15736 }, { "epoch": 0.4594610376339377, "grad_norm": 0.5412256553033471, "learning_rate": 3.003081914030819e-05, "loss": 0.6498, "step": 15737 }, { "epoch": 0.45949023386178506, "grad_norm": 0.5271142296882091, "learning_rate": 3.002919708029197e-05, "loss": 0.6041, "step": 15738 }, { "epoch": 0.4595194300896324, "grad_norm": 0.5427958417177, "learning_rate": 3.002757502027575e-05, "loss": 0.619, "step": 15739 }, { "epoch": 0.4595486263174798, "grad_norm": 0.5030588168622012, "learning_rate": 3.002595296025953e-05, "loss": 0.5656, "step": 15740 }, { "epoch": 0.45957782254532714, "grad_norm": 0.49559589944295523, "learning_rate": 3.0024330900243308e-05, "loss": 0.6007, "step": 15741 }, { "epoch": 0.4596070187731745, "grad_norm": 0.5916819189417529, "learning_rate": 3.0022708840227087e-05, "loss": 0.7348, "step": 15742 }, { "epoch": 0.45963621500102186, "grad_norm": 0.4917762520725499, "learning_rate": 3.0021086780210872e-05, "loss": 0.5518, "step": 15743 }, { "epoch": 0.4596654112288692, "grad_norm": 0.5207798858653356, "learning_rate": 3.001946472019465e-05, "loss": 0.6069, "step": 15744 }, { "epoch": 0.4596946074567166, "grad_norm": 0.5143867159000869, "learning_rate": 3.001784266017843e-05, "loss": 0.5913, "step": 15745 }, { "epoch": 0.45972380368456395, "grad_norm": 0.5175288935271357, "learning_rate": 3.001622060016221e-05, "loss": 0.5926, "step": 15746 }, { "epoch": 0.4597529999124113, "grad_norm": 0.5357609175592172, "learning_rate": 3.001459854014599e-05, "loss": 0.5983, "step": 15747 }, { "epoch": 0.45978219614025867, "grad_norm": 0.5584937667214757, "learning_rate": 3.0012976480129767e-05, "loss": 0.6507, "step": 15748 }, { "epoch": 0.45981139236810603, "grad_norm": 0.5312130606216005, "learning_rate": 3.0011354420113545e-05, "loss": 0.5945, "step": 15749 }, { "epoch": 0.4598405885959534, "grad_norm": 0.5176896130145427, "learning_rate": 3.0009732360097327e-05, "loss": 0.6174, "step": 15750 }, { "epoch": 0.45986978482380075, "grad_norm": 0.5423964360030898, "learning_rate": 3.0008110300081106e-05, "loss": 0.6364, "step": 15751 }, { "epoch": 0.4598989810516481, "grad_norm": 0.5449512495544504, "learning_rate": 3.0006488240064884e-05, "loss": 0.665, "step": 15752 }, { "epoch": 0.4599281772794955, "grad_norm": 0.5690837904642633, "learning_rate": 3.0004866180048662e-05, "loss": 0.7209, "step": 15753 }, { "epoch": 0.45995737350734284, "grad_norm": 0.5090371107971952, "learning_rate": 3.000324412003244e-05, "loss": 0.6197, "step": 15754 }, { "epoch": 0.4599865697351902, "grad_norm": 0.5429406350729835, "learning_rate": 3.0001622060016222e-05, "loss": 0.6765, "step": 15755 }, { "epoch": 0.46001576596303756, "grad_norm": 0.5793292282935911, "learning_rate": 3e-05, "loss": 0.6907, "step": 15756 }, { "epoch": 0.4600449621908849, "grad_norm": 0.4817370285130709, "learning_rate": 2.999837793998378e-05, "loss": 0.5444, "step": 15757 }, { "epoch": 0.4600741584187323, "grad_norm": 0.5543187433014936, "learning_rate": 2.9996755879967558e-05, "loss": 0.6225, "step": 15758 }, { "epoch": 0.46010335464657964, "grad_norm": 0.5614858764646675, "learning_rate": 2.999513381995134e-05, "loss": 0.6533, "step": 15759 }, { "epoch": 0.460132550874427, "grad_norm": 0.5665540971893893, "learning_rate": 2.9993511759935118e-05, "loss": 0.6908, "step": 15760 }, { "epoch": 0.46016174710227437, "grad_norm": 0.5038499044465456, "learning_rate": 2.9991889699918896e-05, "loss": 0.5875, "step": 15761 }, { "epoch": 0.4601909433301217, "grad_norm": 0.5270771206274649, "learning_rate": 2.999026763990268e-05, "loss": 0.6072, "step": 15762 }, { "epoch": 0.4602201395579691, "grad_norm": 0.5333886468657424, "learning_rate": 2.998864557988646e-05, "loss": 0.6169, "step": 15763 }, { "epoch": 0.46024933578581645, "grad_norm": 0.597646827230608, "learning_rate": 2.9987023519870238e-05, "loss": 0.6817, "step": 15764 }, { "epoch": 0.4602785320136638, "grad_norm": 0.49360175966462067, "learning_rate": 2.9985401459854016e-05, "loss": 0.5732, "step": 15765 }, { "epoch": 0.46030772824151117, "grad_norm": 0.4917475045276409, "learning_rate": 2.9983779399837798e-05, "loss": 0.5785, "step": 15766 }, { "epoch": 0.46033692446935853, "grad_norm": 0.5130301047724339, "learning_rate": 2.9982157339821576e-05, "loss": 0.613, "step": 15767 }, { "epoch": 0.4603661206972059, "grad_norm": 0.5254928152411832, "learning_rate": 2.9980535279805355e-05, "loss": 0.612, "step": 15768 }, { "epoch": 0.4603953169250533, "grad_norm": 0.5565833168993732, "learning_rate": 2.9978913219789133e-05, "loss": 0.6094, "step": 15769 }, { "epoch": 0.46042451315290067, "grad_norm": 0.5100563837817231, "learning_rate": 2.9977291159772915e-05, "loss": 0.6108, "step": 15770 }, { "epoch": 0.46045370938074803, "grad_norm": 0.5471604754939935, "learning_rate": 2.9975669099756693e-05, "loss": 0.6459, "step": 15771 }, { "epoch": 0.4604829056085954, "grad_norm": 0.5042770035780602, "learning_rate": 2.997404703974047e-05, "loss": 0.549, "step": 15772 }, { "epoch": 0.46051210183644276, "grad_norm": 0.5682042076535024, "learning_rate": 2.997242497972425e-05, "loss": 0.6875, "step": 15773 }, { "epoch": 0.4605412980642901, "grad_norm": 0.5075471865758143, "learning_rate": 2.997080291970803e-05, "loss": 0.634, "step": 15774 }, { "epoch": 0.4605704942921375, "grad_norm": 0.5177912693599502, "learning_rate": 2.996918085969181e-05, "loss": 0.5568, "step": 15775 }, { "epoch": 0.46059969051998484, "grad_norm": 0.5270303469058469, "learning_rate": 2.996755879967559e-05, "loss": 0.6539, "step": 15776 }, { "epoch": 0.4606288867478322, "grad_norm": 0.5104799339250227, "learning_rate": 2.9965936739659367e-05, "loss": 0.5494, "step": 15777 }, { "epoch": 0.46065808297567956, "grad_norm": 0.5276685764350723, "learning_rate": 2.9964314679643145e-05, "loss": 0.5907, "step": 15778 }, { "epoch": 0.4606872792035269, "grad_norm": 0.5320873897279783, "learning_rate": 2.9962692619626924e-05, "loss": 0.6155, "step": 15779 }, { "epoch": 0.4607164754313743, "grad_norm": 0.5247674000533339, "learning_rate": 2.9961070559610705e-05, "loss": 0.6611, "step": 15780 }, { "epoch": 0.46074567165922165, "grad_norm": 0.6081845311301102, "learning_rate": 2.995944849959449e-05, "loss": 0.7454, "step": 15781 }, { "epoch": 0.460774867887069, "grad_norm": 0.5305650351353154, "learning_rate": 2.995782643957827e-05, "loss": 0.5929, "step": 15782 }, { "epoch": 0.46080406411491637, "grad_norm": 0.5647090091316285, "learning_rate": 2.9956204379562047e-05, "loss": 0.6873, "step": 15783 }, { "epoch": 0.46083326034276373, "grad_norm": 0.5370381941291772, "learning_rate": 2.9954582319545826e-05, "loss": 0.6118, "step": 15784 }, { "epoch": 0.4608624565706111, "grad_norm": 0.5473322531144696, "learning_rate": 2.9952960259529604e-05, "loss": 0.6571, "step": 15785 }, { "epoch": 0.46089165279845845, "grad_norm": 0.5607683611150441, "learning_rate": 2.9951338199513386e-05, "loss": 0.6777, "step": 15786 }, { "epoch": 0.4609208490263058, "grad_norm": 0.5467736224721803, "learning_rate": 2.9949716139497164e-05, "loss": 0.657, "step": 15787 }, { "epoch": 0.4609500452541532, "grad_norm": 0.5134115435425011, "learning_rate": 2.9948094079480943e-05, "loss": 0.607, "step": 15788 }, { "epoch": 0.46097924148200053, "grad_norm": 0.5044804941702917, "learning_rate": 2.994647201946472e-05, "loss": 0.5863, "step": 15789 }, { "epoch": 0.4610084377098479, "grad_norm": 0.5095632374972391, "learning_rate": 2.99448499594485e-05, "loss": 0.5933, "step": 15790 }, { "epoch": 0.46103763393769526, "grad_norm": 0.5373409288238796, "learning_rate": 2.994322789943228e-05, "loss": 0.6413, "step": 15791 }, { "epoch": 0.4610668301655426, "grad_norm": 0.5373040335555008, "learning_rate": 2.994160583941606e-05, "loss": 0.6285, "step": 15792 }, { "epoch": 0.46109602639339, "grad_norm": 0.4961796349687235, "learning_rate": 2.9939983779399838e-05, "loss": 0.5386, "step": 15793 }, { "epoch": 0.46112522262123734, "grad_norm": 0.46830652345241763, "learning_rate": 2.9938361719383616e-05, "loss": 0.5125, "step": 15794 }, { "epoch": 0.4611544188490847, "grad_norm": 0.5101046140763812, "learning_rate": 2.9936739659367398e-05, "loss": 0.5938, "step": 15795 }, { "epoch": 0.46118361507693206, "grad_norm": 0.507544781083971, "learning_rate": 2.9935117599351176e-05, "loss": 0.6382, "step": 15796 }, { "epoch": 0.4612128113047794, "grad_norm": 0.5223092311771629, "learning_rate": 2.9933495539334955e-05, "loss": 0.5854, "step": 15797 }, { "epoch": 0.4612420075326268, "grad_norm": 0.46865610269727, "learning_rate": 2.9931873479318733e-05, "loss": 0.4888, "step": 15798 }, { "epoch": 0.46127120376047415, "grad_norm": 0.5608627111050373, "learning_rate": 2.993025141930251e-05, "loss": 0.6523, "step": 15799 }, { "epoch": 0.4613003999883215, "grad_norm": 0.52664141163575, "learning_rate": 2.9928629359286297e-05, "loss": 0.6451, "step": 15800 }, { "epoch": 0.46132959621616887, "grad_norm": 0.5460668362926584, "learning_rate": 2.992700729927008e-05, "loss": 0.6427, "step": 15801 }, { "epoch": 0.46135879244401623, "grad_norm": 0.5410084566021428, "learning_rate": 2.9925385239253857e-05, "loss": 0.6523, "step": 15802 }, { "epoch": 0.4613879886718636, "grad_norm": 0.520675964821137, "learning_rate": 2.9923763179237635e-05, "loss": 0.6079, "step": 15803 }, { "epoch": 0.46141718489971095, "grad_norm": 0.5145147839359143, "learning_rate": 2.9922141119221414e-05, "loss": 0.6362, "step": 15804 }, { "epoch": 0.4614463811275583, "grad_norm": 0.5319476983544298, "learning_rate": 2.9920519059205192e-05, "loss": 0.6232, "step": 15805 }, { "epoch": 0.4614755773554057, "grad_norm": 0.5886559186659468, "learning_rate": 2.9918896999188974e-05, "loss": 0.7069, "step": 15806 }, { "epoch": 0.46150477358325304, "grad_norm": 0.5789069981565712, "learning_rate": 2.9917274939172752e-05, "loss": 0.7011, "step": 15807 }, { "epoch": 0.4615339698111004, "grad_norm": 0.5352489358194323, "learning_rate": 2.991565287915653e-05, "loss": 0.6501, "step": 15808 }, { "epoch": 0.46156316603894776, "grad_norm": 0.4742263360758669, "learning_rate": 2.991403081914031e-05, "loss": 0.5129, "step": 15809 }, { "epoch": 0.4615923622667951, "grad_norm": 0.516444284724228, "learning_rate": 2.9912408759124087e-05, "loss": 0.6018, "step": 15810 }, { "epoch": 0.4616215584946425, "grad_norm": 0.5030287917145162, "learning_rate": 2.991078669910787e-05, "loss": 0.5482, "step": 15811 }, { "epoch": 0.46165075472248984, "grad_norm": 0.5798489043370956, "learning_rate": 2.9909164639091647e-05, "loss": 0.6692, "step": 15812 }, { "epoch": 0.4616799509503372, "grad_norm": 0.5177914831025582, "learning_rate": 2.9907542579075426e-05, "loss": 0.6233, "step": 15813 }, { "epoch": 0.46170914717818456, "grad_norm": 0.47639230726286125, "learning_rate": 2.9905920519059204e-05, "loss": 0.5293, "step": 15814 }, { "epoch": 0.4617383434060319, "grad_norm": 0.5140606656756058, "learning_rate": 2.9904298459042986e-05, "loss": 0.5827, "step": 15815 }, { "epoch": 0.4617675396338793, "grad_norm": 0.5895084991438709, "learning_rate": 2.9902676399026764e-05, "loss": 0.6924, "step": 15816 }, { "epoch": 0.46179673586172665, "grad_norm": 0.556856119816582, "learning_rate": 2.9901054339010543e-05, "loss": 0.6757, "step": 15817 }, { "epoch": 0.461825932089574, "grad_norm": 0.5344550640695691, "learning_rate": 2.989943227899432e-05, "loss": 0.6238, "step": 15818 }, { "epoch": 0.46185512831742137, "grad_norm": 0.515662830816122, "learning_rate": 2.9897810218978106e-05, "loss": 0.6266, "step": 15819 }, { "epoch": 0.46188432454526873, "grad_norm": 0.5519262978972507, "learning_rate": 2.9896188158961884e-05, "loss": 0.6437, "step": 15820 }, { "epoch": 0.4619135207731161, "grad_norm": 0.5554298357262984, "learning_rate": 2.9894566098945663e-05, "loss": 0.6629, "step": 15821 }, { "epoch": 0.46194271700096345, "grad_norm": 0.49274666861434213, "learning_rate": 2.9892944038929445e-05, "loss": 0.5411, "step": 15822 }, { "epoch": 0.4619719132288108, "grad_norm": 0.4822607827674387, "learning_rate": 2.9891321978913223e-05, "loss": 0.4913, "step": 15823 }, { "epoch": 0.4620011094566582, "grad_norm": 0.5175371015438466, "learning_rate": 2.9889699918897e-05, "loss": 0.5722, "step": 15824 }, { "epoch": 0.46203030568450554, "grad_norm": 0.5042015513596927, "learning_rate": 2.988807785888078e-05, "loss": 0.5694, "step": 15825 }, { "epoch": 0.4620595019123529, "grad_norm": 0.568724411297082, "learning_rate": 2.988645579886456e-05, "loss": 0.7116, "step": 15826 }, { "epoch": 0.46208869814020026, "grad_norm": 0.506927933143792, "learning_rate": 2.988483373884834e-05, "loss": 0.5829, "step": 15827 }, { "epoch": 0.4621178943680476, "grad_norm": 0.5256299334241887, "learning_rate": 2.9883211678832118e-05, "loss": 0.5976, "step": 15828 }, { "epoch": 0.462147090595895, "grad_norm": 0.5532261649802672, "learning_rate": 2.9881589618815897e-05, "loss": 0.6809, "step": 15829 }, { "epoch": 0.4621762868237424, "grad_norm": 0.5692131367125858, "learning_rate": 2.9879967558799675e-05, "loss": 0.6673, "step": 15830 }, { "epoch": 0.46220548305158976, "grad_norm": 0.48261653515345354, "learning_rate": 2.9878345498783457e-05, "loss": 0.5656, "step": 15831 }, { "epoch": 0.4622346792794371, "grad_norm": 0.5555943259999151, "learning_rate": 2.9876723438767235e-05, "loss": 0.6653, "step": 15832 }, { "epoch": 0.4622638755072845, "grad_norm": 0.5023081877635093, "learning_rate": 2.9875101378751013e-05, "loss": 0.5972, "step": 15833 }, { "epoch": 0.46229307173513184, "grad_norm": 0.49380872136296267, "learning_rate": 2.9873479318734792e-05, "loss": 0.5332, "step": 15834 }, { "epoch": 0.4623222679629792, "grad_norm": 0.5085876535702207, "learning_rate": 2.987185725871857e-05, "loss": 0.5766, "step": 15835 }, { "epoch": 0.46235146419082657, "grad_norm": 0.49867601538674655, "learning_rate": 2.9870235198702352e-05, "loss": 0.5457, "step": 15836 }, { "epoch": 0.4623806604186739, "grad_norm": 0.5421334406882962, "learning_rate": 2.986861313868613e-05, "loss": 0.5923, "step": 15837 }, { "epoch": 0.4624098566465213, "grad_norm": 0.5352305969960525, "learning_rate": 2.9866991078669916e-05, "loss": 0.6252, "step": 15838 }, { "epoch": 0.46243905287436865, "grad_norm": 0.5312608210610894, "learning_rate": 2.9865369018653694e-05, "loss": 0.6082, "step": 15839 }, { "epoch": 0.462468249102216, "grad_norm": 0.4910683048782961, "learning_rate": 2.9863746958637472e-05, "loss": 0.5784, "step": 15840 }, { "epoch": 0.4624974453300634, "grad_norm": 0.5232176097693758, "learning_rate": 2.986212489862125e-05, "loss": 0.645, "step": 15841 }, { "epoch": 0.46252664155791073, "grad_norm": 0.5159899668800115, "learning_rate": 2.9860502838605032e-05, "loss": 0.6029, "step": 15842 }, { "epoch": 0.4625558377857581, "grad_norm": 0.5076909034591871, "learning_rate": 2.985888077858881e-05, "loss": 0.5924, "step": 15843 }, { "epoch": 0.46258503401360546, "grad_norm": 0.535558861304832, "learning_rate": 2.985725871857259e-05, "loss": 0.5746, "step": 15844 }, { "epoch": 0.4626142302414528, "grad_norm": 0.491222709220287, "learning_rate": 2.9855636658556368e-05, "loss": 0.5395, "step": 15845 }, { "epoch": 0.4626434264693002, "grad_norm": 0.5014477292355807, "learning_rate": 2.985401459854015e-05, "loss": 0.576, "step": 15846 }, { "epoch": 0.46267262269714754, "grad_norm": 0.5715505435987155, "learning_rate": 2.9852392538523928e-05, "loss": 0.6739, "step": 15847 }, { "epoch": 0.4627018189249949, "grad_norm": 0.528440151522762, "learning_rate": 2.9850770478507706e-05, "loss": 0.6083, "step": 15848 }, { "epoch": 0.46273101515284226, "grad_norm": 0.5173301338059777, "learning_rate": 2.9849148418491484e-05, "loss": 0.5834, "step": 15849 }, { "epoch": 0.4627602113806896, "grad_norm": 0.46764145323682643, "learning_rate": 2.9847526358475263e-05, "loss": 0.4818, "step": 15850 }, { "epoch": 0.462789407608537, "grad_norm": 0.5705774966136332, "learning_rate": 2.9845904298459045e-05, "loss": 0.6534, "step": 15851 }, { "epoch": 0.46281860383638435, "grad_norm": 0.5215714175386503, "learning_rate": 2.9844282238442823e-05, "loss": 0.6023, "step": 15852 }, { "epoch": 0.4628478000642317, "grad_norm": 0.4803937072575653, "learning_rate": 2.98426601784266e-05, "loss": 0.5685, "step": 15853 }, { "epoch": 0.46287699629207907, "grad_norm": 0.49734416970063117, "learning_rate": 2.984103811841038e-05, "loss": 0.5394, "step": 15854 }, { "epoch": 0.46290619251992643, "grad_norm": 0.5030998168356152, "learning_rate": 2.9839416058394158e-05, "loss": 0.6137, "step": 15855 }, { "epoch": 0.4629353887477738, "grad_norm": 0.5354503578488479, "learning_rate": 2.983779399837794e-05, "loss": 0.5963, "step": 15856 }, { "epoch": 0.46296458497562115, "grad_norm": 0.4721516204022741, "learning_rate": 2.9836171938361725e-05, "loss": 0.561, "step": 15857 }, { "epoch": 0.4629937812034685, "grad_norm": 0.5421126905187192, "learning_rate": 2.9834549878345503e-05, "loss": 0.6055, "step": 15858 }, { "epoch": 0.4630229774313159, "grad_norm": 0.5087357225897233, "learning_rate": 2.9832927818329282e-05, "loss": 0.569, "step": 15859 }, { "epoch": 0.46305217365916324, "grad_norm": 0.522517054366462, "learning_rate": 2.983130575831306e-05, "loss": 0.6458, "step": 15860 }, { "epoch": 0.4630813698870106, "grad_norm": 0.5206227376831774, "learning_rate": 2.982968369829684e-05, "loss": 0.6611, "step": 15861 }, { "epoch": 0.46311056611485796, "grad_norm": 0.5122298880779272, "learning_rate": 2.982806163828062e-05, "loss": 0.6168, "step": 15862 }, { "epoch": 0.4631397623427053, "grad_norm": 0.5473323712334418, "learning_rate": 2.98264395782644e-05, "loss": 0.6405, "step": 15863 }, { "epoch": 0.4631689585705527, "grad_norm": 0.5326229407934935, "learning_rate": 2.9824817518248177e-05, "loss": 0.6539, "step": 15864 }, { "epoch": 0.46319815479840004, "grad_norm": 0.5473745492154263, "learning_rate": 2.9823195458231955e-05, "loss": 0.6011, "step": 15865 }, { "epoch": 0.4632273510262474, "grad_norm": 0.5723360107844255, "learning_rate": 2.9821573398215734e-05, "loss": 0.7051, "step": 15866 }, { "epoch": 0.46325654725409476, "grad_norm": 0.48700984776720707, "learning_rate": 2.9819951338199515e-05, "loss": 0.5815, "step": 15867 }, { "epoch": 0.4632857434819421, "grad_norm": 0.52689294678879, "learning_rate": 2.9818329278183294e-05, "loss": 0.5864, "step": 15868 }, { "epoch": 0.4633149397097895, "grad_norm": 0.5511614781784737, "learning_rate": 2.9816707218167072e-05, "loss": 0.6906, "step": 15869 }, { "epoch": 0.46334413593763685, "grad_norm": 0.47995863748958156, "learning_rate": 2.981508515815085e-05, "loss": 0.5409, "step": 15870 }, { "epoch": 0.4633733321654842, "grad_norm": 0.5440058023241405, "learning_rate": 2.9813463098134632e-05, "loss": 0.6187, "step": 15871 }, { "epoch": 0.46340252839333157, "grad_norm": 0.5422803692471058, "learning_rate": 2.981184103811841e-05, "loss": 0.6062, "step": 15872 }, { "epoch": 0.46343172462117893, "grad_norm": 0.5329442199101826, "learning_rate": 2.981021897810219e-05, "loss": 0.6211, "step": 15873 }, { "epoch": 0.4634609208490263, "grad_norm": 0.5314385313380168, "learning_rate": 2.9808596918085967e-05, "loss": 0.6551, "step": 15874 }, { "epoch": 0.46349011707687365, "grad_norm": 0.5847108424356225, "learning_rate": 2.9806974858069753e-05, "loss": 0.7212, "step": 15875 }, { "epoch": 0.463519313304721, "grad_norm": 0.5270389946363239, "learning_rate": 2.980535279805353e-05, "loss": 0.5805, "step": 15876 }, { "epoch": 0.4635485095325684, "grad_norm": 0.5497577204040947, "learning_rate": 2.980373073803731e-05, "loss": 0.6285, "step": 15877 }, { "epoch": 0.46357770576041574, "grad_norm": 0.527222353955496, "learning_rate": 2.980210867802109e-05, "loss": 0.6182, "step": 15878 }, { "epoch": 0.4636069019882631, "grad_norm": 0.5428516902831954, "learning_rate": 2.980048661800487e-05, "loss": 0.6182, "step": 15879 }, { "epoch": 0.46363609821611046, "grad_norm": 0.47906516911660924, "learning_rate": 2.9798864557988648e-05, "loss": 0.5405, "step": 15880 }, { "epoch": 0.4636652944439578, "grad_norm": 0.5098006292205626, "learning_rate": 2.9797242497972426e-05, "loss": 0.6299, "step": 15881 }, { "epoch": 0.4636944906718052, "grad_norm": 0.5331464942629895, "learning_rate": 2.9795620437956208e-05, "loss": 0.6313, "step": 15882 }, { "epoch": 0.46372368689965254, "grad_norm": 0.4771589601141102, "learning_rate": 2.9793998377939986e-05, "loss": 0.4948, "step": 15883 }, { "epoch": 0.4637528831274999, "grad_norm": 0.5364137692998535, "learning_rate": 2.9792376317923765e-05, "loss": 0.6185, "step": 15884 }, { "epoch": 0.46378207935534727, "grad_norm": 0.5118644581883062, "learning_rate": 2.9790754257907543e-05, "loss": 0.5641, "step": 15885 }, { "epoch": 0.4638112755831946, "grad_norm": 0.528528507582244, "learning_rate": 2.978913219789132e-05, "loss": 0.604, "step": 15886 }, { "epoch": 0.463840471811042, "grad_norm": 0.5347386954178049, "learning_rate": 2.9787510137875103e-05, "loss": 0.6198, "step": 15887 }, { "epoch": 0.46386966803888935, "grad_norm": 0.6761381405723231, "learning_rate": 2.978588807785888e-05, "loss": 0.6578, "step": 15888 }, { "epoch": 0.4638988642667367, "grad_norm": 0.5265649114424498, "learning_rate": 2.978426601784266e-05, "loss": 0.6072, "step": 15889 }, { "epoch": 0.4639280604945841, "grad_norm": 0.5354589534098756, "learning_rate": 2.978264395782644e-05, "loss": 0.6259, "step": 15890 }, { "epoch": 0.4639572567224315, "grad_norm": 0.5018442345301236, "learning_rate": 2.9781021897810217e-05, "loss": 0.5752, "step": 15891 }, { "epoch": 0.46398645295027885, "grad_norm": 0.509138796067253, "learning_rate": 2.9779399837794e-05, "loss": 0.582, "step": 15892 }, { "epoch": 0.4640156491781262, "grad_norm": 0.5840841653585603, "learning_rate": 2.9777777777777777e-05, "loss": 0.6575, "step": 15893 }, { "epoch": 0.46404484540597357, "grad_norm": 0.47934714121453537, "learning_rate": 2.9776155717761562e-05, "loss": 0.5272, "step": 15894 }, { "epoch": 0.46407404163382093, "grad_norm": 0.573966173102201, "learning_rate": 2.977453365774534e-05, "loss": 0.6886, "step": 15895 }, { "epoch": 0.4641032378616683, "grad_norm": 0.5060652751433509, "learning_rate": 2.977291159772912e-05, "loss": 0.5849, "step": 15896 }, { "epoch": 0.46413243408951566, "grad_norm": 0.5127004148708587, "learning_rate": 2.9771289537712897e-05, "loss": 0.5559, "step": 15897 }, { "epoch": 0.464161630317363, "grad_norm": 0.5792837012436851, "learning_rate": 2.976966747769668e-05, "loss": 0.7601, "step": 15898 }, { "epoch": 0.4641908265452104, "grad_norm": 0.5350047098424173, "learning_rate": 2.9768045417680457e-05, "loss": 0.6602, "step": 15899 }, { "epoch": 0.46422002277305774, "grad_norm": 0.5331773757625332, "learning_rate": 2.9766423357664236e-05, "loss": 0.6732, "step": 15900 }, { "epoch": 0.4642492190009051, "grad_norm": 0.51917595381452, "learning_rate": 2.9764801297648014e-05, "loss": 0.5783, "step": 15901 }, { "epoch": 0.46427841522875246, "grad_norm": 0.47197110112877677, "learning_rate": 2.9763179237631796e-05, "loss": 0.5038, "step": 15902 }, { "epoch": 0.4643076114565998, "grad_norm": 0.5383268396049223, "learning_rate": 2.9761557177615574e-05, "loss": 0.6089, "step": 15903 }, { "epoch": 0.4643368076844472, "grad_norm": 0.5089391247996452, "learning_rate": 2.9759935117599353e-05, "loss": 0.5922, "step": 15904 }, { "epoch": 0.46436600391229454, "grad_norm": 0.531854674262867, "learning_rate": 2.975831305758313e-05, "loss": 0.6475, "step": 15905 }, { "epoch": 0.4643952001401419, "grad_norm": 0.5518958786622541, "learning_rate": 2.975669099756691e-05, "loss": 0.6573, "step": 15906 }, { "epoch": 0.46442439636798927, "grad_norm": 0.5225398278110881, "learning_rate": 2.975506893755069e-05, "loss": 0.5974, "step": 15907 }, { "epoch": 0.46445359259583663, "grad_norm": 0.5439683720427073, "learning_rate": 2.975344687753447e-05, "loss": 0.6401, "step": 15908 }, { "epoch": 0.464482788823684, "grad_norm": 0.5640538212064758, "learning_rate": 2.9751824817518248e-05, "loss": 0.7211, "step": 15909 }, { "epoch": 0.46451198505153135, "grad_norm": 0.48995116441425246, "learning_rate": 2.9750202757502026e-05, "loss": 0.5173, "step": 15910 }, { "epoch": 0.4645411812793787, "grad_norm": 0.5909297456382089, "learning_rate": 2.9748580697485805e-05, "loss": 0.6928, "step": 15911 }, { "epoch": 0.4645703775072261, "grad_norm": 0.5297483611589, "learning_rate": 2.9746958637469586e-05, "loss": 0.6152, "step": 15912 }, { "epoch": 0.46459957373507343, "grad_norm": 0.5047209168742687, "learning_rate": 2.974533657745337e-05, "loss": 0.5918, "step": 15913 }, { "epoch": 0.4646287699629208, "grad_norm": 0.5023927234827338, "learning_rate": 2.974371451743715e-05, "loss": 0.5736, "step": 15914 }, { "epoch": 0.46465796619076816, "grad_norm": 0.526151864395717, "learning_rate": 2.9742092457420928e-05, "loss": 0.6089, "step": 15915 }, { "epoch": 0.4646871624186155, "grad_norm": 0.5746465368961834, "learning_rate": 2.9740470397404707e-05, "loss": 0.6811, "step": 15916 }, { "epoch": 0.4647163586464629, "grad_norm": 0.5260044634687757, "learning_rate": 2.9738848337388485e-05, "loss": 0.5873, "step": 15917 }, { "epoch": 0.46474555487431024, "grad_norm": 0.5641231017017349, "learning_rate": 2.9737226277372267e-05, "loss": 0.6568, "step": 15918 }, { "epoch": 0.4647747511021576, "grad_norm": 0.5131442859415645, "learning_rate": 2.9735604217356045e-05, "loss": 0.5823, "step": 15919 }, { "epoch": 0.46480394733000496, "grad_norm": 0.4936658746757925, "learning_rate": 2.9733982157339823e-05, "loss": 0.544, "step": 15920 }, { "epoch": 0.4648331435578523, "grad_norm": 0.5160896013056742, "learning_rate": 2.9732360097323602e-05, "loss": 0.5849, "step": 15921 }, { "epoch": 0.4648623397856997, "grad_norm": 0.5712227606603336, "learning_rate": 2.973073803730738e-05, "loss": 0.6569, "step": 15922 }, { "epoch": 0.46489153601354705, "grad_norm": 0.5183297121357962, "learning_rate": 2.9729115977291162e-05, "loss": 0.6149, "step": 15923 }, { "epoch": 0.4649207322413944, "grad_norm": 0.5169484031170257, "learning_rate": 2.972749391727494e-05, "loss": 0.6348, "step": 15924 }, { "epoch": 0.46494992846924177, "grad_norm": 0.5597558604832945, "learning_rate": 2.972587185725872e-05, "loss": 0.6192, "step": 15925 }, { "epoch": 0.46497912469708913, "grad_norm": 0.5063375131353475, "learning_rate": 2.9724249797242497e-05, "loss": 0.5513, "step": 15926 }, { "epoch": 0.4650083209249365, "grad_norm": 0.5014344801299528, "learning_rate": 2.972262773722628e-05, "loss": 0.5359, "step": 15927 }, { "epoch": 0.46503751715278385, "grad_norm": 0.5473455937992001, "learning_rate": 2.9721005677210057e-05, "loss": 0.6214, "step": 15928 }, { "epoch": 0.4650667133806312, "grad_norm": 0.5843492978562577, "learning_rate": 2.9719383617193836e-05, "loss": 0.7518, "step": 15929 }, { "epoch": 0.4650959096084786, "grad_norm": 0.5297245724564773, "learning_rate": 2.9717761557177614e-05, "loss": 0.6753, "step": 15930 }, { "epoch": 0.46512510583632594, "grad_norm": 0.4703685273995269, "learning_rate": 2.9716139497161392e-05, "loss": 0.5458, "step": 15931 }, { "epoch": 0.4651543020641733, "grad_norm": 0.5084382752112644, "learning_rate": 2.9714517437145178e-05, "loss": 0.626, "step": 15932 }, { "epoch": 0.46518349829202066, "grad_norm": 0.49875795254084326, "learning_rate": 2.9712895377128956e-05, "loss": 0.5482, "step": 15933 }, { "epoch": 0.465212694519868, "grad_norm": 0.5218526644133913, "learning_rate": 2.9711273317112738e-05, "loss": 0.6179, "step": 15934 }, { "epoch": 0.4652418907477154, "grad_norm": 0.5127335593934231, "learning_rate": 2.9709651257096516e-05, "loss": 0.5962, "step": 15935 }, { "epoch": 0.46527108697556274, "grad_norm": 0.5422519899539793, "learning_rate": 2.9708029197080294e-05, "loss": 0.5984, "step": 15936 }, { "epoch": 0.4653002832034101, "grad_norm": 0.630327828397738, "learning_rate": 2.9706407137064073e-05, "loss": 0.5778, "step": 15937 }, { "epoch": 0.46532947943125746, "grad_norm": 0.5183227996241598, "learning_rate": 2.9704785077047855e-05, "loss": 0.6458, "step": 15938 }, { "epoch": 0.4653586756591048, "grad_norm": 0.49268991689900565, "learning_rate": 2.9703163017031633e-05, "loss": 0.5774, "step": 15939 }, { "epoch": 0.4653878718869522, "grad_norm": 0.5564230435109531, "learning_rate": 2.970154095701541e-05, "loss": 0.6862, "step": 15940 }, { "epoch": 0.46541706811479955, "grad_norm": 0.5503955478044221, "learning_rate": 2.969991889699919e-05, "loss": 0.7034, "step": 15941 }, { "epoch": 0.4654462643426469, "grad_norm": 0.4964528191869275, "learning_rate": 2.9698296836982968e-05, "loss": 0.521, "step": 15942 }, { "epoch": 0.46547546057049427, "grad_norm": 0.49567529257174847, "learning_rate": 2.969667477696675e-05, "loss": 0.5427, "step": 15943 }, { "epoch": 0.46550465679834163, "grad_norm": 0.5292987565904947, "learning_rate": 2.9695052716950528e-05, "loss": 0.6085, "step": 15944 }, { "epoch": 0.465533853026189, "grad_norm": 0.5024434165241418, "learning_rate": 2.9693430656934307e-05, "loss": 0.6027, "step": 15945 }, { "epoch": 0.46556304925403635, "grad_norm": 0.5338635400929579, "learning_rate": 2.9691808596918085e-05, "loss": 0.6299, "step": 15946 }, { "epoch": 0.4655922454818837, "grad_norm": 0.5731594957569264, "learning_rate": 2.9690186536901867e-05, "loss": 0.6618, "step": 15947 }, { "epoch": 0.4656214417097311, "grad_norm": 0.49388273704126234, "learning_rate": 2.9688564476885645e-05, "loss": 0.5553, "step": 15948 }, { "epoch": 0.46565063793757844, "grad_norm": 0.49489942405770193, "learning_rate": 2.9686942416869423e-05, "loss": 0.5839, "step": 15949 }, { "epoch": 0.46567983416542585, "grad_norm": 0.5508238057846605, "learning_rate": 2.9685320356853202e-05, "loss": 0.6119, "step": 15950 }, { "epoch": 0.4657090303932732, "grad_norm": 0.5304106332896112, "learning_rate": 2.9683698296836987e-05, "loss": 0.6653, "step": 15951 }, { "epoch": 0.4657382266211206, "grad_norm": 0.5609225387598327, "learning_rate": 2.9682076236820765e-05, "loss": 0.603, "step": 15952 }, { "epoch": 0.46576742284896794, "grad_norm": 0.545963758044841, "learning_rate": 2.9680454176804544e-05, "loss": 0.666, "step": 15953 }, { "epoch": 0.4657966190768153, "grad_norm": 0.560128485129172, "learning_rate": 2.9678832116788325e-05, "loss": 0.6318, "step": 15954 }, { "epoch": 0.46582581530466266, "grad_norm": 0.5444602115902792, "learning_rate": 2.9677210056772104e-05, "loss": 0.657, "step": 15955 }, { "epoch": 0.46585501153251, "grad_norm": 0.5287977930937974, "learning_rate": 2.9675587996755882e-05, "loss": 0.6262, "step": 15956 }, { "epoch": 0.4658842077603574, "grad_norm": 0.5212787289754847, "learning_rate": 2.967396593673966e-05, "loss": 0.6324, "step": 15957 }, { "epoch": 0.46591340398820474, "grad_norm": 0.5523459133361115, "learning_rate": 2.9672343876723442e-05, "loss": 0.6824, "step": 15958 }, { "epoch": 0.4659426002160521, "grad_norm": 0.509592066084399, "learning_rate": 2.967072181670722e-05, "loss": 0.561, "step": 15959 }, { "epoch": 0.46597179644389947, "grad_norm": 0.5344079116514553, "learning_rate": 2.9669099756691e-05, "loss": 0.6057, "step": 15960 }, { "epoch": 0.4660009926717468, "grad_norm": 0.5084111942647694, "learning_rate": 2.9667477696674777e-05, "loss": 0.5607, "step": 15961 }, { "epoch": 0.4660301888995942, "grad_norm": 0.5406034923698686, "learning_rate": 2.9665855636658556e-05, "loss": 0.6349, "step": 15962 }, { "epoch": 0.46605938512744155, "grad_norm": 0.48795565096824073, "learning_rate": 2.9664233576642338e-05, "loss": 0.5465, "step": 15963 }, { "epoch": 0.4660885813552889, "grad_norm": 0.5313671000092555, "learning_rate": 2.9662611516626116e-05, "loss": 0.6182, "step": 15964 }, { "epoch": 0.4661177775831363, "grad_norm": 0.5002795925635545, "learning_rate": 2.9660989456609894e-05, "loss": 0.577, "step": 15965 }, { "epoch": 0.46614697381098363, "grad_norm": 0.5567593298782719, "learning_rate": 2.9659367396593673e-05, "loss": 0.692, "step": 15966 }, { "epoch": 0.466176170038831, "grad_norm": 0.4778360065909144, "learning_rate": 2.965774533657745e-05, "loss": 0.5285, "step": 15967 }, { "epoch": 0.46620536626667836, "grad_norm": 0.532808860184836, "learning_rate": 2.9656123276561233e-05, "loss": 0.6527, "step": 15968 }, { "epoch": 0.4662345624945257, "grad_norm": 0.49721222509148866, "learning_rate": 2.965450121654501e-05, "loss": 0.559, "step": 15969 }, { "epoch": 0.4662637587223731, "grad_norm": 0.5364449465733829, "learning_rate": 2.9652879156528796e-05, "loss": 0.6521, "step": 15970 }, { "epoch": 0.46629295495022044, "grad_norm": 0.5089883066802067, "learning_rate": 2.9651257096512575e-05, "loss": 0.6053, "step": 15971 }, { "epoch": 0.4663221511780678, "grad_norm": 0.6265851541374493, "learning_rate": 2.9649635036496353e-05, "loss": 0.7376, "step": 15972 }, { "epoch": 0.46635134740591516, "grad_norm": 0.5581385815734412, "learning_rate": 2.964801297648013e-05, "loss": 0.6544, "step": 15973 }, { "epoch": 0.4663805436337625, "grad_norm": 0.5237236798649858, "learning_rate": 2.9646390916463913e-05, "loss": 0.5707, "step": 15974 }, { "epoch": 0.4664097398616099, "grad_norm": 0.5357173935403237, "learning_rate": 2.964476885644769e-05, "loss": 0.6319, "step": 15975 }, { "epoch": 0.46643893608945725, "grad_norm": 0.5046314335316512, "learning_rate": 2.964314679643147e-05, "loss": 0.5727, "step": 15976 }, { "epoch": 0.4664681323173046, "grad_norm": 0.524520634056019, "learning_rate": 2.964152473641525e-05, "loss": 0.5907, "step": 15977 }, { "epoch": 0.46649732854515197, "grad_norm": 0.5216664635019738, "learning_rate": 2.9639902676399027e-05, "loss": 0.6257, "step": 15978 }, { "epoch": 0.46652652477299933, "grad_norm": 0.5212162784076793, "learning_rate": 2.963828061638281e-05, "loss": 0.606, "step": 15979 }, { "epoch": 0.4665557210008467, "grad_norm": 0.5711985126744474, "learning_rate": 2.9636658556366587e-05, "loss": 0.6966, "step": 15980 }, { "epoch": 0.46658491722869405, "grad_norm": 0.5203387835563025, "learning_rate": 2.9635036496350365e-05, "loss": 0.5981, "step": 15981 }, { "epoch": 0.4666141134565414, "grad_norm": 0.4909536995437035, "learning_rate": 2.9633414436334144e-05, "loss": 0.5634, "step": 15982 }, { "epoch": 0.4666433096843888, "grad_norm": 0.5035787975616082, "learning_rate": 2.9631792376317925e-05, "loss": 0.577, "step": 15983 }, { "epoch": 0.46667250591223614, "grad_norm": 0.5096196615052038, "learning_rate": 2.9630170316301704e-05, "loss": 0.6201, "step": 15984 }, { "epoch": 0.4667017021400835, "grad_norm": 0.5309427849687308, "learning_rate": 2.9628548256285482e-05, "loss": 0.6601, "step": 15985 }, { "epoch": 0.46673089836793086, "grad_norm": 0.5133892543592514, "learning_rate": 2.962692619626926e-05, "loss": 0.6457, "step": 15986 }, { "epoch": 0.4667600945957782, "grad_norm": 0.5240903656496907, "learning_rate": 2.962530413625304e-05, "loss": 0.6362, "step": 15987 }, { "epoch": 0.4667892908236256, "grad_norm": 0.48734712466316094, "learning_rate": 2.962368207623682e-05, "loss": 0.5098, "step": 15988 }, { "epoch": 0.46681848705147294, "grad_norm": 0.535743224763715, "learning_rate": 2.9622060016220602e-05, "loss": 0.6529, "step": 15989 }, { "epoch": 0.4668476832793203, "grad_norm": 0.5379081491798575, "learning_rate": 2.9620437956204384e-05, "loss": 0.6362, "step": 15990 }, { "epoch": 0.46687687950716766, "grad_norm": 0.5241459663121878, "learning_rate": 2.9618815896188163e-05, "loss": 0.5823, "step": 15991 }, { "epoch": 0.466906075735015, "grad_norm": 0.5326918145352237, "learning_rate": 2.961719383617194e-05, "loss": 0.6305, "step": 15992 }, { "epoch": 0.4669352719628624, "grad_norm": 0.48006441169038805, "learning_rate": 2.961557177615572e-05, "loss": 0.5409, "step": 15993 }, { "epoch": 0.46696446819070975, "grad_norm": 0.5299275419455406, "learning_rate": 2.96139497161395e-05, "loss": 0.6115, "step": 15994 }, { "epoch": 0.4669936644185571, "grad_norm": 0.5413366992714439, "learning_rate": 2.961232765612328e-05, "loss": 0.6905, "step": 15995 }, { "epoch": 0.46702286064640447, "grad_norm": 0.5199577955126964, "learning_rate": 2.9610705596107058e-05, "loss": 0.6032, "step": 15996 }, { "epoch": 0.46705205687425183, "grad_norm": 0.6450986270181818, "learning_rate": 2.9609083536090836e-05, "loss": 0.6232, "step": 15997 }, { "epoch": 0.4670812531020992, "grad_norm": 0.4969358893311952, "learning_rate": 2.9607461476074615e-05, "loss": 0.561, "step": 15998 }, { "epoch": 0.46711044932994655, "grad_norm": 0.5111885951208045, "learning_rate": 2.9605839416058396e-05, "loss": 0.6099, "step": 15999 }, { "epoch": 0.4671396455577939, "grad_norm": 0.512464202252861, "learning_rate": 2.9604217356042175e-05, "loss": 0.5677, "step": 16000 }, { "epoch": 0.4671688417856413, "grad_norm": 0.5158433989482926, "learning_rate": 2.9602595296025953e-05, "loss": 0.6084, "step": 16001 }, { "epoch": 0.46719803801348864, "grad_norm": 0.4958666760131685, "learning_rate": 2.960097323600973e-05, "loss": 0.5499, "step": 16002 }, { "epoch": 0.467227234241336, "grad_norm": 0.5900839887262762, "learning_rate": 2.9599351175993513e-05, "loss": 0.6774, "step": 16003 }, { "epoch": 0.46725643046918336, "grad_norm": 0.5150233630620555, "learning_rate": 2.959772911597729e-05, "loss": 0.6233, "step": 16004 }, { "epoch": 0.4672856266970307, "grad_norm": 0.5401385238840473, "learning_rate": 2.959610705596107e-05, "loss": 0.6191, "step": 16005 }, { "epoch": 0.4673148229248781, "grad_norm": 0.5930319762959665, "learning_rate": 2.959448499594485e-05, "loss": 0.7031, "step": 16006 }, { "epoch": 0.46734401915272544, "grad_norm": 0.5194598354634162, "learning_rate": 2.9592862935928627e-05, "loss": 0.6121, "step": 16007 }, { "epoch": 0.4673732153805728, "grad_norm": 0.5904774979337963, "learning_rate": 2.9591240875912412e-05, "loss": 0.6728, "step": 16008 }, { "epoch": 0.46740241160842017, "grad_norm": 0.5278858854067486, "learning_rate": 2.958961881589619e-05, "loss": 0.6376, "step": 16009 }, { "epoch": 0.4674316078362676, "grad_norm": 0.5306807726196545, "learning_rate": 2.9587996755879972e-05, "loss": 0.641, "step": 16010 }, { "epoch": 0.46746080406411494, "grad_norm": 0.5537886237478398, "learning_rate": 2.958637469586375e-05, "loss": 0.6772, "step": 16011 }, { "epoch": 0.4674900002919623, "grad_norm": 0.4865460635273243, "learning_rate": 2.958475263584753e-05, "loss": 0.5625, "step": 16012 }, { "epoch": 0.46751919651980967, "grad_norm": 0.5036833528874093, "learning_rate": 2.9583130575831307e-05, "loss": 0.5826, "step": 16013 }, { "epoch": 0.467548392747657, "grad_norm": 0.5227993809145851, "learning_rate": 2.958150851581509e-05, "loss": 0.6368, "step": 16014 }, { "epoch": 0.4675775889755044, "grad_norm": 0.5433699319299127, "learning_rate": 2.9579886455798867e-05, "loss": 0.6167, "step": 16015 }, { "epoch": 0.46760678520335175, "grad_norm": 0.5232549921803709, "learning_rate": 2.9578264395782646e-05, "loss": 0.593, "step": 16016 }, { "epoch": 0.4676359814311991, "grad_norm": 0.5243591475041799, "learning_rate": 2.9576642335766424e-05, "loss": 0.6069, "step": 16017 }, { "epoch": 0.46766517765904647, "grad_norm": 0.557320372271912, "learning_rate": 2.9575020275750202e-05, "loss": 0.6791, "step": 16018 }, { "epoch": 0.46769437388689383, "grad_norm": 0.5482337341279312, "learning_rate": 2.9573398215733984e-05, "loss": 0.686, "step": 16019 }, { "epoch": 0.4677235701147412, "grad_norm": 0.5109071459277976, "learning_rate": 2.9571776155717763e-05, "loss": 0.5941, "step": 16020 }, { "epoch": 0.46775276634258856, "grad_norm": 0.5155446576694587, "learning_rate": 2.957015409570154e-05, "loss": 0.5453, "step": 16021 }, { "epoch": 0.4677819625704359, "grad_norm": 0.5307942845021905, "learning_rate": 2.956853203568532e-05, "loss": 0.6056, "step": 16022 }, { "epoch": 0.4678111587982833, "grad_norm": 0.5102438234668278, "learning_rate": 2.9566909975669098e-05, "loss": 0.6225, "step": 16023 }, { "epoch": 0.46784035502613064, "grad_norm": 0.5687477665629644, "learning_rate": 2.956528791565288e-05, "loss": 0.6823, "step": 16024 }, { "epoch": 0.467869551253978, "grad_norm": 0.5806827486495684, "learning_rate": 2.9563665855636658e-05, "loss": 0.7614, "step": 16025 }, { "epoch": 0.46789874748182536, "grad_norm": 0.5139280361255423, "learning_rate": 2.9562043795620443e-05, "loss": 0.5726, "step": 16026 }, { "epoch": 0.4679279437096727, "grad_norm": 0.6107870384114845, "learning_rate": 2.956042173560422e-05, "loss": 0.6855, "step": 16027 }, { "epoch": 0.4679571399375201, "grad_norm": 0.4724525617419933, "learning_rate": 2.9558799675588e-05, "loss": 0.4959, "step": 16028 }, { "epoch": 0.46798633616536744, "grad_norm": 0.6169993010230381, "learning_rate": 2.9557177615571778e-05, "loss": 0.7577, "step": 16029 }, { "epoch": 0.4680155323932148, "grad_norm": 0.5288040038613911, "learning_rate": 2.955555555555556e-05, "loss": 0.6378, "step": 16030 }, { "epoch": 0.46804472862106217, "grad_norm": 0.4817923506099073, "learning_rate": 2.9553933495539338e-05, "loss": 0.5366, "step": 16031 }, { "epoch": 0.46807392484890953, "grad_norm": 0.5257365441463838, "learning_rate": 2.9552311435523117e-05, "loss": 0.645, "step": 16032 }, { "epoch": 0.4681031210767569, "grad_norm": 0.5804388929861628, "learning_rate": 2.9550689375506895e-05, "loss": 0.6316, "step": 16033 }, { "epoch": 0.46813231730460425, "grad_norm": 0.5408478038711663, "learning_rate": 2.9549067315490673e-05, "loss": 0.5974, "step": 16034 }, { "epoch": 0.4681615135324516, "grad_norm": 0.5401536460747448, "learning_rate": 2.9547445255474455e-05, "loss": 0.5976, "step": 16035 }, { "epoch": 0.468190709760299, "grad_norm": 0.531621218782993, "learning_rate": 2.9545823195458233e-05, "loss": 0.5696, "step": 16036 }, { "epoch": 0.46821990598814633, "grad_norm": 0.4937375957963525, "learning_rate": 2.9544201135442012e-05, "loss": 0.5419, "step": 16037 }, { "epoch": 0.4682491022159937, "grad_norm": 0.5575320477203671, "learning_rate": 2.954257907542579e-05, "loss": 0.7294, "step": 16038 }, { "epoch": 0.46827829844384106, "grad_norm": 0.5411707256965205, "learning_rate": 2.9540957015409572e-05, "loss": 0.6474, "step": 16039 }, { "epoch": 0.4683074946716884, "grad_norm": 0.5317431108202858, "learning_rate": 2.953933495539335e-05, "loss": 0.6923, "step": 16040 }, { "epoch": 0.4683366908995358, "grad_norm": 0.5859032111455447, "learning_rate": 2.953771289537713e-05, "loss": 0.7523, "step": 16041 }, { "epoch": 0.46836588712738314, "grad_norm": 0.5238754337223169, "learning_rate": 2.9536090835360907e-05, "loss": 0.621, "step": 16042 }, { "epoch": 0.4683950833552305, "grad_norm": 0.5373618031517939, "learning_rate": 2.9534468775344685e-05, "loss": 0.6317, "step": 16043 }, { "epoch": 0.46842427958307786, "grad_norm": 0.5485098586885663, "learning_rate": 2.9532846715328467e-05, "loss": 0.6205, "step": 16044 }, { "epoch": 0.4684534758109252, "grad_norm": 0.6034384309644898, "learning_rate": 2.9531224655312252e-05, "loss": 0.7167, "step": 16045 }, { "epoch": 0.4684826720387726, "grad_norm": 0.5309436901828959, "learning_rate": 2.952960259529603e-05, "loss": 0.6378, "step": 16046 }, { "epoch": 0.46851186826661995, "grad_norm": 0.5394328403486739, "learning_rate": 2.952798053527981e-05, "loss": 0.6071, "step": 16047 }, { "epoch": 0.4685410644944673, "grad_norm": 0.5054340570036875, "learning_rate": 2.9526358475263587e-05, "loss": 0.6035, "step": 16048 }, { "epoch": 0.46857026072231467, "grad_norm": 0.5376769532660057, "learning_rate": 2.9524736415247366e-05, "loss": 0.6475, "step": 16049 }, { "epoch": 0.46859945695016203, "grad_norm": 0.5015201224379143, "learning_rate": 2.9523114355231148e-05, "loss": 0.5732, "step": 16050 }, { "epoch": 0.4686286531780094, "grad_norm": 0.5050169873514312, "learning_rate": 2.9521492295214926e-05, "loss": 0.5891, "step": 16051 }, { "epoch": 0.46865784940585675, "grad_norm": 0.498887851524814, "learning_rate": 2.9519870235198704e-05, "loss": 0.5701, "step": 16052 }, { "epoch": 0.4686870456337041, "grad_norm": 0.47296790580262354, "learning_rate": 2.9518248175182483e-05, "loss": 0.5421, "step": 16053 }, { "epoch": 0.4687162418615515, "grad_norm": 0.5116740750167393, "learning_rate": 2.951662611516626e-05, "loss": 0.6192, "step": 16054 }, { "epoch": 0.46874543808939884, "grad_norm": 0.51957513819042, "learning_rate": 2.9515004055150043e-05, "loss": 0.5856, "step": 16055 }, { "epoch": 0.4687746343172462, "grad_norm": 0.504963903874363, "learning_rate": 2.951338199513382e-05, "loss": 0.5992, "step": 16056 }, { "epoch": 0.46880383054509356, "grad_norm": 0.5327952813153816, "learning_rate": 2.95117599351176e-05, "loss": 0.6228, "step": 16057 }, { "epoch": 0.4688330267729409, "grad_norm": 0.49146149924393956, "learning_rate": 2.9510137875101378e-05, "loss": 0.546, "step": 16058 }, { "epoch": 0.4688622230007883, "grad_norm": 0.5328779397668906, "learning_rate": 2.950851581508516e-05, "loss": 0.6125, "step": 16059 }, { "epoch": 0.46889141922863564, "grad_norm": 0.5707016212126607, "learning_rate": 2.9506893755068938e-05, "loss": 0.6625, "step": 16060 }, { "epoch": 0.468920615456483, "grad_norm": 0.5134950855645719, "learning_rate": 2.9505271695052717e-05, "loss": 0.6138, "step": 16061 }, { "epoch": 0.46894981168433036, "grad_norm": 0.5211473937486992, "learning_rate": 2.9503649635036495e-05, "loss": 0.5858, "step": 16062 }, { "epoch": 0.4689790079121777, "grad_norm": 0.5533312816871608, "learning_rate": 2.9502027575020273e-05, "loss": 0.6537, "step": 16063 }, { "epoch": 0.4690082041400251, "grad_norm": 0.5261746444660185, "learning_rate": 2.950040551500406e-05, "loss": 0.5956, "step": 16064 }, { "epoch": 0.46903740036787245, "grad_norm": 0.5214262553760404, "learning_rate": 2.9498783454987837e-05, "loss": 0.6009, "step": 16065 }, { "epoch": 0.4690665965957198, "grad_norm": 0.5824077189313233, "learning_rate": 2.949716139497162e-05, "loss": 0.7016, "step": 16066 }, { "epoch": 0.46909579282356717, "grad_norm": 0.5513301545302849, "learning_rate": 2.9495539334955397e-05, "loss": 0.677, "step": 16067 }, { "epoch": 0.46912498905141453, "grad_norm": 0.5089880538357342, "learning_rate": 2.9493917274939175e-05, "loss": 0.575, "step": 16068 }, { "epoch": 0.4691541852792619, "grad_norm": 0.5242793806854641, "learning_rate": 2.9492295214922954e-05, "loss": 0.6361, "step": 16069 }, { "epoch": 0.46918338150710925, "grad_norm": 0.4785982925200574, "learning_rate": 2.9490673154906735e-05, "loss": 0.4817, "step": 16070 }, { "epoch": 0.46921257773495667, "grad_norm": 0.5591065076626822, "learning_rate": 2.9489051094890514e-05, "loss": 0.6989, "step": 16071 }, { "epoch": 0.46924177396280403, "grad_norm": 0.5004639914929537, "learning_rate": 2.9487429034874292e-05, "loss": 0.6056, "step": 16072 }, { "epoch": 0.4692709701906514, "grad_norm": 0.5193927695624878, "learning_rate": 2.948580697485807e-05, "loss": 0.5892, "step": 16073 }, { "epoch": 0.46930016641849875, "grad_norm": 0.4864055381719913, "learning_rate": 2.948418491484185e-05, "loss": 0.5414, "step": 16074 }, { "epoch": 0.4693293626463461, "grad_norm": 0.5781539212794383, "learning_rate": 2.948256285482563e-05, "loss": 0.6685, "step": 16075 }, { "epoch": 0.4693585588741935, "grad_norm": 0.5048724339941824, "learning_rate": 2.948094079480941e-05, "loss": 0.5866, "step": 16076 }, { "epoch": 0.46938775510204084, "grad_norm": 0.5344379433054253, "learning_rate": 2.9479318734793187e-05, "loss": 0.6147, "step": 16077 }, { "epoch": 0.4694169513298882, "grad_norm": 0.5819507901775824, "learning_rate": 2.9477696674776966e-05, "loss": 0.7017, "step": 16078 }, { "epoch": 0.46944614755773556, "grad_norm": 0.5590110623689524, "learning_rate": 2.9476074614760744e-05, "loss": 0.7513, "step": 16079 }, { "epoch": 0.4694753437855829, "grad_norm": 0.5227453037069623, "learning_rate": 2.9474452554744526e-05, "loss": 0.6445, "step": 16080 }, { "epoch": 0.4695045400134303, "grad_norm": 0.5598916730745842, "learning_rate": 2.9472830494728304e-05, "loss": 0.7281, "step": 16081 }, { "epoch": 0.46953373624127764, "grad_norm": 0.47150649944150336, "learning_rate": 2.9471208434712083e-05, "loss": 0.5635, "step": 16082 }, { "epoch": 0.469562932469125, "grad_norm": 0.5542519150126279, "learning_rate": 2.9469586374695868e-05, "loss": 0.6698, "step": 16083 }, { "epoch": 0.46959212869697237, "grad_norm": 0.5212809488490912, "learning_rate": 2.9467964314679646e-05, "loss": 0.6208, "step": 16084 }, { "epoch": 0.4696213249248197, "grad_norm": 0.5318609550485153, "learning_rate": 2.9466342254663425e-05, "loss": 0.6741, "step": 16085 }, { "epoch": 0.4696505211526671, "grad_norm": 0.5806471847283495, "learning_rate": 2.9464720194647206e-05, "loss": 0.664, "step": 16086 }, { "epoch": 0.46967971738051445, "grad_norm": 0.5614079742616642, "learning_rate": 2.9463098134630985e-05, "loss": 0.6691, "step": 16087 }, { "epoch": 0.4697089136083618, "grad_norm": 0.5306585359591649, "learning_rate": 2.9461476074614763e-05, "loss": 0.6346, "step": 16088 }, { "epoch": 0.4697381098362092, "grad_norm": 0.5050948020197413, "learning_rate": 2.945985401459854e-05, "loss": 0.5816, "step": 16089 }, { "epoch": 0.46976730606405653, "grad_norm": 0.5267845403620067, "learning_rate": 2.945823195458232e-05, "loss": 0.6312, "step": 16090 }, { "epoch": 0.4697965022919039, "grad_norm": 0.5426160398451055, "learning_rate": 2.94566098945661e-05, "loss": 0.6756, "step": 16091 }, { "epoch": 0.46982569851975126, "grad_norm": 0.5616626963500794, "learning_rate": 2.945498783454988e-05, "loss": 0.6885, "step": 16092 }, { "epoch": 0.4698548947475986, "grad_norm": 0.5671332527068514, "learning_rate": 2.945336577453366e-05, "loss": 0.6763, "step": 16093 }, { "epoch": 0.469884090975446, "grad_norm": 0.5396776020733072, "learning_rate": 2.9451743714517437e-05, "loss": 0.6607, "step": 16094 }, { "epoch": 0.46991328720329334, "grad_norm": 0.5148089570112143, "learning_rate": 2.945012165450122e-05, "loss": 0.574, "step": 16095 }, { "epoch": 0.4699424834311407, "grad_norm": 0.49606115201829387, "learning_rate": 2.9448499594484997e-05, "loss": 0.5658, "step": 16096 }, { "epoch": 0.46997167965898806, "grad_norm": 0.5026735955077235, "learning_rate": 2.9446877534468775e-05, "loss": 0.5607, "step": 16097 }, { "epoch": 0.4700008758868354, "grad_norm": 0.5644578896234438, "learning_rate": 2.9445255474452554e-05, "loss": 0.6458, "step": 16098 }, { "epoch": 0.4700300721146828, "grad_norm": 0.5031161061121645, "learning_rate": 2.9443633414436332e-05, "loss": 0.6012, "step": 16099 }, { "epoch": 0.47005926834253015, "grad_norm": 0.5374021089858715, "learning_rate": 2.9442011354420114e-05, "loss": 0.6466, "step": 16100 }, { "epoch": 0.4700884645703775, "grad_norm": 0.5081541243260427, "learning_rate": 2.9440389294403892e-05, "loss": 0.589, "step": 16101 }, { "epoch": 0.47011766079822487, "grad_norm": 0.5252632019776864, "learning_rate": 2.9438767234387677e-05, "loss": 0.6106, "step": 16102 }, { "epoch": 0.47014685702607223, "grad_norm": 0.5932361886359374, "learning_rate": 2.9437145174371456e-05, "loss": 0.6512, "step": 16103 }, { "epoch": 0.4701760532539196, "grad_norm": 0.5186826799493988, "learning_rate": 2.9435523114355234e-05, "loss": 0.6507, "step": 16104 }, { "epoch": 0.47020524948176695, "grad_norm": 0.5182506004519992, "learning_rate": 2.9433901054339012e-05, "loss": 0.61, "step": 16105 }, { "epoch": 0.4702344457096143, "grad_norm": 0.5220437755958713, "learning_rate": 2.9432278994322794e-05, "loss": 0.5816, "step": 16106 }, { "epoch": 0.4702636419374617, "grad_norm": 0.49633195418746123, "learning_rate": 2.9430656934306573e-05, "loss": 0.5912, "step": 16107 }, { "epoch": 0.47029283816530904, "grad_norm": 0.5218352940255382, "learning_rate": 2.942903487429035e-05, "loss": 0.6387, "step": 16108 }, { "epoch": 0.4703220343931564, "grad_norm": 0.543273870981059, "learning_rate": 2.942741281427413e-05, "loss": 0.6226, "step": 16109 }, { "epoch": 0.47035123062100376, "grad_norm": 0.49079110990407976, "learning_rate": 2.9425790754257908e-05, "loss": 0.5624, "step": 16110 }, { "epoch": 0.4703804268488511, "grad_norm": 0.5140635644386518, "learning_rate": 2.942416869424169e-05, "loss": 0.6028, "step": 16111 }, { "epoch": 0.4704096230766985, "grad_norm": 0.51724006447531, "learning_rate": 2.9422546634225468e-05, "loss": 0.6026, "step": 16112 }, { "epoch": 0.47043881930454584, "grad_norm": 0.5390315777540186, "learning_rate": 2.9420924574209246e-05, "loss": 0.63, "step": 16113 }, { "epoch": 0.4704680155323932, "grad_norm": 0.5582935982783858, "learning_rate": 2.9419302514193025e-05, "loss": 0.627, "step": 16114 }, { "epoch": 0.47049721176024056, "grad_norm": 0.5003246802901108, "learning_rate": 2.9417680454176806e-05, "loss": 0.5768, "step": 16115 }, { "epoch": 0.4705264079880879, "grad_norm": 0.5578526751251567, "learning_rate": 2.9416058394160585e-05, "loss": 0.6738, "step": 16116 }, { "epoch": 0.4705556042159353, "grad_norm": 0.5612563441813485, "learning_rate": 2.9414436334144363e-05, "loss": 0.7205, "step": 16117 }, { "epoch": 0.47058480044378265, "grad_norm": 0.512895607018912, "learning_rate": 2.941281427412814e-05, "loss": 0.6007, "step": 16118 }, { "epoch": 0.47061399667163, "grad_norm": 0.49539771812526545, "learning_rate": 2.941119221411192e-05, "loss": 0.5748, "step": 16119 }, { "epoch": 0.47064319289947737, "grad_norm": 0.48619827605031746, "learning_rate": 2.94095701540957e-05, "loss": 0.5825, "step": 16120 }, { "epoch": 0.47067238912732473, "grad_norm": 0.5178454643980055, "learning_rate": 2.9407948094079483e-05, "loss": 0.6041, "step": 16121 }, { "epoch": 0.4707015853551721, "grad_norm": 0.5326249991631143, "learning_rate": 2.9406326034063265e-05, "loss": 0.678, "step": 16122 }, { "epoch": 0.47073078158301945, "grad_norm": 0.49483441978823073, "learning_rate": 2.9404703974047043e-05, "loss": 0.5743, "step": 16123 }, { "epoch": 0.4707599778108668, "grad_norm": 0.5330498116821536, "learning_rate": 2.9403081914030822e-05, "loss": 0.6363, "step": 16124 }, { "epoch": 0.4707891740387142, "grad_norm": 0.5153042920069276, "learning_rate": 2.94014598540146e-05, "loss": 0.6228, "step": 16125 }, { "epoch": 0.47081837026656154, "grad_norm": 0.5230531669390887, "learning_rate": 2.9399837793998382e-05, "loss": 0.6132, "step": 16126 }, { "epoch": 0.4708475664944089, "grad_norm": 0.524391894023959, "learning_rate": 2.939821573398216e-05, "loss": 0.5838, "step": 16127 }, { "epoch": 0.47087676272225626, "grad_norm": 0.495961465041498, "learning_rate": 2.939659367396594e-05, "loss": 0.5954, "step": 16128 }, { "epoch": 0.4709059589501036, "grad_norm": 0.5543785981944991, "learning_rate": 2.9394971613949717e-05, "loss": 0.7185, "step": 16129 }, { "epoch": 0.470935155177951, "grad_norm": 0.5224692365901944, "learning_rate": 2.9393349553933495e-05, "loss": 0.6213, "step": 16130 }, { "epoch": 0.4709643514057984, "grad_norm": 0.5196768800833739, "learning_rate": 2.9391727493917277e-05, "loss": 0.6189, "step": 16131 }, { "epoch": 0.47099354763364576, "grad_norm": 0.511538950860125, "learning_rate": 2.9390105433901056e-05, "loss": 0.5992, "step": 16132 }, { "epoch": 0.4710227438614931, "grad_norm": 0.47492223351079565, "learning_rate": 2.9388483373884834e-05, "loss": 0.533, "step": 16133 }, { "epoch": 0.4710519400893405, "grad_norm": 0.549965528944535, "learning_rate": 2.9386861313868612e-05, "loss": 0.66, "step": 16134 }, { "epoch": 0.47108113631718784, "grad_norm": 0.5499893654274828, "learning_rate": 2.938523925385239e-05, "loss": 0.6206, "step": 16135 }, { "epoch": 0.4711103325450352, "grad_norm": 0.5033457171799326, "learning_rate": 2.9383617193836172e-05, "loss": 0.5947, "step": 16136 }, { "epoch": 0.47113952877288257, "grad_norm": 0.49086992223230397, "learning_rate": 2.938199513381995e-05, "loss": 0.5381, "step": 16137 }, { "epoch": 0.4711687250007299, "grad_norm": 0.4910301932464503, "learning_rate": 2.938037307380373e-05, "loss": 0.6091, "step": 16138 }, { "epoch": 0.4711979212285773, "grad_norm": 0.5831177780518696, "learning_rate": 2.9378751013787508e-05, "loss": 0.7116, "step": 16139 }, { "epoch": 0.47122711745642465, "grad_norm": 0.5603370813174015, "learning_rate": 2.9377128953771293e-05, "loss": 0.7552, "step": 16140 }, { "epoch": 0.471256313684272, "grad_norm": 0.5161813651702744, "learning_rate": 2.937550689375507e-05, "loss": 0.6335, "step": 16141 }, { "epoch": 0.47128550991211937, "grad_norm": 0.5312142951521986, "learning_rate": 2.9373884833738853e-05, "loss": 0.6008, "step": 16142 }, { "epoch": 0.47131470613996673, "grad_norm": 0.5317229224304698, "learning_rate": 2.937226277372263e-05, "loss": 0.6242, "step": 16143 }, { "epoch": 0.4713439023678141, "grad_norm": 0.5451969579187314, "learning_rate": 2.937064071370641e-05, "loss": 0.6343, "step": 16144 }, { "epoch": 0.47137309859566145, "grad_norm": 0.5214263425365628, "learning_rate": 2.9369018653690188e-05, "loss": 0.6196, "step": 16145 }, { "epoch": 0.4714022948235088, "grad_norm": 0.5236500728748011, "learning_rate": 2.936739659367397e-05, "loss": 0.5981, "step": 16146 }, { "epoch": 0.4714314910513562, "grad_norm": 0.5617288455899436, "learning_rate": 2.9365774533657748e-05, "loss": 0.7023, "step": 16147 }, { "epoch": 0.47146068727920354, "grad_norm": 0.49886142845016795, "learning_rate": 2.9364152473641527e-05, "loss": 0.5761, "step": 16148 }, { "epoch": 0.4714898835070509, "grad_norm": 0.5781869484098356, "learning_rate": 2.9362530413625305e-05, "loss": 0.7133, "step": 16149 }, { "epoch": 0.47151907973489826, "grad_norm": 0.48359053463727286, "learning_rate": 2.9360908353609083e-05, "loss": 0.5343, "step": 16150 }, { "epoch": 0.4715482759627456, "grad_norm": 0.5891183091375561, "learning_rate": 2.9359286293592865e-05, "loss": 0.7123, "step": 16151 }, { "epoch": 0.471577472190593, "grad_norm": 0.5400829327065204, "learning_rate": 2.9357664233576643e-05, "loss": 0.6415, "step": 16152 }, { "epoch": 0.47160666841844034, "grad_norm": 0.5051256627100053, "learning_rate": 2.9356042173560422e-05, "loss": 0.5806, "step": 16153 }, { "epoch": 0.4716358646462877, "grad_norm": 0.6476423283242543, "learning_rate": 2.93544201135442e-05, "loss": 0.6345, "step": 16154 }, { "epoch": 0.47166506087413507, "grad_norm": 0.5252015212962935, "learning_rate": 2.935279805352798e-05, "loss": 0.6235, "step": 16155 }, { "epoch": 0.47169425710198243, "grad_norm": 0.5150150803307664, "learning_rate": 2.935117599351176e-05, "loss": 0.6234, "step": 16156 }, { "epoch": 0.4717234533298298, "grad_norm": 0.512661237756957, "learning_rate": 2.934955393349554e-05, "loss": 0.6173, "step": 16157 }, { "epoch": 0.47175264955767715, "grad_norm": 0.5014839350270321, "learning_rate": 2.9347931873479317e-05, "loss": 0.5695, "step": 16158 }, { "epoch": 0.4717818457855245, "grad_norm": 0.5762857794053199, "learning_rate": 2.9346309813463102e-05, "loss": 0.6997, "step": 16159 }, { "epoch": 0.4718110420133719, "grad_norm": 0.5125728895065149, "learning_rate": 2.934468775344688e-05, "loss": 0.5747, "step": 16160 }, { "epoch": 0.47184023824121923, "grad_norm": 0.6094922621458881, "learning_rate": 2.934306569343066e-05, "loss": 0.618, "step": 16161 }, { "epoch": 0.4718694344690666, "grad_norm": 0.5322484993371297, "learning_rate": 2.934144363341444e-05, "loss": 0.6206, "step": 16162 }, { "epoch": 0.47189863069691396, "grad_norm": 0.5573455546777716, "learning_rate": 2.933982157339822e-05, "loss": 0.6683, "step": 16163 }, { "epoch": 0.4719278269247613, "grad_norm": 0.553055137724728, "learning_rate": 2.9338199513381997e-05, "loss": 0.6413, "step": 16164 }, { "epoch": 0.4719570231526087, "grad_norm": 0.4819376159014264, "learning_rate": 2.9336577453365776e-05, "loss": 0.5193, "step": 16165 }, { "epoch": 0.47198621938045604, "grad_norm": 0.5510073374694887, "learning_rate": 2.9334955393349554e-05, "loss": 0.6224, "step": 16166 }, { "epoch": 0.4720154156083034, "grad_norm": 0.51290006039424, "learning_rate": 2.9333333333333336e-05, "loss": 0.611, "step": 16167 }, { "epoch": 0.47204461183615076, "grad_norm": 0.516646305765871, "learning_rate": 2.9331711273317114e-05, "loss": 0.586, "step": 16168 }, { "epoch": 0.4720738080639981, "grad_norm": 0.49317904484511627, "learning_rate": 2.9330089213300893e-05, "loss": 0.5361, "step": 16169 }, { "epoch": 0.4721030042918455, "grad_norm": 0.5647747198833157, "learning_rate": 2.932846715328467e-05, "loss": 0.6658, "step": 16170 }, { "epoch": 0.47213220051969285, "grad_norm": 0.5171449359728463, "learning_rate": 2.9326845093268453e-05, "loss": 0.5905, "step": 16171 }, { "epoch": 0.4721613967475402, "grad_norm": 0.5604007034300765, "learning_rate": 2.932522303325223e-05, "loss": 0.6749, "step": 16172 }, { "epoch": 0.47219059297538757, "grad_norm": 0.5217798594811667, "learning_rate": 2.932360097323601e-05, "loss": 0.5843, "step": 16173 }, { "epoch": 0.47221978920323493, "grad_norm": 0.5037228030897979, "learning_rate": 2.9321978913219788e-05, "loss": 0.6147, "step": 16174 }, { "epoch": 0.4722489854310823, "grad_norm": 0.523859458950313, "learning_rate": 2.9320356853203566e-05, "loss": 0.5701, "step": 16175 }, { "epoch": 0.47227818165892965, "grad_norm": 0.5280214356911384, "learning_rate": 2.9318734793187348e-05, "loss": 0.6433, "step": 16176 }, { "epoch": 0.472307377886777, "grad_norm": 0.49919337064053615, "learning_rate": 2.931711273317113e-05, "loss": 0.6113, "step": 16177 }, { "epoch": 0.4723365741146244, "grad_norm": 0.5199306796774774, "learning_rate": 2.931549067315491e-05, "loss": 0.6119, "step": 16178 }, { "epoch": 0.47236577034247174, "grad_norm": 0.5490844056803997, "learning_rate": 2.931386861313869e-05, "loss": 0.6356, "step": 16179 }, { "epoch": 0.4723949665703191, "grad_norm": 0.5342921672186975, "learning_rate": 2.931224655312247e-05, "loss": 0.6507, "step": 16180 }, { "epoch": 0.47242416279816646, "grad_norm": 0.5064508776284491, "learning_rate": 2.9310624493106247e-05, "loss": 0.6009, "step": 16181 }, { "epoch": 0.4724533590260138, "grad_norm": 0.5198025142589402, "learning_rate": 2.930900243309003e-05, "loss": 0.5999, "step": 16182 }, { "epoch": 0.4724825552538612, "grad_norm": 0.5338037435730388, "learning_rate": 2.9307380373073807e-05, "loss": 0.6298, "step": 16183 }, { "epoch": 0.47251175148170854, "grad_norm": 0.5172435990065214, "learning_rate": 2.9305758313057585e-05, "loss": 0.6173, "step": 16184 }, { "epoch": 0.4725409477095559, "grad_norm": 0.5738579306273852, "learning_rate": 2.9304136253041364e-05, "loss": 0.6853, "step": 16185 }, { "epoch": 0.47257014393740326, "grad_norm": 0.48824454743512374, "learning_rate": 2.9302514193025142e-05, "loss": 0.5282, "step": 16186 }, { "epoch": 0.4725993401652506, "grad_norm": 0.49811907143193446, "learning_rate": 2.9300892133008924e-05, "loss": 0.5582, "step": 16187 }, { "epoch": 0.472628536393098, "grad_norm": 0.5428389737001876, "learning_rate": 2.9299270072992702e-05, "loss": 0.6609, "step": 16188 }, { "epoch": 0.47265773262094535, "grad_norm": 0.5495364279946092, "learning_rate": 2.929764801297648e-05, "loss": 0.692, "step": 16189 }, { "epoch": 0.4726869288487927, "grad_norm": 0.5210299628380215, "learning_rate": 2.929602595296026e-05, "loss": 0.6227, "step": 16190 }, { "epoch": 0.4727161250766401, "grad_norm": 0.5508344753595676, "learning_rate": 2.929440389294404e-05, "loss": 0.6324, "step": 16191 }, { "epoch": 0.4727453213044875, "grad_norm": 0.5481871466461088, "learning_rate": 2.929278183292782e-05, "loss": 0.6089, "step": 16192 }, { "epoch": 0.47277451753233485, "grad_norm": 0.5178479393707112, "learning_rate": 2.9291159772911597e-05, "loss": 0.5988, "step": 16193 }, { "epoch": 0.4728037137601822, "grad_norm": 0.497963327491385, "learning_rate": 2.9289537712895376e-05, "loss": 0.5645, "step": 16194 }, { "epoch": 0.47283290998802957, "grad_norm": 0.5394372698791472, "learning_rate": 2.9287915652879154e-05, "loss": 0.6581, "step": 16195 }, { "epoch": 0.47286210621587693, "grad_norm": 0.5181401418092227, "learning_rate": 2.928629359286294e-05, "loss": 0.6198, "step": 16196 }, { "epoch": 0.4728913024437243, "grad_norm": 0.543672293375687, "learning_rate": 2.9284671532846718e-05, "loss": 0.6209, "step": 16197 }, { "epoch": 0.47292049867157165, "grad_norm": 0.4778307238933381, "learning_rate": 2.92830494728305e-05, "loss": 0.5212, "step": 16198 }, { "epoch": 0.472949694899419, "grad_norm": 0.5166778965237349, "learning_rate": 2.9281427412814278e-05, "loss": 0.6026, "step": 16199 }, { "epoch": 0.4729788911272664, "grad_norm": 0.5778120198218919, "learning_rate": 2.9279805352798056e-05, "loss": 0.627, "step": 16200 }, { "epoch": 0.47300808735511374, "grad_norm": 0.4884153269444193, "learning_rate": 2.9278183292781835e-05, "loss": 0.5735, "step": 16201 }, { "epoch": 0.4730372835829611, "grad_norm": 0.5438277144346926, "learning_rate": 2.9276561232765616e-05, "loss": 0.6711, "step": 16202 }, { "epoch": 0.47306647981080846, "grad_norm": 0.4796235709007036, "learning_rate": 2.9274939172749395e-05, "loss": 0.496, "step": 16203 }, { "epoch": 0.4730956760386558, "grad_norm": 0.4982233046890715, "learning_rate": 2.9273317112733173e-05, "loss": 0.5814, "step": 16204 }, { "epoch": 0.4731248722665032, "grad_norm": 0.543239455869565, "learning_rate": 2.927169505271695e-05, "loss": 0.68, "step": 16205 }, { "epoch": 0.47315406849435054, "grad_norm": 0.5416132003835491, "learning_rate": 2.927007299270073e-05, "loss": 0.6872, "step": 16206 }, { "epoch": 0.4731832647221979, "grad_norm": 0.5033906806116022, "learning_rate": 2.926845093268451e-05, "loss": 0.586, "step": 16207 }, { "epoch": 0.47321246095004527, "grad_norm": 0.5096862270035866, "learning_rate": 2.926682887266829e-05, "loss": 0.5918, "step": 16208 }, { "epoch": 0.4732416571778926, "grad_norm": 0.49226841806810534, "learning_rate": 2.926520681265207e-05, "loss": 0.5894, "step": 16209 }, { "epoch": 0.47327085340574, "grad_norm": 0.5144734934373486, "learning_rate": 2.9263584752635847e-05, "loss": 0.638, "step": 16210 }, { "epoch": 0.47330004963358735, "grad_norm": 0.4858364488861031, "learning_rate": 2.9261962692619625e-05, "loss": 0.5716, "step": 16211 }, { "epoch": 0.4733292458614347, "grad_norm": 0.5286705540749236, "learning_rate": 2.9260340632603407e-05, "loss": 0.6362, "step": 16212 }, { "epoch": 0.47335844208928207, "grad_norm": 0.5097320859819278, "learning_rate": 2.9258718572587185e-05, "loss": 0.589, "step": 16213 }, { "epoch": 0.47338763831712943, "grad_norm": 0.5205765793367566, "learning_rate": 2.9257096512570964e-05, "loss": 0.633, "step": 16214 }, { "epoch": 0.4734168345449768, "grad_norm": 0.47706227557955744, "learning_rate": 2.925547445255475e-05, "loss": 0.5649, "step": 16215 }, { "epoch": 0.47344603077282416, "grad_norm": 0.5256655303110696, "learning_rate": 2.9253852392538527e-05, "loss": 0.6336, "step": 16216 }, { "epoch": 0.4734752270006715, "grad_norm": 0.5483350079704892, "learning_rate": 2.9252230332522305e-05, "loss": 0.596, "step": 16217 }, { "epoch": 0.4735044232285189, "grad_norm": 0.4922942529178897, "learning_rate": 2.9250608272506087e-05, "loss": 0.5606, "step": 16218 }, { "epoch": 0.47353361945636624, "grad_norm": 0.5422940166640061, "learning_rate": 2.9248986212489866e-05, "loss": 0.679, "step": 16219 }, { "epoch": 0.4735628156842136, "grad_norm": 0.4943132486990071, "learning_rate": 2.9247364152473644e-05, "loss": 0.585, "step": 16220 }, { "epoch": 0.47359201191206096, "grad_norm": 0.5213777316854509, "learning_rate": 2.9245742092457422e-05, "loss": 0.6252, "step": 16221 }, { "epoch": 0.4736212081399083, "grad_norm": 0.5351675395252322, "learning_rate": 2.92441200324412e-05, "loss": 0.6502, "step": 16222 }, { "epoch": 0.4736504043677557, "grad_norm": 0.5526956216888123, "learning_rate": 2.9242497972424982e-05, "loss": 0.6614, "step": 16223 }, { "epoch": 0.47367960059560305, "grad_norm": 0.5253167873272518, "learning_rate": 2.924087591240876e-05, "loss": 0.6621, "step": 16224 }, { "epoch": 0.4737087968234504, "grad_norm": 0.5124513076642985, "learning_rate": 2.923925385239254e-05, "loss": 0.5953, "step": 16225 }, { "epoch": 0.47373799305129777, "grad_norm": 0.5253836975738286, "learning_rate": 2.9237631792376318e-05, "loss": 0.6205, "step": 16226 }, { "epoch": 0.47376718927914513, "grad_norm": 0.557701392494582, "learning_rate": 2.92360097323601e-05, "loss": 0.6621, "step": 16227 }, { "epoch": 0.4737963855069925, "grad_norm": 0.5386521694849424, "learning_rate": 2.9234387672343878e-05, "loss": 0.5896, "step": 16228 }, { "epoch": 0.47382558173483985, "grad_norm": 0.5000250656102414, "learning_rate": 2.9232765612327656e-05, "loss": 0.5605, "step": 16229 }, { "epoch": 0.4738547779626872, "grad_norm": 0.5051060761445074, "learning_rate": 2.9231143552311435e-05, "loss": 0.6078, "step": 16230 }, { "epoch": 0.4738839741905346, "grad_norm": 0.513668019012778, "learning_rate": 2.9229521492295213e-05, "loss": 0.5556, "step": 16231 }, { "epoch": 0.47391317041838193, "grad_norm": 0.5350040705069028, "learning_rate": 2.9227899432278995e-05, "loss": 0.6585, "step": 16232 }, { "epoch": 0.4739423666462293, "grad_norm": 0.5509002127643975, "learning_rate": 2.9226277372262773e-05, "loss": 0.6778, "step": 16233 }, { "epoch": 0.47397156287407666, "grad_norm": 0.5396017949547146, "learning_rate": 2.9224655312246558e-05, "loss": 0.6817, "step": 16234 }, { "epoch": 0.474000759101924, "grad_norm": 0.509816849282417, "learning_rate": 2.9223033252230337e-05, "loss": 0.5733, "step": 16235 }, { "epoch": 0.4740299553297714, "grad_norm": 0.5617213001863054, "learning_rate": 2.9221411192214115e-05, "loss": 0.7133, "step": 16236 }, { "epoch": 0.47405915155761874, "grad_norm": 0.5229666929279825, "learning_rate": 2.9219789132197893e-05, "loss": 0.6021, "step": 16237 }, { "epoch": 0.4740883477854661, "grad_norm": 0.4969116334854414, "learning_rate": 2.9218167072181675e-05, "loss": 0.5695, "step": 16238 }, { "epoch": 0.47411754401331346, "grad_norm": 0.48882646997736423, "learning_rate": 2.9216545012165453e-05, "loss": 0.5471, "step": 16239 }, { "epoch": 0.4741467402411608, "grad_norm": 0.530959519642566, "learning_rate": 2.9214922952149232e-05, "loss": 0.6362, "step": 16240 }, { "epoch": 0.4741759364690082, "grad_norm": 0.501205710742295, "learning_rate": 2.921330089213301e-05, "loss": 0.5469, "step": 16241 }, { "epoch": 0.47420513269685555, "grad_norm": 0.5541467394506976, "learning_rate": 2.921167883211679e-05, "loss": 0.6838, "step": 16242 }, { "epoch": 0.4742343289247029, "grad_norm": 0.544641761392252, "learning_rate": 2.921005677210057e-05, "loss": 0.7, "step": 16243 }, { "epoch": 0.47426352515255027, "grad_norm": 0.5330316332875429, "learning_rate": 2.920843471208435e-05, "loss": 0.6082, "step": 16244 }, { "epoch": 0.47429272138039763, "grad_norm": 0.5276152563881946, "learning_rate": 2.9206812652068127e-05, "loss": 0.6551, "step": 16245 }, { "epoch": 0.474321917608245, "grad_norm": 0.5400266721332292, "learning_rate": 2.9205190592051905e-05, "loss": 0.6319, "step": 16246 }, { "epoch": 0.47435111383609235, "grad_norm": 0.5900739614192234, "learning_rate": 2.9203568532035687e-05, "loss": 0.737, "step": 16247 }, { "epoch": 0.4743803100639397, "grad_norm": 0.5371454647859242, "learning_rate": 2.9201946472019466e-05, "loss": 0.6364, "step": 16248 }, { "epoch": 0.4744095062917871, "grad_norm": 0.5010761018069232, "learning_rate": 2.9200324412003244e-05, "loss": 0.5222, "step": 16249 }, { "epoch": 0.47443870251963444, "grad_norm": 0.5142901156146718, "learning_rate": 2.9198702351987022e-05, "loss": 0.625, "step": 16250 }, { "epoch": 0.4744678987474818, "grad_norm": 0.5411073787249163, "learning_rate": 2.91970802919708e-05, "loss": 0.695, "step": 16251 }, { "epoch": 0.4744970949753292, "grad_norm": 0.5021341176280608, "learning_rate": 2.9195458231954582e-05, "loss": 0.5352, "step": 16252 }, { "epoch": 0.4745262912031766, "grad_norm": 0.5158826030257488, "learning_rate": 2.9193836171938364e-05, "loss": 0.6294, "step": 16253 }, { "epoch": 0.47455548743102394, "grad_norm": 0.5257929601468866, "learning_rate": 2.9192214111922146e-05, "loss": 0.6658, "step": 16254 }, { "epoch": 0.4745846836588713, "grad_norm": 0.5178677230621437, "learning_rate": 2.9190592051905924e-05, "loss": 0.6122, "step": 16255 }, { "epoch": 0.47461387988671866, "grad_norm": 0.5231409352867995, "learning_rate": 2.9188969991889703e-05, "loss": 0.6174, "step": 16256 }, { "epoch": 0.474643076114566, "grad_norm": 0.5172161071264562, "learning_rate": 2.918734793187348e-05, "loss": 0.6036, "step": 16257 }, { "epoch": 0.4746722723424134, "grad_norm": 0.5498300221205048, "learning_rate": 2.9185725871857263e-05, "loss": 0.6526, "step": 16258 }, { "epoch": 0.47470146857026074, "grad_norm": 0.49614486340704744, "learning_rate": 2.918410381184104e-05, "loss": 0.5922, "step": 16259 }, { "epoch": 0.4747306647981081, "grad_norm": 0.5191798859243163, "learning_rate": 2.918248175182482e-05, "loss": 0.6203, "step": 16260 }, { "epoch": 0.47475986102595547, "grad_norm": 0.5464887221829224, "learning_rate": 2.9180859691808598e-05, "loss": 0.6802, "step": 16261 }, { "epoch": 0.4747890572538028, "grad_norm": 0.49594581095069995, "learning_rate": 2.9179237631792376e-05, "loss": 0.5858, "step": 16262 }, { "epoch": 0.4748182534816502, "grad_norm": 0.5548185872857306, "learning_rate": 2.9177615571776158e-05, "loss": 0.6498, "step": 16263 }, { "epoch": 0.47484744970949755, "grad_norm": 0.6337291785514143, "learning_rate": 2.9175993511759936e-05, "loss": 0.7937, "step": 16264 }, { "epoch": 0.4748766459373449, "grad_norm": 0.5340946339854641, "learning_rate": 2.9174371451743715e-05, "loss": 0.662, "step": 16265 }, { "epoch": 0.47490584216519227, "grad_norm": 0.5256184976292819, "learning_rate": 2.9172749391727493e-05, "loss": 0.6044, "step": 16266 }, { "epoch": 0.47493503839303963, "grad_norm": 0.5442924459260606, "learning_rate": 2.917112733171127e-05, "loss": 0.676, "step": 16267 }, { "epoch": 0.474964234620887, "grad_norm": 0.5096386372613738, "learning_rate": 2.9169505271695053e-05, "loss": 0.6249, "step": 16268 }, { "epoch": 0.47499343084873435, "grad_norm": 0.48559778846857365, "learning_rate": 2.9167883211678832e-05, "loss": 0.5206, "step": 16269 }, { "epoch": 0.4750226270765817, "grad_norm": 0.513167727896951, "learning_rate": 2.916626115166261e-05, "loss": 0.6222, "step": 16270 }, { "epoch": 0.4750518233044291, "grad_norm": 0.5145651232176945, "learning_rate": 2.916463909164639e-05, "loss": 0.6328, "step": 16271 }, { "epoch": 0.47508101953227644, "grad_norm": 0.5593559003607242, "learning_rate": 2.9163017031630174e-05, "loss": 0.6503, "step": 16272 }, { "epoch": 0.4751102157601238, "grad_norm": 0.5401855692393953, "learning_rate": 2.9161394971613952e-05, "loss": 0.6304, "step": 16273 }, { "epoch": 0.47513941198797116, "grad_norm": 0.5338570926687135, "learning_rate": 2.9159772911597734e-05, "loss": 0.6431, "step": 16274 }, { "epoch": 0.4751686082158185, "grad_norm": 0.5474043095208099, "learning_rate": 2.9158150851581512e-05, "loss": 0.6989, "step": 16275 }, { "epoch": 0.4751978044436659, "grad_norm": 0.5300177957155824, "learning_rate": 2.915652879156529e-05, "loss": 0.6846, "step": 16276 }, { "epoch": 0.47522700067151324, "grad_norm": 0.4966725086453041, "learning_rate": 2.915490673154907e-05, "loss": 0.5557, "step": 16277 }, { "epoch": 0.4752561968993606, "grad_norm": 0.48873128799922005, "learning_rate": 2.9153284671532847e-05, "loss": 0.5759, "step": 16278 }, { "epoch": 0.47528539312720797, "grad_norm": 0.5433847144203887, "learning_rate": 2.915166261151663e-05, "loss": 0.6548, "step": 16279 }, { "epoch": 0.47531458935505533, "grad_norm": 0.5300553209452753, "learning_rate": 2.9150040551500407e-05, "loss": 0.6131, "step": 16280 }, { "epoch": 0.4753437855829027, "grad_norm": 0.5084178004237624, "learning_rate": 2.9148418491484186e-05, "loss": 0.5901, "step": 16281 }, { "epoch": 0.47537298181075005, "grad_norm": 0.513056181867409, "learning_rate": 2.9146796431467964e-05, "loss": 0.5454, "step": 16282 }, { "epoch": 0.4754021780385974, "grad_norm": 0.5535882331262927, "learning_rate": 2.9145174371451746e-05, "loss": 0.6446, "step": 16283 }, { "epoch": 0.4754313742664448, "grad_norm": 0.5064677689075959, "learning_rate": 2.9143552311435524e-05, "loss": 0.5891, "step": 16284 }, { "epoch": 0.47546057049429213, "grad_norm": 0.6816130804293209, "learning_rate": 2.9141930251419303e-05, "loss": 0.7485, "step": 16285 }, { "epoch": 0.4754897667221395, "grad_norm": 0.48591218762267946, "learning_rate": 2.914030819140308e-05, "loss": 0.5681, "step": 16286 }, { "epoch": 0.47551896294998686, "grad_norm": 0.5108010971902149, "learning_rate": 2.913868613138686e-05, "loss": 0.5625, "step": 16287 }, { "epoch": 0.4755481591778342, "grad_norm": 0.5031580051159624, "learning_rate": 2.913706407137064e-05, "loss": 0.5842, "step": 16288 }, { "epoch": 0.4755773554056816, "grad_norm": 0.5504068443974464, "learning_rate": 2.913544201135442e-05, "loss": 0.6735, "step": 16289 }, { "epoch": 0.47560655163352894, "grad_norm": 0.5716920124582631, "learning_rate": 2.9133819951338198e-05, "loss": 0.7131, "step": 16290 }, { "epoch": 0.4756357478613763, "grad_norm": 0.5291767646461818, "learning_rate": 2.9132197891321983e-05, "loss": 0.6248, "step": 16291 }, { "epoch": 0.47566494408922366, "grad_norm": 0.5354103777969066, "learning_rate": 2.913057583130576e-05, "loss": 0.6292, "step": 16292 }, { "epoch": 0.475694140317071, "grad_norm": 0.5812059277257464, "learning_rate": 2.912895377128954e-05, "loss": 0.606, "step": 16293 }, { "epoch": 0.4757233365449184, "grad_norm": 0.5138688958018597, "learning_rate": 2.912733171127332e-05, "loss": 0.6002, "step": 16294 }, { "epoch": 0.47575253277276575, "grad_norm": 0.5294860604974903, "learning_rate": 2.91257096512571e-05, "loss": 0.6087, "step": 16295 }, { "epoch": 0.4757817290006131, "grad_norm": 0.5262083061444592, "learning_rate": 2.912408759124088e-05, "loss": 0.6207, "step": 16296 }, { "epoch": 0.47581092522846047, "grad_norm": 0.5422196186640291, "learning_rate": 2.9122465531224657e-05, "loss": 0.65, "step": 16297 }, { "epoch": 0.47584012145630783, "grad_norm": 0.5317074317610027, "learning_rate": 2.9120843471208435e-05, "loss": 0.6227, "step": 16298 }, { "epoch": 0.4758693176841552, "grad_norm": 0.5535394579876122, "learning_rate": 2.9119221411192217e-05, "loss": 0.6728, "step": 16299 }, { "epoch": 0.47589851391200255, "grad_norm": 0.5448979279495232, "learning_rate": 2.9117599351175995e-05, "loss": 0.6437, "step": 16300 }, { "epoch": 0.4759277101398499, "grad_norm": 0.4951606343850258, "learning_rate": 2.9115977291159774e-05, "loss": 0.5706, "step": 16301 }, { "epoch": 0.4759569063676973, "grad_norm": 0.4955935453796687, "learning_rate": 2.9114355231143552e-05, "loss": 0.5508, "step": 16302 }, { "epoch": 0.47598610259554464, "grad_norm": 0.5420325014830705, "learning_rate": 2.9112733171127334e-05, "loss": 0.7083, "step": 16303 }, { "epoch": 0.476015298823392, "grad_norm": 0.5152825867378868, "learning_rate": 2.9111111111111112e-05, "loss": 0.619, "step": 16304 }, { "epoch": 0.47604449505123936, "grad_norm": 0.5148837719664445, "learning_rate": 2.910948905109489e-05, "loss": 0.6071, "step": 16305 }, { "epoch": 0.4760736912790867, "grad_norm": 0.5180733451611703, "learning_rate": 2.910786699107867e-05, "loss": 0.6356, "step": 16306 }, { "epoch": 0.4761028875069341, "grad_norm": 0.5605003567639794, "learning_rate": 2.9106244931062447e-05, "loss": 0.6802, "step": 16307 }, { "epoch": 0.47613208373478144, "grad_norm": 0.5145730780461151, "learning_rate": 2.910462287104623e-05, "loss": 0.5911, "step": 16308 }, { "epoch": 0.4761612799626288, "grad_norm": 0.527012950076643, "learning_rate": 2.9103000811030007e-05, "loss": 0.6164, "step": 16309 }, { "epoch": 0.47619047619047616, "grad_norm": 0.5133346228452902, "learning_rate": 2.9101378751013793e-05, "loss": 0.6031, "step": 16310 }, { "epoch": 0.4762196724183235, "grad_norm": 0.5286191641666178, "learning_rate": 2.909975669099757e-05, "loss": 0.638, "step": 16311 }, { "epoch": 0.47624886864617094, "grad_norm": 0.5460544545427941, "learning_rate": 2.909813463098135e-05, "loss": 0.6041, "step": 16312 }, { "epoch": 0.4762780648740183, "grad_norm": 0.5765292343227063, "learning_rate": 2.9096512570965128e-05, "loss": 0.695, "step": 16313 }, { "epoch": 0.47630726110186566, "grad_norm": 0.5225218649958877, "learning_rate": 2.909489051094891e-05, "loss": 0.6407, "step": 16314 }, { "epoch": 0.476336457329713, "grad_norm": 0.5304257714089508, "learning_rate": 2.9093268450932688e-05, "loss": 0.6206, "step": 16315 }, { "epoch": 0.4763656535575604, "grad_norm": 0.5237462321283014, "learning_rate": 2.9091646390916466e-05, "loss": 0.632, "step": 16316 }, { "epoch": 0.47639484978540775, "grad_norm": 0.5193576823561897, "learning_rate": 2.9090024330900245e-05, "loss": 0.5941, "step": 16317 }, { "epoch": 0.4764240460132551, "grad_norm": 0.5078293503577026, "learning_rate": 2.9088402270884023e-05, "loss": 0.5537, "step": 16318 }, { "epoch": 0.47645324224110247, "grad_norm": 0.5204842494337366, "learning_rate": 2.9086780210867805e-05, "loss": 0.6154, "step": 16319 }, { "epoch": 0.47648243846894983, "grad_norm": 0.5697392020302166, "learning_rate": 2.9085158150851583e-05, "loss": 0.7303, "step": 16320 }, { "epoch": 0.4765116346967972, "grad_norm": 0.5162646321546281, "learning_rate": 2.908353609083536e-05, "loss": 0.6423, "step": 16321 }, { "epoch": 0.47654083092464455, "grad_norm": 0.5057120437807912, "learning_rate": 2.908191403081914e-05, "loss": 0.5661, "step": 16322 }, { "epoch": 0.4765700271524919, "grad_norm": 0.5707954241836964, "learning_rate": 2.9080291970802918e-05, "loss": 0.7087, "step": 16323 }, { "epoch": 0.4765992233803393, "grad_norm": 0.504034925639294, "learning_rate": 2.90786699107867e-05, "loss": 0.5806, "step": 16324 }, { "epoch": 0.47662841960818664, "grad_norm": 0.5760015714739506, "learning_rate": 2.9077047850770478e-05, "loss": 0.6934, "step": 16325 }, { "epoch": 0.476657615836034, "grad_norm": 0.49411563330029296, "learning_rate": 2.9075425790754257e-05, "loss": 0.5465, "step": 16326 }, { "epoch": 0.47668681206388136, "grad_norm": 0.5344322330214585, "learning_rate": 2.9073803730738035e-05, "loss": 0.6237, "step": 16327 }, { "epoch": 0.4767160082917287, "grad_norm": 0.5537233685440409, "learning_rate": 2.907218167072182e-05, "loss": 0.6601, "step": 16328 }, { "epoch": 0.4767452045195761, "grad_norm": 0.5353594612314652, "learning_rate": 2.90705596107056e-05, "loss": 0.6745, "step": 16329 }, { "epoch": 0.47677440074742344, "grad_norm": 0.505290657623289, "learning_rate": 2.906893755068938e-05, "loss": 0.541, "step": 16330 }, { "epoch": 0.4768035969752708, "grad_norm": 0.5042895006563859, "learning_rate": 2.906731549067316e-05, "loss": 0.563, "step": 16331 }, { "epoch": 0.47683279320311817, "grad_norm": 0.547057925473927, "learning_rate": 2.9065693430656937e-05, "loss": 0.6498, "step": 16332 }, { "epoch": 0.4768619894309655, "grad_norm": 0.5388742622052207, "learning_rate": 2.9064071370640715e-05, "loss": 0.6496, "step": 16333 }, { "epoch": 0.4768911856588129, "grad_norm": 0.5558760972569453, "learning_rate": 2.9062449310624494e-05, "loss": 0.6526, "step": 16334 }, { "epoch": 0.47692038188666025, "grad_norm": 0.5218829519564878, "learning_rate": 2.9060827250608276e-05, "loss": 0.6453, "step": 16335 }, { "epoch": 0.4769495781145076, "grad_norm": 0.5094983856820658, "learning_rate": 2.9059205190592054e-05, "loss": 0.5516, "step": 16336 }, { "epoch": 0.47697877434235497, "grad_norm": 0.49798167653521436, "learning_rate": 2.9057583130575832e-05, "loss": 0.5567, "step": 16337 }, { "epoch": 0.47700797057020233, "grad_norm": 0.5269198056252826, "learning_rate": 2.905596107055961e-05, "loss": 0.5789, "step": 16338 }, { "epoch": 0.4770371667980497, "grad_norm": 0.5275927700430765, "learning_rate": 2.9054339010543392e-05, "loss": 0.5856, "step": 16339 }, { "epoch": 0.47706636302589706, "grad_norm": 0.5121239519020915, "learning_rate": 2.905271695052717e-05, "loss": 0.5928, "step": 16340 }, { "epoch": 0.4770955592537444, "grad_norm": 0.4855344254541104, "learning_rate": 2.905109489051095e-05, "loss": 0.5559, "step": 16341 }, { "epoch": 0.4771247554815918, "grad_norm": 0.5061729542711778, "learning_rate": 2.9049472830494728e-05, "loss": 0.5527, "step": 16342 }, { "epoch": 0.47715395170943914, "grad_norm": 0.5084355999753304, "learning_rate": 2.9047850770478506e-05, "loss": 0.5879, "step": 16343 }, { "epoch": 0.4771831479372865, "grad_norm": 0.5527550603975415, "learning_rate": 2.9046228710462288e-05, "loss": 0.6564, "step": 16344 }, { "epoch": 0.47721234416513386, "grad_norm": 0.522197870435522, "learning_rate": 2.9044606650446066e-05, "loss": 0.618, "step": 16345 }, { "epoch": 0.4772415403929812, "grad_norm": 0.5446822065088028, "learning_rate": 2.9042984590429844e-05, "loss": 0.6239, "step": 16346 }, { "epoch": 0.4772707366208286, "grad_norm": 0.5049788997943457, "learning_rate": 2.904136253041363e-05, "loss": 0.5845, "step": 16347 }, { "epoch": 0.47729993284867595, "grad_norm": 0.5486281939539486, "learning_rate": 2.9039740470397408e-05, "loss": 0.6826, "step": 16348 }, { "epoch": 0.4773291290765233, "grad_norm": 0.5395051679398021, "learning_rate": 2.9038118410381186e-05, "loss": 0.6198, "step": 16349 }, { "epoch": 0.47735832530437067, "grad_norm": 0.5066799126884516, "learning_rate": 2.9036496350364968e-05, "loss": 0.6144, "step": 16350 }, { "epoch": 0.47738752153221803, "grad_norm": 0.5161286334563441, "learning_rate": 2.9034874290348746e-05, "loss": 0.5739, "step": 16351 }, { "epoch": 0.4774167177600654, "grad_norm": 0.5378043348061514, "learning_rate": 2.9033252230332525e-05, "loss": 0.5839, "step": 16352 }, { "epoch": 0.47744591398791275, "grad_norm": 0.5320101809549613, "learning_rate": 2.9031630170316303e-05, "loss": 0.5923, "step": 16353 }, { "epoch": 0.4774751102157601, "grad_norm": 0.5174534564344481, "learning_rate": 2.903000811030008e-05, "loss": 0.6033, "step": 16354 }, { "epoch": 0.4775043064436075, "grad_norm": 0.5330343762720113, "learning_rate": 2.9028386050283863e-05, "loss": 0.6284, "step": 16355 }, { "epoch": 0.47753350267145483, "grad_norm": 0.5699038462794647, "learning_rate": 2.9026763990267642e-05, "loss": 0.5969, "step": 16356 }, { "epoch": 0.4775626988993022, "grad_norm": 0.5788771547235753, "learning_rate": 2.902514193025142e-05, "loss": 0.5856, "step": 16357 }, { "epoch": 0.47759189512714956, "grad_norm": 0.5784565007190247, "learning_rate": 2.90235198702352e-05, "loss": 0.6517, "step": 16358 }, { "epoch": 0.4776210913549969, "grad_norm": 0.5284092287471319, "learning_rate": 2.902189781021898e-05, "loss": 0.6253, "step": 16359 }, { "epoch": 0.4776502875828443, "grad_norm": 0.49356153295303584, "learning_rate": 2.902027575020276e-05, "loss": 0.5772, "step": 16360 }, { "epoch": 0.47767948381069164, "grad_norm": 0.5357872824513654, "learning_rate": 2.9018653690186537e-05, "loss": 0.6273, "step": 16361 }, { "epoch": 0.477708680038539, "grad_norm": 0.5802251473634651, "learning_rate": 2.9017031630170315e-05, "loss": 0.6759, "step": 16362 }, { "epoch": 0.47773787626638636, "grad_norm": 0.5216436863051873, "learning_rate": 2.9015409570154094e-05, "loss": 0.6253, "step": 16363 }, { "epoch": 0.4777670724942337, "grad_norm": 0.5306182508298533, "learning_rate": 2.9013787510137876e-05, "loss": 0.6381, "step": 16364 }, { "epoch": 0.4777962687220811, "grad_norm": 0.4986189766586418, "learning_rate": 2.9012165450121654e-05, "loss": 0.5431, "step": 16365 }, { "epoch": 0.47782546494992845, "grad_norm": 0.5390520029308199, "learning_rate": 2.901054339010544e-05, "loss": 0.6292, "step": 16366 }, { "epoch": 0.4778546611777758, "grad_norm": 0.5718917153549877, "learning_rate": 2.9008921330089217e-05, "loss": 0.6749, "step": 16367 }, { "epoch": 0.47788385740562317, "grad_norm": 0.6050875518826675, "learning_rate": 2.9007299270072996e-05, "loss": 0.7803, "step": 16368 }, { "epoch": 0.47791305363347053, "grad_norm": 0.4748204000928858, "learning_rate": 2.9005677210056774e-05, "loss": 0.5242, "step": 16369 }, { "epoch": 0.4779422498613179, "grad_norm": 0.5593117699330743, "learning_rate": 2.9004055150040556e-05, "loss": 0.6755, "step": 16370 }, { "epoch": 0.47797144608916525, "grad_norm": 0.5096462701701774, "learning_rate": 2.9002433090024334e-05, "loss": 0.5884, "step": 16371 }, { "epoch": 0.47800064231701267, "grad_norm": 0.4995728666534035, "learning_rate": 2.9000811030008113e-05, "loss": 0.5849, "step": 16372 }, { "epoch": 0.47802983854486003, "grad_norm": 0.5648663921539802, "learning_rate": 2.899918896999189e-05, "loss": 0.6914, "step": 16373 }, { "epoch": 0.4780590347727074, "grad_norm": 0.5440996649950987, "learning_rate": 2.899756690997567e-05, "loss": 0.6386, "step": 16374 }, { "epoch": 0.47808823100055475, "grad_norm": 0.4953323999875216, "learning_rate": 2.899594484995945e-05, "loss": 0.5659, "step": 16375 }, { "epoch": 0.4781174272284021, "grad_norm": 0.5313586061936648, "learning_rate": 2.899432278994323e-05, "loss": 0.614, "step": 16376 }, { "epoch": 0.4781466234562495, "grad_norm": 0.582252624166532, "learning_rate": 2.8992700729927008e-05, "loss": 0.656, "step": 16377 }, { "epoch": 0.47817581968409684, "grad_norm": 0.5422937263730694, "learning_rate": 2.8991078669910786e-05, "loss": 0.7138, "step": 16378 }, { "epoch": 0.4782050159119442, "grad_norm": 0.5631280847496527, "learning_rate": 2.8989456609894565e-05, "loss": 0.7232, "step": 16379 }, { "epoch": 0.47823421213979156, "grad_norm": 0.5408518493680834, "learning_rate": 2.8987834549878346e-05, "loss": 0.6506, "step": 16380 }, { "epoch": 0.4782634083676389, "grad_norm": 0.5100614086562771, "learning_rate": 2.8986212489862125e-05, "loss": 0.5695, "step": 16381 }, { "epoch": 0.4782926045954863, "grad_norm": 0.5192767699249571, "learning_rate": 2.8984590429845903e-05, "loss": 0.5901, "step": 16382 }, { "epoch": 0.47832180082333364, "grad_norm": 0.5146825139883563, "learning_rate": 2.898296836982968e-05, "loss": 0.6225, "step": 16383 }, { "epoch": 0.478350997051181, "grad_norm": 0.5230967566638595, "learning_rate": 2.8981346309813463e-05, "loss": 0.5603, "step": 16384 }, { "epoch": 0.47838019327902837, "grad_norm": 0.518806256197688, "learning_rate": 2.8979724249797245e-05, "loss": 0.592, "step": 16385 }, { "epoch": 0.4784093895068757, "grad_norm": 0.5930004121558206, "learning_rate": 2.8978102189781027e-05, "loss": 0.6895, "step": 16386 }, { "epoch": 0.4784385857347231, "grad_norm": 0.5282648055864476, "learning_rate": 2.8976480129764805e-05, "loss": 0.6284, "step": 16387 }, { "epoch": 0.47846778196257045, "grad_norm": 0.5218490852404607, "learning_rate": 2.8974858069748584e-05, "loss": 0.6363, "step": 16388 }, { "epoch": 0.4784969781904178, "grad_norm": 0.530841531064941, "learning_rate": 2.8973236009732362e-05, "loss": 0.6255, "step": 16389 }, { "epoch": 0.47852617441826517, "grad_norm": 0.5461517021379372, "learning_rate": 2.8971613949716144e-05, "loss": 0.5786, "step": 16390 }, { "epoch": 0.47855537064611253, "grad_norm": 0.5523158505166921, "learning_rate": 2.8969991889699922e-05, "loss": 0.6194, "step": 16391 }, { "epoch": 0.4785845668739599, "grad_norm": 0.5109890993459687, "learning_rate": 2.89683698296837e-05, "loss": 0.6093, "step": 16392 }, { "epoch": 0.47861376310180725, "grad_norm": 0.51121896170337, "learning_rate": 2.896674776966748e-05, "loss": 0.6112, "step": 16393 }, { "epoch": 0.4786429593296546, "grad_norm": 0.5389147793267508, "learning_rate": 2.8965125709651257e-05, "loss": 0.6121, "step": 16394 }, { "epoch": 0.478672155557502, "grad_norm": 0.562587986400191, "learning_rate": 2.896350364963504e-05, "loss": 0.6775, "step": 16395 }, { "epoch": 0.47870135178534934, "grad_norm": 0.5138804847837958, "learning_rate": 2.8961881589618817e-05, "loss": 0.6088, "step": 16396 }, { "epoch": 0.4787305480131967, "grad_norm": 0.5333321206222497, "learning_rate": 2.8960259529602596e-05, "loss": 0.6233, "step": 16397 }, { "epoch": 0.47875974424104406, "grad_norm": 0.5218075463331742, "learning_rate": 2.8958637469586374e-05, "loss": 0.6404, "step": 16398 }, { "epoch": 0.4787889404688914, "grad_norm": 0.5512757812160767, "learning_rate": 2.8957015409570152e-05, "loss": 0.6948, "step": 16399 }, { "epoch": 0.4788181366967388, "grad_norm": 0.4889879526900514, "learning_rate": 2.8955393349553934e-05, "loss": 0.5436, "step": 16400 }, { "epoch": 0.47884733292458614, "grad_norm": 0.5621377948802174, "learning_rate": 2.8953771289537713e-05, "loss": 0.694, "step": 16401 }, { "epoch": 0.4788765291524335, "grad_norm": 0.49982844790568015, "learning_rate": 2.895214922952149e-05, "loss": 0.6028, "step": 16402 }, { "epoch": 0.47890572538028087, "grad_norm": 0.5260885719276269, "learning_rate": 2.895052716950527e-05, "loss": 0.6364, "step": 16403 }, { "epoch": 0.47893492160812823, "grad_norm": 0.5395222077571754, "learning_rate": 2.8948905109489055e-05, "loss": 0.6545, "step": 16404 }, { "epoch": 0.4789641178359756, "grad_norm": 0.5441975442059056, "learning_rate": 2.8947283049472833e-05, "loss": 0.6587, "step": 16405 }, { "epoch": 0.47899331406382295, "grad_norm": 0.5163637854302643, "learning_rate": 2.8945660989456615e-05, "loss": 0.5828, "step": 16406 }, { "epoch": 0.4790225102916703, "grad_norm": 0.505234164514243, "learning_rate": 2.8944038929440393e-05, "loss": 0.5565, "step": 16407 }, { "epoch": 0.4790517065195177, "grad_norm": 0.5624028020178969, "learning_rate": 2.894241686942417e-05, "loss": 0.6492, "step": 16408 }, { "epoch": 0.47908090274736503, "grad_norm": 0.524417982357541, "learning_rate": 2.894079480940795e-05, "loss": 0.6268, "step": 16409 }, { "epoch": 0.4791100989752124, "grad_norm": 0.5566151353378597, "learning_rate": 2.8939172749391728e-05, "loss": 0.6515, "step": 16410 }, { "epoch": 0.47913929520305976, "grad_norm": 0.5546739835043886, "learning_rate": 2.893755068937551e-05, "loss": 0.6377, "step": 16411 }, { "epoch": 0.4791684914309071, "grad_norm": 0.5365992271240796, "learning_rate": 2.8935928629359288e-05, "loss": 0.6543, "step": 16412 }, { "epoch": 0.4791976876587545, "grad_norm": 0.541107005781674, "learning_rate": 2.8934306569343067e-05, "loss": 0.5969, "step": 16413 }, { "epoch": 0.47922688388660184, "grad_norm": 0.46889280404835193, "learning_rate": 2.8932684509326845e-05, "loss": 0.5009, "step": 16414 }, { "epoch": 0.4792560801144492, "grad_norm": 0.5011654658961698, "learning_rate": 2.8931062449310627e-05, "loss": 0.5548, "step": 16415 }, { "epoch": 0.47928527634229656, "grad_norm": 0.5386168073092478, "learning_rate": 2.8929440389294405e-05, "loss": 0.6686, "step": 16416 }, { "epoch": 0.4793144725701439, "grad_norm": 0.5621890037313378, "learning_rate": 2.8927818329278184e-05, "loss": 0.7098, "step": 16417 }, { "epoch": 0.4793436687979913, "grad_norm": 0.5219360703734214, "learning_rate": 2.8926196269261962e-05, "loss": 0.5876, "step": 16418 }, { "epoch": 0.47937286502583865, "grad_norm": 0.5146428272167782, "learning_rate": 2.892457420924574e-05, "loss": 0.597, "step": 16419 }, { "epoch": 0.479402061253686, "grad_norm": 0.5550398099914543, "learning_rate": 2.8922952149229522e-05, "loss": 0.6581, "step": 16420 }, { "epoch": 0.47943125748153337, "grad_norm": 0.521320666189883, "learning_rate": 2.89213300892133e-05, "loss": 0.6357, "step": 16421 }, { "epoch": 0.47946045370938073, "grad_norm": 0.5482377792272878, "learning_rate": 2.891970802919708e-05, "loss": 0.6544, "step": 16422 }, { "epoch": 0.4794896499372281, "grad_norm": 0.5558801863639546, "learning_rate": 2.8918085969180864e-05, "loss": 0.6046, "step": 16423 }, { "epoch": 0.47951884616507545, "grad_norm": 0.5525534483432941, "learning_rate": 2.8916463909164642e-05, "loss": 0.6525, "step": 16424 }, { "epoch": 0.4795480423929228, "grad_norm": 0.5366543361708765, "learning_rate": 2.891484184914842e-05, "loss": 0.6471, "step": 16425 }, { "epoch": 0.4795772386207702, "grad_norm": 0.5368939543913203, "learning_rate": 2.8913219789132202e-05, "loss": 0.6483, "step": 16426 }, { "epoch": 0.47960643484861754, "grad_norm": 0.4824820586894495, "learning_rate": 2.891159772911598e-05, "loss": 0.5453, "step": 16427 }, { "epoch": 0.4796356310764649, "grad_norm": 0.5102618848208857, "learning_rate": 2.890997566909976e-05, "loss": 0.5837, "step": 16428 }, { "epoch": 0.47966482730431226, "grad_norm": 0.5330398835607761, "learning_rate": 2.8908353609083538e-05, "loss": 0.638, "step": 16429 }, { "epoch": 0.4796940235321596, "grad_norm": 0.5144394545941773, "learning_rate": 2.8906731549067316e-05, "loss": 0.5929, "step": 16430 }, { "epoch": 0.479723219760007, "grad_norm": 0.5509364116785641, "learning_rate": 2.8905109489051098e-05, "loss": 0.6093, "step": 16431 }, { "epoch": 0.47975241598785434, "grad_norm": 0.5820696886820472, "learning_rate": 2.8903487429034876e-05, "loss": 0.6701, "step": 16432 }, { "epoch": 0.47978161221570176, "grad_norm": 0.48025403097486286, "learning_rate": 2.8901865369018654e-05, "loss": 0.5439, "step": 16433 }, { "epoch": 0.4798108084435491, "grad_norm": 0.4892905201356896, "learning_rate": 2.8900243309002433e-05, "loss": 0.5712, "step": 16434 }, { "epoch": 0.4798400046713965, "grad_norm": 0.4987513129979521, "learning_rate": 2.8898621248986215e-05, "loss": 0.5858, "step": 16435 }, { "epoch": 0.47986920089924384, "grad_norm": 0.5023253586600624, "learning_rate": 2.8896999188969993e-05, "loss": 0.5856, "step": 16436 }, { "epoch": 0.4798983971270912, "grad_norm": 0.495203112478467, "learning_rate": 2.889537712895377e-05, "loss": 0.6061, "step": 16437 }, { "epoch": 0.47992759335493856, "grad_norm": 0.49814163331951233, "learning_rate": 2.889375506893755e-05, "loss": 0.5817, "step": 16438 }, { "epoch": 0.4799567895827859, "grad_norm": 0.5343279057468733, "learning_rate": 2.8892133008921328e-05, "loss": 0.6332, "step": 16439 }, { "epoch": 0.4799859858106333, "grad_norm": 0.5161459123101516, "learning_rate": 2.889051094890511e-05, "loss": 0.569, "step": 16440 }, { "epoch": 0.48001518203848065, "grad_norm": 0.5220421545658955, "learning_rate": 2.8888888888888888e-05, "loss": 0.6146, "step": 16441 }, { "epoch": 0.480044378266328, "grad_norm": 0.532093640169964, "learning_rate": 2.8887266828872673e-05, "loss": 0.664, "step": 16442 }, { "epoch": 0.48007357449417537, "grad_norm": 0.520820724130787, "learning_rate": 2.8885644768856452e-05, "loss": 0.6222, "step": 16443 }, { "epoch": 0.48010277072202273, "grad_norm": 0.5009288423608362, "learning_rate": 2.888402270884023e-05, "loss": 0.6235, "step": 16444 }, { "epoch": 0.4801319669498701, "grad_norm": 0.5012391742689235, "learning_rate": 2.888240064882401e-05, "loss": 0.5784, "step": 16445 }, { "epoch": 0.48016116317771745, "grad_norm": 0.5171522782576804, "learning_rate": 2.888077858880779e-05, "loss": 0.607, "step": 16446 }, { "epoch": 0.4801903594055648, "grad_norm": 0.5200867977313933, "learning_rate": 2.887915652879157e-05, "loss": 0.6153, "step": 16447 }, { "epoch": 0.4802195556334122, "grad_norm": 0.5096707691431642, "learning_rate": 2.8877534468775347e-05, "loss": 0.5772, "step": 16448 }, { "epoch": 0.48024875186125954, "grad_norm": 0.48661654708822494, "learning_rate": 2.8875912408759125e-05, "loss": 0.5701, "step": 16449 }, { "epoch": 0.4802779480891069, "grad_norm": 0.5136480312083703, "learning_rate": 2.8874290348742904e-05, "loss": 0.5987, "step": 16450 }, { "epoch": 0.48030714431695426, "grad_norm": 0.4880055204897672, "learning_rate": 2.8872668288726686e-05, "loss": 0.5561, "step": 16451 }, { "epoch": 0.4803363405448016, "grad_norm": 0.5415761828223239, "learning_rate": 2.8871046228710464e-05, "loss": 0.6319, "step": 16452 }, { "epoch": 0.480365536772649, "grad_norm": 0.5263387548483458, "learning_rate": 2.8869424168694242e-05, "loss": 0.6102, "step": 16453 }, { "epoch": 0.48039473300049634, "grad_norm": 0.4876671816518919, "learning_rate": 2.886780210867802e-05, "loss": 0.5399, "step": 16454 }, { "epoch": 0.4804239292283437, "grad_norm": 0.49494184131744434, "learning_rate": 2.88661800486618e-05, "loss": 0.5532, "step": 16455 }, { "epoch": 0.48045312545619107, "grad_norm": 0.4911620900728184, "learning_rate": 2.886455798864558e-05, "loss": 0.5232, "step": 16456 }, { "epoch": 0.4804823216840384, "grad_norm": 0.5243355427823724, "learning_rate": 2.886293592862936e-05, "loss": 0.6213, "step": 16457 }, { "epoch": 0.4805115179118858, "grad_norm": 0.49261426725035173, "learning_rate": 2.8861313868613138e-05, "loss": 0.5145, "step": 16458 }, { "epoch": 0.48054071413973315, "grad_norm": 0.5284850172050979, "learning_rate": 2.8859691808596916e-05, "loss": 0.6068, "step": 16459 }, { "epoch": 0.4805699103675805, "grad_norm": 0.5076342951207105, "learning_rate": 2.8858069748580698e-05, "loss": 0.558, "step": 16460 }, { "epoch": 0.48059910659542787, "grad_norm": 0.5609292442904253, "learning_rate": 2.885644768856448e-05, "loss": 0.7226, "step": 16461 }, { "epoch": 0.48062830282327523, "grad_norm": 0.5118762301840297, "learning_rate": 2.885482562854826e-05, "loss": 0.5899, "step": 16462 }, { "epoch": 0.4806574990511226, "grad_norm": 0.5419684407679456, "learning_rate": 2.885320356853204e-05, "loss": 0.6333, "step": 16463 }, { "epoch": 0.48068669527896996, "grad_norm": 0.568229037921359, "learning_rate": 2.8851581508515818e-05, "loss": 0.7023, "step": 16464 }, { "epoch": 0.4807158915068173, "grad_norm": 0.5114369509552088, "learning_rate": 2.8849959448499596e-05, "loss": 0.5829, "step": 16465 }, { "epoch": 0.4807450877346647, "grad_norm": 0.5605997518278443, "learning_rate": 2.8848337388483375e-05, "loss": 0.6787, "step": 16466 }, { "epoch": 0.48077428396251204, "grad_norm": 0.5542955280416214, "learning_rate": 2.8846715328467156e-05, "loss": 0.6777, "step": 16467 }, { "epoch": 0.4808034801903594, "grad_norm": 0.5297581860644048, "learning_rate": 2.8845093268450935e-05, "loss": 0.6029, "step": 16468 }, { "epoch": 0.48083267641820676, "grad_norm": 0.5449302503784205, "learning_rate": 2.8843471208434713e-05, "loss": 0.6415, "step": 16469 }, { "epoch": 0.4808618726460541, "grad_norm": 0.5253875912448627, "learning_rate": 2.884184914841849e-05, "loss": 0.6678, "step": 16470 }, { "epoch": 0.4808910688739015, "grad_norm": 0.4967899988667824, "learning_rate": 2.8840227088402273e-05, "loss": 0.5329, "step": 16471 }, { "epoch": 0.48092026510174885, "grad_norm": 0.5190099304584256, "learning_rate": 2.8838605028386052e-05, "loss": 0.6177, "step": 16472 }, { "epoch": 0.4809494613295962, "grad_norm": 0.5827217696618062, "learning_rate": 2.883698296836983e-05, "loss": 0.6757, "step": 16473 }, { "epoch": 0.48097865755744357, "grad_norm": 0.5291817998396925, "learning_rate": 2.883536090835361e-05, "loss": 0.5722, "step": 16474 }, { "epoch": 0.48100785378529093, "grad_norm": 0.5028626143587777, "learning_rate": 2.8833738848337387e-05, "loss": 0.5575, "step": 16475 }, { "epoch": 0.4810370500131383, "grad_norm": 0.5339957207023404, "learning_rate": 2.883211678832117e-05, "loss": 0.6007, "step": 16476 }, { "epoch": 0.48106624624098565, "grad_norm": 0.6186915976134485, "learning_rate": 2.8830494728304947e-05, "loss": 0.5951, "step": 16477 }, { "epoch": 0.481095442468833, "grad_norm": 0.5456860406791226, "learning_rate": 2.8828872668288725e-05, "loss": 0.6571, "step": 16478 }, { "epoch": 0.4811246386966804, "grad_norm": 0.5169344110279592, "learning_rate": 2.882725060827251e-05, "loss": 0.6191, "step": 16479 }, { "epoch": 0.48115383492452773, "grad_norm": 0.539955235737439, "learning_rate": 2.882562854825629e-05, "loss": 0.6602, "step": 16480 }, { "epoch": 0.4811830311523751, "grad_norm": 0.5528731446484235, "learning_rate": 2.8824006488240067e-05, "loss": 0.6279, "step": 16481 }, { "epoch": 0.48121222738022246, "grad_norm": 0.5118622298516114, "learning_rate": 2.882238442822385e-05, "loss": 0.5945, "step": 16482 }, { "epoch": 0.4812414236080698, "grad_norm": 0.5259718097300938, "learning_rate": 2.8820762368207627e-05, "loss": 0.6026, "step": 16483 }, { "epoch": 0.4812706198359172, "grad_norm": 0.5173393064613582, "learning_rate": 2.8819140308191406e-05, "loss": 0.6605, "step": 16484 }, { "epoch": 0.48129981606376454, "grad_norm": 0.528961951996096, "learning_rate": 2.8817518248175184e-05, "loss": 0.6596, "step": 16485 }, { "epoch": 0.4813290122916119, "grad_norm": 0.5401383070693653, "learning_rate": 2.8815896188158963e-05, "loss": 0.6318, "step": 16486 }, { "epoch": 0.48135820851945926, "grad_norm": 0.5527513256117781, "learning_rate": 2.8814274128142744e-05, "loss": 0.6448, "step": 16487 }, { "epoch": 0.4813874047473066, "grad_norm": 0.5256989079602082, "learning_rate": 2.8812652068126523e-05, "loss": 0.6366, "step": 16488 }, { "epoch": 0.481416600975154, "grad_norm": 0.5508948201418981, "learning_rate": 2.88110300081103e-05, "loss": 0.666, "step": 16489 }, { "epoch": 0.48144579720300135, "grad_norm": 0.5041639886657531, "learning_rate": 2.880940794809408e-05, "loss": 0.6296, "step": 16490 }, { "epoch": 0.4814749934308487, "grad_norm": 0.5446089126523127, "learning_rate": 2.880778588807786e-05, "loss": 0.598, "step": 16491 }, { "epoch": 0.48150418965869607, "grad_norm": 0.5028543502113758, "learning_rate": 2.880616382806164e-05, "loss": 0.5588, "step": 16492 }, { "epoch": 0.4815333858865435, "grad_norm": 0.516231609333659, "learning_rate": 2.8804541768045418e-05, "loss": 0.5854, "step": 16493 }, { "epoch": 0.48156258211439085, "grad_norm": 0.48468047165853534, "learning_rate": 2.8802919708029196e-05, "loss": 0.5413, "step": 16494 }, { "epoch": 0.4815917783422382, "grad_norm": 0.49527541648447404, "learning_rate": 2.8801297648012975e-05, "loss": 0.5683, "step": 16495 }, { "epoch": 0.48162097457008557, "grad_norm": 0.5354774035266041, "learning_rate": 2.8799675587996756e-05, "loss": 0.6649, "step": 16496 }, { "epoch": 0.48165017079793293, "grad_norm": 0.5331878641742913, "learning_rate": 2.8798053527980535e-05, "loss": 0.663, "step": 16497 }, { "epoch": 0.4816793670257803, "grad_norm": 0.527272575192118, "learning_rate": 2.879643146796432e-05, "loss": 0.5954, "step": 16498 }, { "epoch": 0.48170856325362765, "grad_norm": 0.5174652431390382, "learning_rate": 2.8794809407948098e-05, "loss": 0.6052, "step": 16499 }, { "epoch": 0.481737759481475, "grad_norm": 0.5488394614468235, "learning_rate": 2.8793187347931877e-05, "loss": 0.6661, "step": 16500 }, { "epoch": 0.4817669557093224, "grad_norm": 0.5528460015594255, "learning_rate": 2.8791565287915655e-05, "loss": 0.7091, "step": 16501 }, { "epoch": 0.48179615193716974, "grad_norm": 0.48351141085068605, "learning_rate": 2.8789943227899437e-05, "loss": 0.5422, "step": 16502 }, { "epoch": 0.4818253481650171, "grad_norm": 0.4765303913437088, "learning_rate": 2.8788321167883215e-05, "loss": 0.5279, "step": 16503 }, { "epoch": 0.48185454439286446, "grad_norm": 0.5366668835333235, "learning_rate": 2.8786699107866994e-05, "loss": 0.5482, "step": 16504 }, { "epoch": 0.4818837406207118, "grad_norm": 0.5286337083769924, "learning_rate": 2.8785077047850772e-05, "loss": 0.6323, "step": 16505 }, { "epoch": 0.4819129368485592, "grad_norm": 0.5035163854814999, "learning_rate": 2.878345498783455e-05, "loss": 0.5483, "step": 16506 }, { "epoch": 0.48194213307640654, "grad_norm": 0.5186863287839386, "learning_rate": 2.8781832927818332e-05, "loss": 0.5946, "step": 16507 }, { "epoch": 0.4819713293042539, "grad_norm": 0.5427923349272974, "learning_rate": 2.878021086780211e-05, "loss": 0.6356, "step": 16508 }, { "epoch": 0.48200052553210126, "grad_norm": 0.5354825259086521, "learning_rate": 2.877858880778589e-05, "loss": 0.6245, "step": 16509 }, { "epoch": 0.4820297217599486, "grad_norm": 0.5152966532965041, "learning_rate": 2.8776966747769667e-05, "loss": 0.5757, "step": 16510 }, { "epoch": 0.482058917987796, "grad_norm": 0.5313224002956134, "learning_rate": 2.8775344687753446e-05, "loss": 0.5574, "step": 16511 }, { "epoch": 0.48208811421564335, "grad_norm": 0.5363465476254327, "learning_rate": 2.8773722627737227e-05, "loss": 0.6408, "step": 16512 }, { "epoch": 0.4821173104434907, "grad_norm": 0.5302637302794688, "learning_rate": 2.8772100567721006e-05, "loss": 0.6402, "step": 16513 }, { "epoch": 0.48214650667133807, "grad_norm": 0.5322208952124455, "learning_rate": 2.8770478507704784e-05, "loss": 0.6094, "step": 16514 }, { "epoch": 0.48217570289918543, "grad_norm": 0.48585429980528555, "learning_rate": 2.8768856447688562e-05, "loss": 0.5995, "step": 16515 }, { "epoch": 0.4822048991270328, "grad_norm": 0.4852489614598468, "learning_rate": 2.8767234387672344e-05, "loss": 0.5353, "step": 16516 }, { "epoch": 0.48223409535488015, "grad_norm": 0.5038651584963705, "learning_rate": 2.8765612327656126e-05, "loss": 0.5666, "step": 16517 }, { "epoch": 0.4822632915827275, "grad_norm": 0.5074614484275443, "learning_rate": 2.8763990267639908e-05, "loss": 0.5808, "step": 16518 }, { "epoch": 0.4822924878105749, "grad_norm": 0.4796607451272135, "learning_rate": 2.8762368207623686e-05, "loss": 0.5605, "step": 16519 }, { "epoch": 0.48232168403842224, "grad_norm": 0.4920516050301641, "learning_rate": 2.8760746147607464e-05, "loss": 0.5126, "step": 16520 }, { "epoch": 0.4823508802662696, "grad_norm": 0.4896989774272259, "learning_rate": 2.8759124087591243e-05, "loss": 0.5585, "step": 16521 }, { "epoch": 0.48238007649411696, "grad_norm": 0.47867378825244855, "learning_rate": 2.875750202757502e-05, "loss": 0.5214, "step": 16522 }, { "epoch": 0.4824092727219643, "grad_norm": 0.5240199405081961, "learning_rate": 2.8755879967558803e-05, "loss": 0.6539, "step": 16523 }, { "epoch": 0.4824384689498117, "grad_norm": 0.5321514316538298, "learning_rate": 2.875425790754258e-05, "loss": 0.6444, "step": 16524 }, { "epoch": 0.48246766517765904, "grad_norm": 0.5215194476831576, "learning_rate": 2.875263584752636e-05, "loss": 0.5983, "step": 16525 }, { "epoch": 0.4824968614055064, "grad_norm": 0.507176304633781, "learning_rate": 2.8751013787510138e-05, "loss": 0.5984, "step": 16526 }, { "epoch": 0.48252605763335377, "grad_norm": 0.47416137870436487, "learning_rate": 2.874939172749392e-05, "loss": 0.5208, "step": 16527 }, { "epoch": 0.4825552538612011, "grad_norm": 0.5484629531578472, "learning_rate": 2.8747769667477698e-05, "loss": 0.6594, "step": 16528 }, { "epoch": 0.4825844500890485, "grad_norm": 0.5011220592794198, "learning_rate": 2.8746147607461477e-05, "loss": 0.5691, "step": 16529 }, { "epoch": 0.48261364631689585, "grad_norm": 0.48906860157109716, "learning_rate": 2.8744525547445255e-05, "loss": 0.5436, "step": 16530 }, { "epoch": 0.4826428425447432, "grad_norm": 0.5185235332592181, "learning_rate": 2.8742903487429033e-05, "loss": 0.6097, "step": 16531 }, { "epoch": 0.4826720387725906, "grad_norm": 0.5273052411260634, "learning_rate": 2.8741281427412815e-05, "loss": 0.6494, "step": 16532 }, { "epoch": 0.48270123500043793, "grad_norm": 0.5478738014855585, "learning_rate": 2.8739659367396594e-05, "loss": 0.6736, "step": 16533 }, { "epoch": 0.4827304312282853, "grad_norm": 0.5280527459959325, "learning_rate": 2.8738037307380372e-05, "loss": 0.6181, "step": 16534 }, { "epoch": 0.48275962745613266, "grad_norm": 0.4929918142451629, "learning_rate": 2.873641524736415e-05, "loss": 0.5753, "step": 16535 }, { "epoch": 0.48278882368398, "grad_norm": 0.5181310864414124, "learning_rate": 2.8734793187347935e-05, "loss": 0.5457, "step": 16536 }, { "epoch": 0.4828180199118274, "grad_norm": 0.5546049670809046, "learning_rate": 2.8733171127331714e-05, "loss": 0.6972, "step": 16537 }, { "epoch": 0.48284721613967474, "grad_norm": 0.5034345273059916, "learning_rate": 2.8731549067315496e-05, "loss": 0.6116, "step": 16538 }, { "epoch": 0.4828764123675221, "grad_norm": 0.5798000621488181, "learning_rate": 2.8729927007299274e-05, "loss": 0.7103, "step": 16539 }, { "epoch": 0.48290560859536946, "grad_norm": 0.5248387499553774, "learning_rate": 2.8728304947283052e-05, "loss": 0.6233, "step": 16540 }, { "epoch": 0.4829348048232168, "grad_norm": 0.5250574243215865, "learning_rate": 2.872668288726683e-05, "loss": 0.6239, "step": 16541 }, { "epoch": 0.4829640010510642, "grad_norm": 0.49869236479353446, "learning_rate": 2.872506082725061e-05, "loss": 0.5965, "step": 16542 }, { "epoch": 0.48299319727891155, "grad_norm": 0.5638853451370003, "learning_rate": 2.872343876723439e-05, "loss": 0.6817, "step": 16543 }, { "epoch": 0.4830223935067589, "grad_norm": 0.567499012020362, "learning_rate": 2.872181670721817e-05, "loss": 0.6652, "step": 16544 }, { "epoch": 0.48305158973460627, "grad_norm": 0.5433222225777953, "learning_rate": 2.8720194647201948e-05, "loss": 0.6736, "step": 16545 }, { "epoch": 0.48308078596245363, "grad_norm": 0.5240557276251142, "learning_rate": 2.8718572587185726e-05, "loss": 0.592, "step": 16546 }, { "epoch": 0.483109982190301, "grad_norm": 0.48550158009733363, "learning_rate": 2.8716950527169508e-05, "loss": 0.5487, "step": 16547 }, { "epoch": 0.48313917841814835, "grad_norm": 0.5016292109596442, "learning_rate": 2.8715328467153286e-05, "loss": 0.5706, "step": 16548 }, { "epoch": 0.4831683746459957, "grad_norm": 0.4949572711454232, "learning_rate": 2.8713706407137064e-05, "loss": 0.5622, "step": 16549 }, { "epoch": 0.4831975708738431, "grad_norm": 0.5432978266350429, "learning_rate": 2.8712084347120843e-05, "loss": 0.6544, "step": 16550 }, { "epoch": 0.48322676710169044, "grad_norm": 0.5507087772252939, "learning_rate": 2.871046228710462e-05, "loss": 0.6984, "step": 16551 }, { "epoch": 0.4832559633295378, "grad_norm": 0.5178116156666533, "learning_rate": 2.8708840227088403e-05, "loss": 0.5801, "step": 16552 }, { "epoch": 0.4832851595573852, "grad_norm": 0.5162253264974368, "learning_rate": 2.870721816707218e-05, "loss": 0.6072, "step": 16553 }, { "epoch": 0.4833143557852326, "grad_norm": 0.5605348640064065, "learning_rate": 2.870559610705596e-05, "loss": 0.6744, "step": 16554 }, { "epoch": 0.48334355201307994, "grad_norm": 0.5148023967590777, "learning_rate": 2.8703974047039745e-05, "loss": 0.6485, "step": 16555 }, { "epoch": 0.4833727482409273, "grad_norm": 0.5094552232473353, "learning_rate": 2.8702351987023523e-05, "loss": 0.6066, "step": 16556 }, { "epoch": 0.48340194446877466, "grad_norm": 0.4798269807332473, "learning_rate": 2.87007299270073e-05, "loss": 0.5839, "step": 16557 }, { "epoch": 0.483431140696622, "grad_norm": 0.5130916761038802, "learning_rate": 2.8699107866991083e-05, "loss": 0.6251, "step": 16558 }, { "epoch": 0.4834603369244694, "grad_norm": 0.46455497800476314, "learning_rate": 2.8697485806974862e-05, "loss": 0.5015, "step": 16559 }, { "epoch": 0.48348953315231674, "grad_norm": 0.5770701255540189, "learning_rate": 2.869586374695864e-05, "loss": 0.6957, "step": 16560 }, { "epoch": 0.4835187293801641, "grad_norm": 0.5574796099641127, "learning_rate": 2.869424168694242e-05, "loss": 0.6522, "step": 16561 }, { "epoch": 0.48354792560801146, "grad_norm": 0.5478487300213974, "learning_rate": 2.8692619626926197e-05, "loss": 0.5954, "step": 16562 }, { "epoch": 0.4835771218358588, "grad_norm": 0.536469512839249, "learning_rate": 2.869099756690998e-05, "loss": 0.6234, "step": 16563 }, { "epoch": 0.4836063180637062, "grad_norm": 0.514154797670638, "learning_rate": 2.8689375506893757e-05, "loss": 0.6451, "step": 16564 }, { "epoch": 0.48363551429155355, "grad_norm": 0.564808320921826, "learning_rate": 2.8687753446877535e-05, "loss": 0.6551, "step": 16565 }, { "epoch": 0.4836647105194009, "grad_norm": 0.5465629163870603, "learning_rate": 2.8686131386861314e-05, "loss": 0.6478, "step": 16566 }, { "epoch": 0.48369390674724827, "grad_norm": 0.5131980661040506, "learning_rate": 2.8684509326845092e-05, "loss": 0.6451, "step": 16567 }, { "epoch": 0.48372310297509563, "grad_norm": 0.4914583622076551, "learning_rate": 2.8682887266828874e-05, "loss": 0.5314, "step": 16568 }, { "epoch": 0.483752299202943, "grad_norm": 0.5817985530572841, "learning_rate": 2.8681265206812652e-05, "loss": 0.7175, "step": 16569 }, { "epoch": 0.48378149543079035, "grad_norm": 0.5218436651315911, "learning_rate": 2.867964314679643e-05, "loss": 0.5936, "step": 16570 }, { "epoch": 0.4838106916586377, "grad_norm": 0.48247257490457324, "learning_rate": 2.867802108678021e-05, "loss": 0.5499, "step": 16571 }, { "epoch": 0.4838398878864851, "grad_norm": 0.5945058535457977, "learning_rate": 2.867639902676399e-05, "loss": 0.7359, "step": 16572 }, { "epoch": 0.48386908411433244, "grad_norm": 0.5612359275066312, "learning_rate": 2.867477696674777e-05, "loss": 0.6471, "step": 16573 }, { "epoch": 0.4838982803421798, "grad_norm": 0.5484848210711849, "learning_rate": 2.8673154906731554e-05, "loss": 0.6828, "step": 16574 }, { "epoch": 0.48392747657002716, "grad_norm": 0.5036858092329101, "learning_rate": 2.8671532846715333e-05, "loss": 0.5755, "step": 16575 }, { "epoch": 0.4839566727978745, "grad_norm": 0.5150454258632761, "learning_rate": 2.866991078669911e-05, "loss": 0.5843, "step": 16576 }, { "epoch": 0.4839858690257219, "grad_norm": 0.5366138601228735, "learning_rate": 2.866828872668289e-05, "loss": 0.658, "step": 16577 }, { "epoch": 0.48401506525356924, "grad_norm": 0.5470754718939667, "learning_rate": 2.8666666666666668e-05, "loss": 0.634, "step": 16578 }, { "epoch": 0.4840442614814166, "grad_norm": 0.5078650137240809, "learning_rate": 2.866504460665045e-05, "loss": 0.5835, "step": 16579 }, { "epoch": 0.48407345770926397, "grad_norm": 0.5655112740686478, "learning_rate": 2.8663422546634228e-05, "loss": 0.7041, "step": 16580 }, { "epoch": 0.4841026539371113, "grad_norm": 0.5530477231820349, "learning_rate": 2.8661800486618006e-05, "loss": 0.7128, "step": 16581 }, { "epoch": 0.4841318501649587, "grad_norm": 0.584520763622783, "learning_rate": 2.8660178426601785e-05, "loss": 0.6842, "step": 16582 }, { "epoch": 0.48416104639280605, "grad_norm": 0.5308249851572149, "learning_rate": 2.8658556366585566e-05, "loss": 0.6547, "step": 16583 }, { "epoch": 0.4841902426206534, "grad_norm": 0.5391293645156433, "learning_rate": 2.8656934306569345e-05, "loss": 0.6417, "step": 16584 }, { "epoch": 0.48421943884850077, "grad_norm": 0.5368395161080746, "learning_rate": 2.8655312246553123e-05, "loss": 0.6617, "step": 16585 }, { "epoch": 0.48424863507634813, "grad_norm": 0.5533466172480347, "learning_rate": 2.86536901865369e-05, "loss": 0.67, "step": 16586 }, { "epoch": 0.4842778313041955, "grad_norm": 0.5306254738061954, "learning_rate": 2.865206812652068e-05, "loss": 0.6419, "step": 16587 }, { "epoch": 0.48430702753204286, "grad_norm": 0.5055868372968596, "learning_rate": 2.865044606650446e-05, "loss": 0.5559, "step": 16588 }, { "epoch": 0.4843362237598902, "grad_norm": 0.5742251179989832, "learning_rate": 2.864882400648824e-05, "loss": 0.7159, "step": 16589 }, { "epoch": 0.4843654199877376, "grad_norm": 0.522607314002669, "learning_rate": 2.864720194647202e-05, "loss": 0.6195, "step": 16590 }, { "epoch": 0.48439461621558494, "grad_norm": 0.4975869207588831, "learning_rate": 2.8645579886455797e-05, "loss": 0.5881, "step": 16591 }, { "epoch": 0.4844238124434323, "grad_norm": 0.5125498757735704, "learning_rate": 2.864395782643958e-05, "loss": 0.6072, "step": 16592 }, { "epoch": 0.48445300867127966, "grad_norm": 0.5658149814726331, "learning_rate": 2.864233576642336e-05, "loss": 0.7244, "step": 16593 }, { "epoch": 0.484482204899127, "grad_norm": 0.4781778557210572, "learning_rate": 2.8640713706407142e-05, "loss": 0.5598, "step": 16594 }, { "epoch": 0.4845114011269744, "grad_norm": 0.5452585785176101, "learning_rate": 2.863909164639092e-05, "loss": 0.6374, "step": 16595 }, { "epoch": 0.48454059735482174, "grad_norm": 0.4924134890782518, "learning_rate": 2.86374695863747e-05, "loss": 0.5477, "step": 16596 }, { "epoch": 0.4845697935826691, "grad_norm": 0.5156972411543829, "learning_rate": 2.8635847526358477e-05, "loss": 0.5859, "step": 16597 }, { "epoch": 0.48459898981051647, "grad_norm": 0.5366857027110722, "learning_rate": 2.8634225466342256e-05, "loss": 0.6219, "step": 16598 }, { "epoch": 0.48462818603836383, "grad_norm": 0.5266692310368776, "learning_rate": 2.8632603406326037e-05, "loss": 0.6133, "step": 16599 }, { "epoch": 0.4846573822662112, "grad_norm": 0.5372975412659116, "learning_rate": 2.8630981346309816e-05, "loss": 0.6518, "step": 16600 }, { "epoch": 0.48468657849405855, "grad_norm": 0.5099265083549627, "learning_rate": 2.8629359286293594e-05, "loss": 0.5625, "step": 16601 }, { "epoch": 0.4847157747219059, "grad_norm": 0.5091020967074342, "learning_rate": 2.8627737226277372e-05, "loss": 0.5444, "step": 16602 }, { "epoch": 0.4847449709497533, "grad_norm": 0.5431855606929497, "learning_rate": 2.8626115166261154e-05, "loss": 0.625, "step": 16603 }, { "epoch": 0.48477416717760063, "grad_norm": 0.5071296052390836, "learning_rate": 2.8624493106244933e-05, "loss": 0.5655, "step": 16604 }, { "epoch": 0.484803363405448, "grad_norm": 0.5527705101892239, "learning_rate": 2.862287104622871e-05, "loss": 0.6991, "step": 16605 }, { "epoch": 0.48483255963329536, "grad_norm": 0.5443193752945412, "learning_rate": 2.862124898621249e-05, "loss": 0.6025, "step": 16606 }, { "epoch": 0.4848617558611427, "grad_norm": 0.5287475700613904, "learning_rate": 2.8619626926196268e-05, "loss": 0.5808, "step": 16607 }, { "epoch": 0.4848909520889901, "grad_norm": 0.5049484649051162, "learning_rate": 2.861800486618005e-05, "loss": 0.5605, "step": 16608 }, { "epoch": 0.48492014831683744, "grad_norm": 0.5553485235778799, "learning_rate": 2.8616382806163828e-05, "loss": 0.6179, "step": 16609 }, { "epoch": 0.4849493445446848, "grad_norm": 0.523073454602352, "learning_rate": 2.8614760746147606e-05, "loss": 0.6462, "step": 16610 }, { "epoch": 0.48497854077253216, "grad_norm": 0.5171544853743214, "learning_rate": 2.8613138686131385e-05, "loss": 0.5923, "step": 16611 }, { "epoch": 0.4850077370003795, "grad_norm": 0.5141379131087809, "learning_rate": 2.861151662611517e-05, "loss": 0.6056, "step": 16612 }, { "epoch": 0.48503693322822694, "grad_norm": 0.5030930193200328, "learning_rate": 2.8609894566098948e-05, "loss": 0.5732, "step": 16613 }, { "epoch": 0.4850661294560743, "grad_norm": 0.4780751351318983, "learning_rate": 2.860827250608273e-05, "loss": 0.5167, "step": 16614 }, { "epoch": 0.48509532568392166, "grad_norm": 0.5489210374689159, "learning_rate": 2.8606650446066508e-05, "loss": 0.5982, "step": 16615 }, { "epoch": 0.485124521911769, "grad_norm": 0.7388561404286953, "learning_rate": 2.8605028386050287e-05, "loss": 0.5252, "step": 16616 }, { "epoch": 0.4851537181396164, "grad_norm": 0.5421790049601325, "learning_rate": 2.8603406326034065e-05, "loss": 0.6347, "step": 16617 }, { "epoch": 0.48518291436746375, "grad_norm": 0.4941438037612601, "learning_rate": 2.8601784266017843e-05, "loss": 0.5744, "step": 16618 }, { "epoch": 0.4852121105953111, "grad_norm": 0.496713642737197, "learning_rate": 2.8600162206001625e-05, "loss": 0.5329, "step": 16619 }, { "epoch": 0.48524130682315847, "grad_norm": 0.5114116134920172, "learning_rate": 2.8598540145985404e-05, "loss": 0.6274, "step": 16620 }, { "epoch": 0.48527050305100583, "grad_norm": 0.5917396212729161, "learning_rate": 2.8596918085969182e-05, "loss": 0.5827, "step": 16621 }, { "epoch": 0.4852996992788532, "grad_norm": 0.4805162455552059, "learning_rate": 2.859529602595296e-05, "loss": 0.5427, "step": 16622 }, { "epoch": 0.48532889550670055, "grad_norm": 0.5030288658090902, "learning_rate": 2.859367396593674e-05, "loss": 0.601, "step": 16623 }, { "epoch": 0.4853580917345479, "grad_norm": 0.5684803824614032, "learning_rate": 2.859205190592052e-05, "loss": 0.5945, "step": 16624 }, { "epoch": 0.4853872879623953, "grad_norm": 0.5547164359180973, "learning_rate": 2.85904298459043e-05, "loss": 0.668, "step": 16625 }, { "epoch": 0.48541648419024264, "grad_norm": 0.4942120259499705, "learning_rate": 2.8588807785888077e-05, "loss": 0.5499, "step": 16626 }, { "epoch": 0.48544568041809, "grad_norm": 0.5279078911388603, "learning_rate": 2.8587185725871856e-05, "loss": 0.618, "step": 16627 }, { "epoch": 0.48547487664593736, "grad_norm": 0.5426711280262575, "learning_rate": 2.8585563665855637e-05, "loss": 0.6493, "step": 16628 }, { "epoch": 0.4855040728737847, "grad_norm": 0.5379439910331034, "learning_rate": 2.8583941605839416e-05, "loss": 0.6598, "step": 16629 }, { "epoch": 0.4855332691016321, "grad_norm": 0.48856870030243443, "learning_rate": 2.85823195458232e-05, "loss": 0.5702, "step": 16630 }, { "epoch": 0.48556246532947944, "grad_norm": 0.47451545703433823, "learning_rate": 2.858069748580698e-05, "loss": 0.516, "step": 16631 }, { "epoch": 0.4855916615573268, "grad_norm": 0.5495400847480453, "learning_rate": 2.8579075425790758e-05, "loss": 0.6723, "step": 16632 }, { "epoch": 0.48562085778517416, "grad_norm": 0.5221644292052138, "learning_rate": 2.8577453365774536e-05, "loss": 0.6531, "step": 16633 }, { "epoch": 0.4856500540130215, "grad_norm": 0.5131702190888193, "learning_rate": 2.8575831305758318e-05, "loss": 0.6012, "step": 16634 }, { "epoch": 0.4856792502408689, "grad_norm": 0.5585459231669947, "learning_rate": 2.8574209245742096e-05, "loss": 0.7001, "step": 16635 }, { "epoch": 0.48570844646871625, "grad_norm": 0.5719604630068787, "learning_rate": 2.8572587185725874e-05, "loss": 0.7208, "step": 16636 }, { "epoch": 0.4857376426965636, "grad_norm": 0.5129329011157012, "learning_rate": 2.8570965125709653e-05, "loss": 0.6015, "step": 16637 }, { "epoch": 0.48576683892441097, "grad_norm": 0.5173549523219932, "learning_rate": 2.856934306569343e-05, "loss": 0.5948, "step": 16638 }, { "epoch": 0.48579603515225833, "grad_norm": 0.5149151886099196, "learning_rate": 2.8567721005677213e-05, "loss": 0.6112, "step": 16639 }, { "epoch": 0.4858252313801057, "grad_norm": 0.5005822010015252, "learning_rate": 2.856609894566099e-05, "loss": 0.5811, "step": 16640 }, { "epoch": 0.48585442760795305, "grad_norm": 0.5228942197919207, "learning_rate": 2.856447688564477e-05, "loss": 0.601, "step": 16641 }, { "epoch": 0.4858836238358004, "grad_norm": 0.5375660580866224, "learning_rate": 2.8562854825628548e-05, "loss": 0.6543, "step": 16642 }, { "epoch": 0.4859128200636478, "grad_norm": 0.5422286837802873, "learning_rate": 2.8561232765612326e-05, "loss": 0.6944, "step": 16643 }, { "epoch": 0.48594201629149514, "grad_norm": 0.5135492732092031, "learning_rate": 2.8559610705596108e-05, "loss": 0.5061, "step": 16644 }, { "epoch": 0.4859712125193425, "grad_norm": 0.5481009100353352, "learning_rate": 2.8557988645579887e-05, "loss": 0.6721, "step": 16645 }, { "epoch": 0.48600040874718986, "grad_norm": 0.5488790192963935, "learning_rate": 2.8556366585563665e-05, "loss": 0.6358, "step": 16646 }, { "epoch": 0.4860296049750372, "grad_norm": 0.5204821707708903, "learning_rate": 2.8554744525547443e-05, "loss": 0.6069, "step": 16647 }, { "epoch": 0.4860588012028846, "grad_norm": 0.5277789164095015, "learning_rate": 2.8553122465531225e-05, "loss": 0.6065, "step": 16648 }, { "epoch": 0.48608799743073194, "grad_norm": 0.5434151417659867, "learning_rate": 2.8551500405515007e-05, "loss": 0.6664, "step": 16649 }, { "epoch": 0.4861171936585793, "grad_norm": 0.5326170062768641, "learning_rate": 2.854987834549879e-05, "loss": 0.6374, "step": 16650 }, { "epoch": 0.48614638988642667, "grad_norm": 0.5123821930663621, "learning_rate": 2.8548256285482567e-05, "loss": 0.6091, "step": 16651 }, { "epoch": 0.486175586114274, "grad_norm": 0.5184928852173978, "learning_rate": 2.8546634225466345e-05, "loss": 0.6046, "step": 16652 }, { "epoch": 0.4862047823421214, "grad_norm": 0.5156135721418216, "learning_rate": 2.8545012165450124e-05, "loss": 0.5661, "step": 16653 }, { "epoch": 0.48623397856996875, "grad_norm": 0.507787533041271, "learning_rate": 2.8543390105433902e-05, "loss": 0.5937, "step": 16654 }, { "epoch": 0.4862631747978161, "grad_norm": 0.5009589795944799, "learning_rate": 2.8541768045417684e-05, "loss": 0.5415, "step": 16655 }, { "epoch": 0.4862923710256635, "grad_norm": 0.5772016826035206, "learning_rate": 2.8540145985401462e-05, "loss": 0.734, "step": 16656 }, { "epoch": 0.48632156725351083, "grad_norm": 0.5326557125837161, "learning_rate": 2.853852392538524e-05, "loss": 0.5653, "step": 16657 }, { "epoch": 0.4863507634813582, "grad_norm": 0.5035535452190409, "learning_rate": 2.853690186536902e-05, "loss": 0.5715, "step": 16658 }, { "epoch": 0.48637995970920556, "grad_norm": 0.5716990618175567, "learning_rate": 2.85352798053528e-05, "loss": 0.6217, "step": 16659 }, { "epoch": 0.4864091559370529, "grad_norm": 0.485783434377367, "learning_rate": 2.853365774533658e-05, "loss": 0.5134, "step": 16660 }, { "epoch": 0.4864383521649003, "grad_norm": 0.49539552162056005, "learning_rate": 2.8532035685320358e-05, "loss": 0.5666, "step": 16661 }, { "epoch": 0.48646754839274764, "grad_norm": 0.5526929809947666, "learning_rate": 2.8530413625304136e-05, "loss": 0.6809, "step": 16662 }, { "epoch": 0.486496744620595, "grad_norm": 0.48638519158904847, "learning_rate": 2.8528791565287914e-05, "loss": 0.5227, "step": 16663 }, { "epoch": 0.48652594084844236, "grad_norm": 0.5222752035000596, "learning_rate": 2.8527169505271696e-05, "loss": 0.5867, "step": 16664 }, { "epoch": 0.4865551370762897, "grad_norm": 0.5454817096396939, "learning_rate": 2.8525547445255474e-05, "loss": 0.6699, "step": 16665 }, { "epoch": 0.4865843333041371, "grad_norm": 0.524998412902013, "learning_rate": 2.8523925385239253e-05, "loss": 0.6229, "step": 16666 }, { "epoch": 0.48661352953198445, "grad_norm": 0.5505674125090191, "learning_rate": 2.852230332522303e-05, "loss": 0.6736, "step": 16667 }, { "epoch": 0.4866427257598318, "grad_norm": 0.5549995537827948, "learning_rate": 2.8520681265206816e-05, "loss": 0.6748, "step": 16668 }, { "epoch": 0.48667192198767917, "grad_norm": 0.49643513295800606, "learning_rate": 2.8519059205190595e-05, "loss": 0.5619, "step": 16669 }, { "epoch": 0.48670111821552653, "grad_norm": 0.5913254801606641, "learning_rate": 2.8517437145174376e-05, "loss": 0.6935, "step": 16670 }, { "epoch": 0.4867303144433739, "grad_norm": 0.5655172984397898, "learning_rate": 2.8515815085158155e-05, "loss": 0.6557, "step": 16671 }, { "epoch": 0.48675951067122125, "grad_norm": 0.49857765197511084, "learning_rate": 2.8514193025141933e-05, "loss": 0.564, "step": 16672 }, { "epoch": 0.4867887068990686, "grad_norm": 0.518997366567924, "learning_rate": 2.851257096512571e-05, "loss": 0.5798, "step": 16673 }, { "epoch": 0.48681790312691603, "grad_norm": 0.5178631603364018, "learning_rate": 2.851094890510949e-05, "loss": 0.624, "step": 16674 }, { "epoch": 0.4868470993547634, "grad_norm": 0.4943113044852568, "learning_rate": 2.850932684509327e-05, "loss": 0.6075, "step": 16675 }, { "epoch": 0.48687629558261075, "grad_norm": 0.5191261047565365, "learning_rate": 2.850770478507705e-05, "loss": 0.6068, "step": 16676 }, { "epoch": 0.4869054918104581, "grad_norm": 0.5293444167144473, "learning_rate": 2.850608272506083e-05, "loss": 0.6109, "step": 16677 }, { "epoch": 0.4869346880383055, "grad_norm": 0.5356432536567058, "learning_rate": 2.8504460665044607e-05, "loss": 0.6349, "step": 16678 }, { "epoch": 0.48696388426615284, "grad_norm": 0.5363071232609486, "learning_rate": 2.850283860502839e-05, "loss": 0.6932, "step": 16679 }, { "epoch": 0.4869930804940002, "grad_norm": 0.5420432321707123, "learning_rate": 2.8501216545012167e-05, "loss": 0.6017, "step": 16680 }, { "epoch": 0.48702227672184756, "grad_norm": 0.5227132150546324, "learning_rate": 2.8499594484995945e-05, "loss": 0.6318, "step": 16681 }, { "epoch": 0.4870514729496949, "grad_norm": 0.5108817998789341, "learning_rate": 2.8497972424979724e-05, "loss": 0.5607, "step": 16682 }, { "epoch": 0.4870806691775423, "grad_norm": 0.5984104311877911, "learning_rate": 2.8496350364963502e-05, "loss": 0.6605, "step": 16683 }, { "epoch": 0.48710986540538964, "grad_norm": 0.521771328476158, "learning_rate": 2.8494728304947284e-05, "loss": 0.5981, "step": 16684 }, { "epoch": 0.487139061633237, "grad_norm": 0.5187880030295662, "learning_rate": 2.8493106244931062e-05, "loss": 0.5676, "step": 16685 }, { "epoch": 0.48716825786108436, "grad_norm": 0.5398455601523013, "learning_rate": 2.849148418491484e-05, "loss": 0.63, "step": 16686 }, { "epoch": 0.4871974540889317, "grad_norm": 0.5794485483077555, "learning_rate": 2.8489862124898626e-05, "loss": 0.74, "step": 16687 }, { "epoch": 0.4872266503167791, "grad_norm": 0.5204150496821025, "learning_rate": 2.8488240064882404e-05, "loss": 0.5597, "step": 16688 }, { "epoch": 0.48725584654462645, "grad_norm": 0.5357826122389032, "learning_rate": 2.8486618004866182e-05, "loss": 0.6469, "step": 16689 }, { "epoch": 0.4872850427724738, "grad_norm": 0.5234324728854671, "learning_rate": 2.8484995944849964e-05, "loss": 0.6173, "step": 16690 }, { "epoch": 0.48731423900032117, "grad_norm": 0.5333480931175936, "learning_rate": 2.8483373884833743e-05, "loss": 0.6395, "step": 16691 }, { "epoch": 0.48734343522816853, "grad_norm": 0.5165901092097515, "learning_rate": 2.848175182481752e-05, "loss": 0.6402, "step": 16692 }, { "epoch": 0.4873726314560159, "grad_norm": 0.5496671737631484, "learning_rate": 2.84801297648013e-05, "loss": 0.6617, "step": 16693 }, { "epoch": 0.48740182768386325, "grad_norm": 0.5106668808128966, "learning_rate": 2.8478507704785078e-05, "loss": 0.5394, "step": 16694 }, { "epoch": 0.4874310239117106, "grad_norm": 0.533531919103041, "learning_rate": 2.847688564476886e-05, "loss": 0.6032, "step": 16695 }, { "epoch": 0.487460220139558, "grad_norm": 0.4582105070645569, "learning_rate": 2.8475263584752638e-05, "loss": 0.4746, "step": 16696 }, { "epoch": 0.48748941636740534, "grad_norm": 0.5276065594751704, "learning_rate": 2.8473641524736416e-05, "loss": 0.6019, "step": 16697 }, { "epoch": 0.4875186125952527, "grad_norm": 0.5509244853098506, "learning_rate": 2.8472019464720195e-05, "loss": 0.6998, "step": 16698 }, { "epoch": 0.48754780882310006, "grad_norm": 0.5401870570637728, "learning_rate": 2.8470397404703973e-05, "loss": 0.6641, "step": 16699 }, { "epoch": 0.4875770050509474, "grad_norm": 0.5420176686979256, "learning_rate": 2.8468775344687755e-05, "loss": 0.6027, "step": 16700 }, { "epoch": 0.4876062012787948, "grad_norm": 0.47684460436408843, "learning_rate": 2.8467153284671533e-05, "loss": 0.5167, "step": 16701 }, { "epoch": 0.48763539750664214, "grad_norm": 0.5438792250291721, "learning_rate": 2.846553122465531e-05, "loss": 0.6081, "step": 16702 }, { "epoch": 0.4876645937344895, "grad_norm": 8.748450659238555, "learning_rate": 2.846390916463909e-05, "loss": 0.6636, "step": 16703 }, { "epoch": 0.48769378996233687, "grad_norm": 0.5316059265844825, "learning_rate": 2.846228710462287e-05, "loss": 0.6465, "step": 16704 }, { "epoch": 0.4877229861901842, "grad_norm": 0.5098079421110489, "learning_rate": 2.846066504460665e-05, "loss": 0.5681, "step": 16705 }, { "epoch": 0.4877521824180316, "grad_norm": 0.5286624142793274, "learning_rate": 2.8459042984590435e-05, "loss": 0.6032, "step": 16706 }, { "epoch": 0.48778137864587895, "grad_norm": 0.511672880198064, "learning_rate": 2.8457420924574214e-05, "loss": 0.6381, "step": 16707 }, { "epoch": 0.4878105748737263, "grad_norm": 0.48882021854666036, "learning_rate": 2.8455798864557992e-05, "loss": 0.5264, "step": 16708 }, { "epoch": 0.48783977110157367, "grad_norm": 0.48566640164532165, "learning_rate": 2.845417680454177e-05, "loss": 0.5269, "step": 16709 }, { "epoch": 0.48786896732942103, "grad_norm": 0.5640688378210769, "learning_rate": 2.845255474452555e-05, "loss": 0.544, "step": 16710 }, { "epoch": 0.4878981635572684, "grad_norm": 0.5130061082759877, "learning_rate": 2.845093268450933e-05, "loss": 0.6229, "step": 16711 }, { "epoch": 0.48792735978511576, "grad_norm": 0.4845037482726214, "learning_rate": 2.844931062449311e-05, "loss": 0.5384, "step": 16712 }, { "epoch": 0.4879565560129631, "grad_norm": 0.575440237670637, "learning_rate": 2.8447688564476887e-05, "loss": 0.6584, "step": 16713 }, { "epoch": 0.4879857522408105, "grad_norm": 0.472529072461852, "learning_rate": 2.8446066504460666e-05, "loss": 0.5132, "step": 16714 }, { "epoch": 0.48801494846865784, "grad_norm": 0.58009212251572, "learning_rate": 2.8444444444444447e-05, "loss": 0.7468, "step": 16715 }, { "epoch": 0.4880441446965052, "grad_norm": 0.45328910327049865, "learning_rate": 2.8442822384428226e-05, "loss": 0.5061, "step": 16716 }, { "epoch": 0.48807334092435256, "grad_norm": 0.555457017939686, "learning_rate": 2.8441200324412004e-05, "loss": 0.5828, "step": 16717 }, { "epoch": 0.4881025371521999, "grad_norm": 0.5134312187500677, "learning_rate": 2.8439578264395782e-05, "loss": 0.6059, "step": 16718 }, { "epoch": 0.4881317333800473, "grad_norm": 0.501307053196732, "learning_rate": 2.843795620437956e-05, "loss": 0.553, "step": 16719 }, { "epoch": 0.48816092960789464, "grad_norm": 0.5494661824701504, "learning_rate": 2.8436334144363343e-05, "loss": 0.6031, "step": 16720 }, { "epoch": 0.488190125835742, "grad_norm": 0.46334947990020714, "learning_rate": 2.843471208434712e-05, "loss": 0.5367, "step": 16721 }, { "epoch": 0.48821932206358937, "grad_norm": 0.5040125393976557, "learning_rate": 2.84330900243309e-05, "loss": 0.5712, "step": 16722 }, { "epoch": 0.48824851829143673, "grad_norm": 0.5140417585763032, "learning_rate": 2.8431467964314678e-05, "loss": 0.5999, "step": 16723 }, { "epoch": 0.4882777145192841, "grad_norm": 0.46720613829605584, "learning_rate": 2.8429845904298456e-05, "loss": 0.5089, "step": 16724 }, { "epoch": 0.48830691074713145, "grad_norm": 0.5265676397996506, "learning_rate": 2.842822384428224e-05, "loss": 0.6555, "step": 16725 }, { "epoch": 0.4883361069749788, "grad_norm": 0.5138055922993713, "learning_rate": 2.8426601784266023e-05, "loss": 0.6166, "step": 16726 }, { "epoch": 0.4883653032028262, "grad_norm": 0.514466152355157, "learning_rate": 2.84249797242498e-05, "loss": 0.624, "step": 16727 }, { "epoch": 0.48839449943067353, "grad_norm": 0.4790558062999754, "learning_rate": 2.842335766423358e-05, "loss": 0.4773, "step": 16728 }, { "epoch": 0.4884236956585209, "grad_norm": 0.5356569060742139, "learning_rate": 2.8421735604217358e-05, "loss": 0.6683, "step": 16729 }, { "epoch": 0.48845289188636826, "grad_norm": 0.5198420379977723, "learning_rate": 2.8420113544201136e-05, "loss": 0.6184, "step": 16730 }, { "epoch": 0.4884820881142156, "grad_norm": 0.5675948826907629, "learning_rate": 2.8418491484184918e-05, "loss": 0.7252, "step": 16731 }, { "epoch": 0.488511284342063, "grad_norm": 0.5128778015009454, "learning_rate": 2.8416869424168697e-05, "loss": 0.6043, "step": 16732 }, { "epoch": 0.48854048056991034, "grad_norm": 0.5335147475332762, "learning_rate": 2.8415247364152475e-05, "loss": 0.5742, "step": 16733 }, { "epoch": 0.48856967679775776, "grad_norm": 0.506143954637501, "learning_rate": 2.8413625304136253e-05, "loss": 0.6084, "step": 16734 }, { "epoch": 0.4885988730256051, "grad_norm": 0.5264830053510243, "learning_rate": 2.8412003244120035e-05, "loss": 0.6203, "step": 16735 }, { "epoch": 0.4886280692534525, "grad_norm": 0.5639739091055462, "learning_rate": 2.8410381184103813e-05, "loss": 0.6938, "step": 16736 }, { "epoch": 0.48865726548129984, "grad_norm": 0.5822262116485784, "learning_rate": 2.8408759124087592e-05, "loss": 0.7638, "step": 16737 }, { "epoch": 0.4886864617091472, "grad_norm": 0.491959828439598, "learning_rate": 2.840713706407137e-05, "loss": 0.5453, "step": 16738 }, { "epoch": 0.48871565793699456, "grad_norm": 0.5140218462186038, "learning_rate": 2.840551500405515e-05, "loss": 0.576, "step": 16739 }, { "epoch": 0.4887448541648419, "grad_norm": 0.5393692360794399, "learning_rate": 2.840389294403893e-05, "loss": 0.6869, "step": 16740 }, { "epoch": 0.4887740503926893, "grad_norm": 0.5219508130598259, "learning_rate": 2.840227088402271e-05, "loss": 0.6145, "step": 16741 }, { "epoch": 0.48880324662053665, "grad_norm": 0.4933260724733979, "learning_rate": 2.8400648824006487e-05, "loss": 0.5577, "step": 16742 }, { "epoch": 0.488832442848384, "grad_norm": 0.5146443912837382, "learning_rate": 2.8399026763990265e-05, "loss": 0.5843, "step": 16743 }, { "epoch": 0.48886163907623137, "grad_norm": 0.5711478322804344, "learning_rate": 2.839740470397405e-05, "loss": 0.6729, "step": 16744 }, { "epoch": 0.48889083530407873, "grad_norm": 0.523777935919129, "learning_rate": 2.839578264395783e-05, "loss": 0.6366, "step": 16745 }, { "epoch": 0.4889200315319261, "grad_norm": 0.5721116746763166, "learning_rate": 2.839416058394161e-05, "loss": 0.6076, "step": 16746 }, { "epoch": 0.48894922775977345, "grad_norm": 0.5575778827239166, "learning_rate": 2.839253852392539e-05, "loss": 0.6726, "step": 16747 }, { "epoch": 0.4889784239876208, "grad_norm": 0.5645435166142773, "learning_rate": 2.8390916463909168e-05, "loss": 0.7136, "step": 16748 }, { "epoch": 0.4890076202154682, "grad_norm": 0.5202898231054955, "learning_rate": 2.8389294403892946e-05, "loss": 0.6073, "step": 16749 }, { "epoch": 0.48903681644331554, "grad_norm": 0.5376272277488158, "learning_rate": 2.8387672343876724e-05, "loss": 0.6333, "step": 16750 }, { "epoch": 0.4890660126711629, "grad_norm": 0.5220510870925417, "learning_rate": 2.8386050283860506e-05, "loss": 0.5868, "step": 16751 }, { "epoch": 0.48909520889901026, "grad_norm": 0.51669037854089, "learning_rate": 2.8384428223844284e-05, "loss": 0.6198, "step": 16752 }, { "epoch": 0.4891244051268576, "grad_norm": 0.525374698835465, "learning_rate": 2.8382806163828063e-05, "loss": 0.6105, "step": 16753 }, { "epoch": 0.489153601354705, "grad_norm": 0.5271627540956556, "learning_rate": 2.838118410381184e-05, "loss": 0.6473, "step": 16754 }, { "epoch": 0.48918279758255234, "grad_norm": 0.5341918544999307, "learning_rate": 2.837956204379562e-05, "loss": 0.5964, "step": 16755 }, { "epoch": 0.4892119938103997, "grad_norm": 0.5436204186859791, "learning_rate": 2.83779399837794e-05, "loss": 0.6248, "step": 16756 }, { "epoch": 0.48924119003824706, "grad_norm": 0.5589006950330417, "learning_rate": 2.837631792376318e-05, "loss": 0.7035, "step": 16757 }, { "epoch": 0.4892703862660944, "grad_norm": 0.48759032978798306, "learning_rate": 2.8374695863746958e-05, "loss": 0.4943, "step": 16758 }, { "epoch": 0.4892995824939418, "grad_norm": 0.5842407177426524, "learning_rate": 2.8373073803730736e-05, "loss": 0.704, "step": 16759 }, { "epoch": 0.48932877872178915, "grad_norm": 0.5431834845884217, "learning_rate": 2.8371451743714518e-05, "loss": 0.6713, "step": 16760 }, { "epoch": 0.4893579749496365, "grad_norm": 0.5171901042214857, "learning_rate": 2.8369829683698297e-05, "loss": 0.6272, "step": 16761 }, { "epoch": 0.48938717117748387, "grad_norm": 0.5122282979806684, "learning_rate": 2.8368207623682075e-05, "loss": 0.6158, "step": 16762 }, { "epoch": 0.48941636740533123, "grad_norm": 0.5597826836748122, "learning_rate": 2.836658556366586e-05, "loss": 0.665, "step": 16763 }, { "epoch": 0.4894455636331786, "grad_norm": 0.5126243956214227, "learning_rate": 2.836496350364964e-05, "loss": 0.6054, "step": 16764 }, { "epoch": 0.48947475986102595, "grad_norm": 0.5141685421480185, "learning_rate": 2.8363341443633417e-05, "loss": 0.5414, "step": 16765 }, { "epoch": 0.4895039560888733, "grad_norm": 0.5236711326460147, "learning_rate": 2.8361719383617195e-05, "loss": 0.6147, "step": 16766 }, { "epoch": 0.4895331523167207, "grad_norm": 0.4771956903210594, "learning_rate": 2.8360097323600977e-05, "loss": 0.5233, "step": 16767 }, { "epoch": 0.48956234854456804, "grad_norm": 0.49963703808811744, "learning_rate": 2.8358475263584755e-05, "loss": 0.574, "step": 16768 }, { "epoch": 0.4895915447724154, "grad_norm": 0.5243693806532875, "learning_rate": 2.8356853203568534e-05, "loss": 0.6048, "step": 16769 }, { "epoch": 0.48962074100026276, "grad_norm": 0.5216231985444603, "learning_rate": 2.8355231143552312e-05, "loss": 0.6033, "step": 16770 }, { "epoch": 0.4896499372281101, "grad_norm": 0.5615834149582518, "learning_rate": 2.8353609083536094e-05, "loss": 0.5874, "step": 16771 }, { "epoch": 0.4896791334559575, "grad_norm": 0.6467896202772193, "learning_rate": 2.8351987023519872e-05, "loss": 0.6342, "step": 16772 }, { "epoch": 0.48970832968380484, "grad_norm": 0.49568874905223664, "learning_rate": 2.835036496350365e-05, "loss": 0.5818, "step": 16773 }, { "epoch": 0.4897375259116522, "grad_norm": 0.5122283867386089, "learning_rate": 2.834874290348743e-05, "loss": 0.5974, "step": 16774 }, { "epoch": 0.48976672213949957, "grad_norm": 0.5295382322075712, "learning_rate": 2.8347120843471207e-05, "loss": 0.597, "step": 16775 }, { "epoch": 0.4897959183673469, "grad_norm": 0.5379725285279888, "learning_rate": 2.834549878345499e-05, "loss": 0.6171, "step": 16776 }, { "epoch": 0.4898251145951943, "grad_norm": 0.5319941747537389, "learning_rate": 2.8343876723438767e-05, "loss": 0.6636, "step": 16777 }, { "epoch": 0.48985431082304165, "grad_norm": 0.5282281854185513, "learning_rate": 2.8342254663422546e-05, "loss": 0.6081, "step": 16778 }, { "epoch": 0.489883507050889, "grad_norm": 0.573306300562475, "learning_rate": 2.8340632603406324e-05, "loss": 0.6644, "step": 16779 }, { "epoch": 0.4899127032787364, "grad_norm": 0.5426936431415409, "learning_rate": 2.8339010543390106e-05, "loss": 0.6332, "step": 16780 }, { "epoch": 0.48994189950658373, "grad_norm": 0.5678342993986081, "learning_rate": 2.8337388483373888e-05, "loss": 0.7336, "step": 16781 }, { "epoch": 0.4899710957344311, "grad_norm": 0.5014899888340651, "learning_rate": 2.833576642335767e-05, "loss": 0.5538, "step": 16782 }, { "epoch": 0.49000029196227846, "grad_norm": 0.5607612102762071, "learning_rate": 2.8334144363341448e-05, "loss": 0.6324, "step": 16783 }, { "epoch": 0.4900294881901258, "grad_norm": 0.5413370635072536, "learning_rate": 2.8332522303325226e-05, "loss": 0.5991, "step": 16784 }, { "epoch": 0.4900586844179732, "grad_norm": 0.5124720431632221, "learning_rate": 2.8330900243309005e-05, "loss": 0.553, "step": 16785 }, { "epoch": 0.49008788064582054, "grad_norm": 0.5344405463142703, "learning_rate": 2.8329278183292783e-05, "loss": 0.6498, "step": 16786 }, { "epoch": 0.4901170768736679, "grad_norm": 0.5207060454807088, "learning_rate": 2.8327656123276565e-05, "loss": 0.565, "step": 16787 }, { "epoch": 0.49014627310151526, "grad_norm": 0.5050085750922801, "learning_rate": 2.8326034063260343e-05, "loss": 0.5669, "step": 16788 }, { "epoch": 0.4901754693293626, "grad_norm": 0.5014169879507406, "learning_rate": 2.832441200324412e-05, "loss": 0.5977, "step": 16789 }, { "epoch": 0.49020466555721, "grad_norm": 0.531806977363206, "learning_rate": 2.83227899432279e-05, "loss": 0.6458, "step": 16790 }, { "epoch": 0.49023386178505735, "grad_norm": 0.5345180942988956, "learning_rate": 2.832116788321168e-05, "loss": 0.6332, "step": 16791 }, { "epoch": 0.4902630580129047, "grad_norm": 0.5045323739499717, "learning_rate": 2.831954582319546e-05, "loss": 0.5977, "step": 16792 }, { "epoch": 0.49029225424075207, "grad_norm": 0.5542383694181665, "learning_rate": 2.831792376317924e-05, "loss": 0.6705, "step": 16793 }, { "epoch": 0.4903214504685995, "grad_norm": 0.5250973936775312, "learning_rate": 2.8316301703163017e-05, "loss": 0.6067, "step": 16794 }, { "epoch": 0.49035064669644685, "grad_norm": 0.5652504330577464, "learning_rate": 2.8314679643146795e-05, "loss": 0.679, "step": 16795 }, { "epoch": 0.4903798429242942, "grad_norm": 0.5442543704214848, "learning_rate": 2.8313057583130577e-05, "loss": 0.6395, "step": 16796 }, { "epoch": 0.49040903915214157, "grad_norm": 0.5032095964180753, "learning_rate": 2.8311435523114355e-05, "loss": 0.5781, "step": 16797 }, { "epoch": 0.49043823537998893, "grad_norm": 0.48505973784854267, "learning_rate": 2.8309813463098134e-05, "loss": 0.5698, "step": 16798 }, { "epoch": 0.4904674316078363, "grad_norm": 0.5195243640861444, "learning_rate": 2.8308191403081912e-05, "loss": 0.6156, "step": 16799 }, { "epoch": 0.49049662783568365, "grad_norm": 0.5333760015377055, "learning_rate": 2.8306569343065697e-05, "loss": 0.6182, "step": 16800 }, { "epoch": 0.490525824063531, "grad_norm": 0.5096891030377217, "learning_rate": 2.8304947283049476e-05, "loss": 0.625, "step": 16801 }, { "epoch": 0.4905550202913784, "grad_norm": 0.508410766352143, "learning_rate": 2.8303325223033257e-05, "loss": 0.6088, "step": 16802 }, { "epoch": 0.49058421651922574, "grad_norm": 0.4661800407411877, "learning_rate": 2.8301703163017036e-05, "loss": 0.5222, "step": 16803 }, { "epoch": 0.4906134127470731, "grad_norm": 0.5647104269956298, "learning_rate": 2.8300081103000814e-05, "loss": 0.6348, "step": 16804 }, { "epoch": 0.49064260897492046, "grad_norm": 0.5599244270917175, "learning_rate": 2.8298459042984592e-05, "loss": 0.6161, "step": 16805 }, { "epoch": 0.4906718052027678, "grad_norm": 0.5092981697355647, "learning_rate": 2.829683698296837e-05, "loss": 0.6281, "step": 16806 }, { "epoch": 0.4907010014306152, "grad_norm": 0.5491310859719771, "learning_rate": 2.8295214922952153e-05, "loss": 0.6811, "step": 16807 }, { "epoch": 0.49073019765846254, "grad_norm": 0.5537621767426913, "learning_rate": 2.829359286293593e-05, "loss": 0.6017, "step": 16808 }, { "epoch": 0.4907593938863099, "grad_norm": 0.4671063544398384, "learning_rate": 2.829197080291971e-05, "loss": 0.5084, "step": 16809 }, { "epoch": 0.49078859011415726, "grad_norm": 0.5509621646021452, "learning_rate": 2.8290348742903488e-05, "loss": 0.65, "step": 16810 }, { "epoch": 0.4908177863420046, "grad_norm": 0.5066761471613507, "learning_rate": 2.8288726682887266e-05, "loss": 0.5665, "step": 16811 }, { "epoch": 0.490846982569852, "grad_norm": 0.5131254435699827, "learning_rate": 2.8287104622871048e-05, "loss": 0.6094, "step": 16812 }, { "epoch": 0.49087617879769935, "grad_norm": 0.4701152357679535, "learning_rate": 2.8285482562854826e-05, "loss": 0.5071, "step": 16813 }, { "epoch": 0.4909053750255467, "grad_norm": 0.5171617023195543, "learning_rate": 2.8283860502838605e-05, "loss": 0.6163, "step": 16814 }, { "epoch": 0.49093457125339407, "grad_norm": 0.5517069473580641, "learning_rate": 2.8282238442822383e-05, "loss": 0.6529, "step": 16815 }, { "epoch": 0.49096376748124143, "grad_norm": 0.5801444261042934, "learning_rate": 2.8280616382806165e-05, "loss": 0.5617, "step": 16816 }, { "epoch": 0.4909929637090888, "grad_norm": 0.5867247899143148, "learning_rate": 2.8278994322789943e-05, "loss": 0.6414, "step": 16817 }, { "epoch": 0.49102215993693615, "grad_norm": 0.5780008683812381, "learning_rate": 2.827737226277372e-05, "loss": 0.63, "step": 16818 }, { "epoch": 0.4910513561647835, "grad_norm": 0.4972315551177069, "learning_rate": 2.8275750202757507e-05, "loss": 0.5712, "step": 16819 }, { "epoch": 0.4910805523926309, "grad_norm": 0.5185073184838634, "learning_rate": 2.8274128142741285e-05, "loss": 0.6216, "step": 16820 }, { "epoch": 0.49110974862047824, "grad_norm": 0.5378664243363678, "learning_rate": 2.8272506082725063e-05, "loss": 0.6653, "step": 16821 }, { "epoch": 0.4911389448483256, "grad_norm": 0.5392833628459125, "learning_rate": 2.8270884022708842e-05, "loss": 0.6655, "step": 16822 }, { "epoch": 0.49116814107617296, "grad_norm": 0.5497855128340977, "learning_rate": 2.8269261962692623e-05, "loss": 0.6543, "step": 16823 }, { "epoch": 0.4911973373040203, "grad_norm": 0.5623358939589468, "learning_rate": 2.8267639902676402e-05, "loss": 0.688, "step": 16824 }, { "epoch": 0.4912265335318677, "grad_norm": 0.5431358229314751, "learning_rate": 2.826601784266018e-05, "loss": 0.6413, "step": 16825 }, { "epoch": 0.49125572975971504, "grad_norm": 0.55286448007284, "learning_rate": 2.826439578264396e-05, "loss": 0.6708, "step": 16826 }, { "epoch": 0.4912849259875624, "grad_norm": 0.5693356469242628, "learning_rate": 2.826277372262774e-05, "loss": 0.6568, "step": 16827 }, { "epoch": 0.49131412221540977, "grad_norm": 0.5206963817144178, "learning_rate": 2.826115166261152e-05, "loss": 0.6133, "step": 16828 }, { "epoch": 0.4913433184432571, "grad_norm": 0.4985389465746774, "learning_rate": 2.8259529602595297e-05, "loss": 0.6073, "step": 16829 }, { "epoch": 0.4913725146711045, "grad_norm": 0.5118713122953525, "learning_rate": 2.8257907542579075e-05, "loss": 0.598, "step": 16830 }, { "epoch": 0.49140171089895185, "grad_norm": 0.5281105957250842, "learning_rate": 2.8256285482562854e-05, "loss": 0.6014, "step": 16831 }, { "epoch": 0.4914309071267992, "grad_norm": 0.5055069605343043, "learning_rate": 2.8254663422546636e-05, "loss": 0.5935, "step": 16832 }, { "epoch": 0.49146010335464657, "grad_norm": 0.585864623211619, "learning_rate": 2.8253041362530414e-05, "loss": 0.6413, "step": 16833 }, { "epoch": 0.49148929958249393, "grad_norm": 0.536052886804796, "learning_rate": 2.8251419302514192e-05, "loss": 0.6138, "step": 16834 }, { "epoch": 0.4915184958103413, "grad_norm": 0.5027410267275659, "learning_rate": 2.824979724249797e-05, "loss": 0.5736, "step": 16835 }, { "epoch": 0.49154769203818865, "grad_norm": 0.4922491228443207, "learning_rate": 2.8248175182481753e-05, "loss": 0.542, "step": 16836 }, { "epoch": 0.491576888266036, "grad_norm": 0.5544074056353974, "learning_rate": 2.824655312246553e-05, "loss": 0.6368, "step": 16837 }, { "epoch": 0.4916060844938834, "grad_norm": 0.5159730134760996, "learning_rate": 2.8244931062449316e-05, "loss": 0.6274, "step": 16838 }, { "epoch": 0.49163528072173074, "grad_norm": 0.4970266983052289, "learning_rate": 2.8243309002433094e-05, "loss": 0.5526, "step": 16839 }, { "epoch": 0.4916644769495781, "grad_norm": 0.4894119886754868, "learning_rate": 2.8241686942416873e-05, "loss": 0.5401, "step": 16840 }, { "epoch": 0.49169367317742546, "grad_norm": 0.5032295416414814, "learning_rate": 2.824006488240065e-05, "loss": 0.6003, "step": 16841 }, { "epoch": 0.4917228694052728, "grad_norm": 0.5632706841672939, "learning_rate": 2.823844282238443e-05, "loss": 0.7015, "step": 16842 }, { "epoch": 0.4917520656331202, "grad_norm": 0.5328980677843124, "learning_rate": 2.823682076236821e-05, "loss": 0.6494, "step": 16843 }, { "epoch": 0.49178126186096754, "grad_norm": 0.5075073340506798, "learning_rate": 2.823519870235199e-05, "loss": 0.6124, "step": 16844 }, { "epoch": 0.4918104580888149, "grad_norm": 0.5332817385292865, "learning_rate": 2.8233576642335768e-05, "loss": 0.6181, "step": 16845 }, { "epoch": 0.49183965431666227, "grad_norm": 0.541892162664156, "learning_rate": 2.8231954582319546e-05, "loss": 0.6298, "step": 16846 }, { "epoch": 0.49186885054450963, "grad_norm": 0.5151119751421189, "learning_rate": 2.8230332522303328e-05, "loss": 0.6268, "step": 16847 }, { "epoch": 0.491898046772357, "grad_norm": 0.5464013263572848, "learning_rate": 2.8228710462287107e-05, "loss": 0.6323, "step": 16848 }, { "epoch": 0.49192724300020435, "grad_norm": 0.5678091766612661, "learning_rate": 2.8227088402270885e-05, "loss": 0.6436, "step": 16849 }, { "epoch": 0.4919564392280517, "grad_norm": 0.4964153189337451, "learning_rate": 2.8225466342254663e-05, "loss": 0.5934, "step": 16850 }, { "epoch": 0.4919856354558991, "grad_norm": 0.5263046711362735, "learning_rate": 2.822384428223844e-05, "loss": 0.599, "step": 16851 }, { "epoch": 0.49201483168374643, "grad_norm": 0.5322229155655189, "learning_rate": 2.8222222222222223e-05, "loss": 0.5715, "step": 16852 }, { "epoch": 0.4920440279115938, "grad_norm": 0.5014643312341565, "learning_rate": 2.8220600162206002e-05, "loss": 0.5821, "step": 16853 }, { "epoch": 0.49207322413944116, "grad_norm": 0.5527591095738271, "learning_rate": 2.821897810218978e-05, "loss": 0.6631, "step": 16854 }, { "epoch": 0.4921024203672886, "grad_norm": 0.48649903080232415, "learning_rate": 2.821735604217356e-05, "loss": 0.5372, "step": 16855 }, { "epoch": 0.49213161659513593, "grad_norm": 0.5421535987422014, "learning_rate": 2.8215733982157337e-05, "loss": 0.596, "step": 16856 }, { "epoch": 0.4921608128229833, "grad_norm": 0.5372097881601757, "learning_rate": 2.8214111922141122e-05, "loss": 0.6183, "step": 16857 }, { "epoch": 0.49219000905083066, "grad_norm": 0.5505840035672864, "learning_rate": 2.8212489862124904e-05, "loss": 0.704, "step": 16858 }, { "epoch": 0.492219205278678, "grad_norm": 0.537555626965655, "learning_rate": 2.8210867802108682e-05, "loss": 0.626, "step": 16859 }, { "epoch": 0.4922484015065254, "grad_norm": 0.5225043946643443, "learning_rate": 2.820924574209246e-05, "loss": 0.6243, "step": 16860 }, { "epoch": 0.49227759773437274, "grad_norm": 0.5154370985820418, "learning_rate": 2.820762368207624e-05, "loss": 0.6411, "step": 16861 }, { "epoch": 0.4923067939622201, "grad_norm": 0.5129387027027035, "learning_rate": 2.8206001622060017e-05, "loss": 0.6269, "step": 16862 }, { "epoch": 0.49233599019006746, "grad_norm": 0.47411813568814654, "learning_rate": 2.82043795620438e-05, "loss": 0.5124, "step": 16863 }, { "epoch": 0.4923651864179148, "grad_norm": 0.5446863583839643, "learning_rate": 2.8202757502027577e-05, "loss": 0.6825, "step": 16864 }, { "epoch": 0.4923943826457622, "grad_norm": 0.5336269702171818, "learning_rate": 2.8201135442011356e-05, "loss": 0.6499, "step": 16865 }, { "epoch": 0.49242357887360955, "grad_norm": 0.5138950778637787, "learning_rate": 2.8199513381995134e-05, "loss": 0.6308, "step": 16866 }, { "epoch": 0.4924527751014569, "grad_norm": 0.5214019133886458, "learning_rate": 2.8197891321978913e-05, "loss": 0.6197, "step": 16867 }, { "epoch": 0.49248197132930427, "grad_norm": 0.4959139369877354, "learning_rate": 2.8196269261962694e-05, "loss": 0.5574, "step": 16868 }, { "epoch": 0.49251116755715163, "grad_norm": 0.5038271677224666, "learning_rate": 2.8194647201946473e-05, "loss": 0.588, "step": 16869 }, { "epoch": 0.492540363784999, "grad_norm": 0.5465840492408156, "learning_rate": 2.819302514193025e-05, "loss": 0.5787, "step": 16870 }, { "epoch": 0.49256956001284635, "grad_norm": 0.5060692311901797, "learning_rate": 2.819140308191403e-05, "loss": 0.5934, "step": 16871 }, { "epoch": 0.4925987562406937, "grad_norm": 0.516935070674579, "learning_rate": 2.818978102189781e-05, "loss": 0.6081, "step": 16872 }, { "epoch": 0.4926279524685411, "grad_norm": 0.5647231908264645, "learning_rate": 2.818815896188159e-05, "loss": 0.7014, "step": 16873 }, { "epoch": 0.49265714869638844, "grad_norm": 0.5033657337296219, "learning_rate": 2.8186536901865368e-05, "loss": 0.6032, "step": 16874 }, { "epoch": 0.4926863449242358, "grad_norm": 0.5704481661850196, "learning_rate": 2.8184914841849146e-05, "loss": 0.6569, "step": 16875 }, { "epoch": 0.49271554115208316, "grad_norm": 0.6223190842163453, "learning_rate": 2.818329278183293e-05, "loss": 0.6657, "step": 16876 }, { "epoch": 0.4927447373799305, "grad_norm": 0.5094381486249577, "learning_rate": 2.818167072181671e-05, "loss": 0.6182, "step": 16877 }, { "epoch": 0.4927739336077779, "grad_norm": 0.5460564552693437, "learning_rate": 2.818004866180049e-05, "loss": 0.6328, "step": 16878 }, { "epoch": 0.49280312983562524, "grad_norm": 0.549774968567412, "learning_rate": 2.817842660178427e-05, "loss": 0.6762, "step": 16879 }, { "epoch": 0.4928323260634726, "grad_norm": 0.5400719403191389, "learning_rate": 2.817680454176805e-05, "loss": 0.6078, "step": 16880 }, { "epoch": 0.49286152229131996, "grad_norm": 0.5185867064730193, "learning_rate": 2.8175182481751827e-05, "loss": 0.6122, "step": 16881 }, { "epoch": 0.4928907185191673, "grad_norm": 0.4850784237663372, "learning_rate": 2.8173560421735605e-05, "loss": 0.5454, "step": 16882 }, { "epoch": 0.4929199147470147, "grad_norm": 0.5692935270667282, "learning_rate": 2.8171938361719387e-05, "loss": 0.7188, "step": 16883 }, { "epoch": 0.49294911097486205, "grad_norm": 0.5194043150587434, "learning_rate": 2.8170316301703165e-05, "loss": 0.6208, "step": 16884 }, { "epoch": 0.4929783072027094, "grad_norm": 0.5473553906847719, "learning_rate": 2.8168694241686944e-05, "loss": 0.5918, "step": 16885 }, { "epoch": 0.49300750343055677, "grad_norm": 0.5433445382176184, "learning_rate": 2.8167072181670722e-05, "loss": 0.7165, "step": 16886 }, { "epoch": 0.49303669965840413, "grad_norm": 0.5296910378768117, "learning_rate": 2.81654501216545e-05, "loss": 0.6903, "step": 16887 }, { "epoch": 0.4930658958862515, "grad_norm": 0.5329058074800387, "learning_rate": 2.8163828061638282e-05, "loss": 0.6063, "step": 16888 }, { "epoch": 0.49309509211409885, "grad_norm": 0.5415037719963719, "learning_rate": 2.816220600162206e-05, "loss": 0.6792, "step": 16889 }, { "epoch": 0.4931242883419462, "grad_norm": 0.5171148160270235, "learning_rate": 2.816058394160584e-05, "loss": 0.6169, "step": 16890 }, { "epoch": 0.4931534845697936, "grad_norm": 0.46039280789954734, "learning_rate": 2.8158961881589617e-05, "loss": 0.5142, "step": 16891 }, { "epoch": 0.49318268079764094, "grad_norm": 0.523539674292884, "learning_rate": 2.81573398215734e-05, "loss": 0.561, "step": 16892 }, { "epoch": 0.4932118770254883, "grad_norm": 0.5235668735859524, "learning_rate": 2.8155717761557177e-05, "loss": 0.6052, "step": 16893 }, { "epoch": 0.49324107325333566, "grad_norm": 0.49015846557731296, "learning_rate": 2.8154095701540956e-05, "loss": 0.5466, "step": 16894 }, { "epoch": 0.493270269481183, "grad_norm": 0.5890062748120394, "learning_rate": 2.815247364152474e-05, "loss": 0.6269, "step": 16895 }, { "epoch": 0.4932994657090304, "grad_norm": 0.5526408376345331, "learning_rate": 2.815085158150852e-05, "loss": 0.6735, "step": 16896 }, { "epoch": 0.49332866193687774, "grad_norm": 0.4701251076513097, "learning_rate": 2.8149229521492298e-05, "loss": 0.5136, "step": 16897 }, { "epoch": 0.4933578581647251, "grad_norm": 0.5175650330398635, "learning_rate": 2.8147607461476076e-05, "loss": 0.5827, "step": 16898 }, { "epoch": 0.49338705439257247, "grad_norm": 0.5129668219366889, "learning_rate": 2.8145985401459858e-05, "loss": 0.5692, "step": 16899 }, { "epoch": 0.4934162506204198, "grad_norm": 0.5643222421067756, "learning_rate": 2.8144363341443636e-05, "loss": 0.726, "step": 16900 }, { "epoch": 0.4934454468482672, "grad_norm": 0.5057322650313961, "learning_rate": 2.8142741281427415e-05, "loss": 0.5501, "step": 16901 }, { "epoch": 0.49347464307611455, "grad_norm": 0.5236581586992061, "learning_rate": 2.8141119221411193e-05, "loss": 0.6287, "step": 16902 }, { "epoch": 0.4935038393039619, "grad_norm": 0.5171676276329595, "learning_rate": 2.8139497161394975e-05, "loss": 0.617, "step": 16903 }, { "epoch": 0.49353303553180927, "grad_norm": 0.49676087801625574, "learning_rate": 2.8137875101378753e-05, "loss": 0.5569, "step": 16904 }, { "epoch": 0.49356223175965663, "grad_norm": 0.5401889078351025, "learning_rate": 2.813625304136253e-05, "loss": 0.6203, "step": 16905 }, { "epoch": 0.493591427987504, "grad_norm": 0.5117405628350947, "learning_rate": 2.813463098134631e-05, "loss": 0.5991, "step": 16906 }, { "epoch": 0.49362062421535136, "grad_norm": 0.4998312702572227, "learning_rate": 2.8133008921330088e-05, "loss": 0.575, "step": 16907 }, { "epoch": 0.4936498204431987, "grad_norm": 0.5164749894115441, "learning_rate": 2.813138686131387e-05, "loss": 0.5977, "step": 16908 }, { "epoch": 0.4936790166710461, "grad_norm": 0.5153706025502033, "learning_rate": 2.812976480129765e-05, "loss": 0.6121, "step": 16909 }, { "epoch": 0.49370821289889344, "grad_norm": 0.4983994279272461, "learning_rate": 2.8128142741281427e-05, "loss": 0.5528, "step": 16910 }, { "epoch": 0.4937374091267408, "grad_norm": 0.4792268876146649, "learning_rate": 2.8126520681265205e-05, "loss": 0.5465, "step": 16911 }, { "epoch": 0.49376660535458816, "grad_norm": 0.5151972406830132, "learning_rate": 2.8124898621248983e-05, "loss": 0.5937, "step": 16912 }, { "epoch": 0.4937958015824355, "grad_norm": 0.5214038381216975, "learning_rate": 2.8123276561232765e-05, "loss": 0.635, "step": 16913 }, { "epoch": 0.4938249978102829, "grad_norm": 0.6036363904184615, "learning_rate": 2.812165450121655e-05, "loss": 0.6938, "step": 16914 }, { "epoch": 0.4938541940381303, "grad_norm": 0.5336812497150332, "learning_rate": 2.812003244120033e-05, "loss": 0.6304, "step": 16915 }, { "epoch": 0.49388339026597766, "grad_norm": 0.5298717041073372, "learning_rate": 2.8118410381184107e-05, "loss": 0.6417, "step": 16916 }, { "epoch": 0.493912586493825, "grad_norm": 0.5123713216647353, "learning_rate": 2.8116788321167886e-05, "loss": 0.5802, "step": 16917 }, { "epoch": 0.4939417827216724, "grad_norm": 0.5399797371466479, "learning_rate": 2.8115166261151664e-05, "loss": 0.6333, "step": 16918 }, { "epoch": 0.49397097894951975, "grad_norm": 0.5326009936321644, "learning_rate": 2.8113544201135446e-05, "loss": 0.6163, "step": 16919 }, { "epoch": 0.4940001751773671, "grad_norm": 0.5738257830354728, "learning_rate": 2.8111922141119224e-05, "loss": 0.687, "step": 16920 }, { "epoch": 0.49402937140521447, "grad_norm": 0.5277719797330883, "learning_rate": 2.8110300081103002e-05, "loss": 0.6295, "step": 16921 }, { "epoch": 0.49405856763306183, "grad_norm": 0.5372988626301428, "learning_rate": 2.810867802108678e-05, "loss": 0.6786, "step": 16922 }, { "epoch": 0.4940877638609092, "grad_norm": 0.5268531531971758, "learning_rate": 2.810705596107056e-05, "loss": 0.6288, "step": 16923 }, { "epoch": 0.49411696008875655, "grad_norm": 0.49059576135928434, "learning_rate": 2.810543390105434e-05, "loss": 0.5814, "step": 16924 }, { "epoch": 0.4941461563166039, "grad_norm": 0.5659298507711136, "learning_rate": 2.810381184103812e-05, "loss": 0.704, "step": 16925 }, { "epoch": 0.4941753525444513, "grad_norm": 0.5015259332729969, "learning_rate": 2.8102189781021898e-05, "loss": 0.5682, "step": 16926 }, { "epoch": 0.49420454877229864, "grad_norm": 0.5531614997023644, "learning_rate": 2.8100567721005676e-05, "loss": 0.6565, "step": 16927 }, { "epoch": 0.494233745000146, "grad_norm": 0.5092593613627662, "learning_rate": 2.8098945660989458e-05, "loss": 0.5869, "step": 16928 }, { "epoch": 0.49426294122799336, "grad_norm": 0.4898357199934548, "learning_rate": 2.8097323600973236e-05, "loss": 0.5422, "step": 16929 }, { "epoch": 0.4942921374558407, "grad_norm": 0.4867497108214147, "learning_rate": 2.8095701540957015e-05, "loss": 0.5646, "step": 16930 }, { "epoch": 0.4943213336836881, "grad_norm": 0.5392248763470728, "learning_rate": 2.8094079480940793e-05, "loss": 0.6383, "step": 16931 }, { "epoch": 0.49435052991153544, "grad_norm": 0.5222883546903249, "learning_rate": 2.8092457420924578e-05, "loss": 0.6041, "step": 16932 }, { "epoch": 0.4943797261393828, "grad_norm": 0.5197933554933433, "learning_rate": 2.8090835360908356e-05, "loss": 0.5501, "step": 16933 }, { "epoch": 0.49440892236723016, "grad_norm": 0.5739716969472706, "learning_rate": 2.8089213300892138e-05, "loss": 0.6712, "step": 16934 }, { "epoch": 0.4944381185950775, "grad_norm": 0.5088394900868072, "learning_rate": 2.8087591240875917e-05, "loss": 0.601, "step": 16935 }, { "epoch": 0.4944673148229249, "grad_norm": 0.5236559144930771, "learning_rate": 2.8085969180859695e-05, "loss": 0.5998, "step": 16936 }, { "epoch": 0.49449651105077225, "grad_norm": 0.5202942636490944, "learning_rate": 2.8084347120843473e-05, "loss": 0.5843, "step": 16937 }, { "epoch": 0.4945257072786196, "grad_norm": 0.5091104508885441, "learning_rate": 2.808272506082725e-05, "loss": 0.6023, "step": 16938 }, { "epoch": 0.49455490350646697, "grad_norm": 0.5229441850035479, "learning_rate": 2.8081103000811033e-05, "loss": 0.6204, "step": 16939 }, { "epoch": 0.49458409973431433, "grad_norm": 0.5314696833622577, "learning_rate": 2.8079480940794812e-05, "loss": 0.6098, "step": 16940 }, { "epoch": 0.4946132959621617, "grad_norm": 0.5121918642701822, "learning_rate": 2.807785888077859e-05, "loss": 0.5714, "step": 16941 }, { "epoch": 0.49464249219000905, "grad_norm": 0.5416569652977424, "learning_rate": 2.807623682076237e-05, "loss": 0.6001, "step": 16942 }, { "epoch": 0.4946716884178564, "grad_norm": 0.5034526314831504, "learning_rate": 2.8074614760746147e-05, "loss": 0.5722, "step": 16943 }, { "epoch": 0.4947008846457038, "grad_norm": 0.5393752815352343, "learning_rate": 2.807299270072993e-05, "loss": 0.6462, "step": 16944 }, { "epoch": 0.49473008087355114, "grad_norm": 0.5287450393345423, "learning_rate": 2.8071370640713707e-05, "loss": 0.6348, "step": 16945 }, { "epoch": 0.4947592771013985, "grad_norm": 0.4993982148436333, "learning_rate": 2.8069748580697485e-05, "loss": 0.5927, "step": 16946 }, { "epoch": 0.49478847332924586, "grad_norm": 0.49746452016529685, "learning_rate": 2.8068126520681264e-05, "loss": 0.5126, "step": 16947 }, { "epoch": 0.4948176695570932, "grad_norm": 0.5064230468352233, "learning_rate": 2.8066504460665046e-05, "loss": 0.5801, "step": 16948 }, { "epoch": 0.4948468657849406, "grad_norm": 0.5170810709978134, "learning_rate": 2.8064882400648824e-05, "loss": 0.5939, "step": 16949 }, { "epoch": 0.49487606201278794, "grad_norm": 0.5166017581822383, "learning_rate": 2.8063260340632602e-05, "loss": 0.608, "step": 16950 }, { "epoch": 0.4949052582406353, "grad_norm": 0.5694913460739878, "learning_rate": 2.8061638280616387e-05, "loss": 0.6669, "step": 16951 }, { "epoch": 0.49493445446848267, "grad_norm": 0.5509076293750544, "learning_rate": 2.8060016220600166e-05, "loss": 0.649, "step": 16952 }, { "epoch": 0.49496365069633, "grad_norm": 0.5431920092897321, "learning_rate": 2.8058394160583944e-05, "loss": 0.663, "step": 16953 }, { "epoch": 0.4949928469241774, "grad_norm": 0.5121408415864029, "learning_rate": 2.8056772100567723e-05, "loss": 0.5802, "step": 16954 }, { "epoch": 0.49502204315202475, "grad_norm": 0.5014149086313916, "learning_rate": 2.8055150040551504e-05, "loss": 0.5807, "step": 16955 }, { "epoch": 0.4950512393798721, "grad_norm": 0.47347981030641617, "learning_rate": 2.8053527980535283e-05, "loss": 0.5508, "step": 16956 }, { "epoch": 0.49508043560771947, "grad_norm": 0.5262048442042861, "learning_rate": 2.805190592051906e-05, "loss": 0.6379, "step": 16957 }, { "epoch": 0.49510963183556683, "grad_norm": 0.5405291184587003, "learning_rate": 2.805028386050284e-05, "loss": 0.6472, "step": 16958 }, { "epoch": 0.4951388280634142, "grad_norm": 0.47632370259967544, "learning_rate": 2.804866180048662e-05, "loss": 0.5234, "step": 16959 }, { "epoch": 0.49516802429126155, "grad_norm": 0.5295890525278724, "learning_rate": 2.80470397404704e-05, "loss": 0.6049, "step": 16960 }, { "epoch": 0.4951972205191089, "grad_norm": 0.5354903320055667, "learning_rate": 2.8045417680454178e-05, "loss": 0.6627, "step": 16961 }, { "epoch": 0.4952264167469563, "grad_norm": 0.5295223265537006, "learning_rate": 2.8043795620437956e-05, "loss": 0.5775, "step": 16962 }, { "epoch": 0.49525561297480364, "grad_norm": 0.5201323946048761, "learning_rate": 2.8042173560421735e-05, "loss": 0.6176, "step": 16963 }, { "epoch": 0.495284809202651, "grad_norm": 0.5324852734502679, "learning_rate": 2.8040551500405517e-05, "loss": 0.6495, "step": 16964 }, { "epoch": 0.49531400543049836, "grad_norm": 0.5443653498901265, "learning_rate": 2.8038929440389295e-05, "loss": 0.6683, "step": 16965 }, { "epoch": 0.4953432016583457, "grad_norm": 0.4800741373282569, "learning_rate": 2.8037307380373073e-05, "loss": 0.5137, "step": 16966 }, { "epoch": 0.4953723978861931, "grad_norm": 0.5995381783371057, "learning_rate": 2.803568532035685e-05, "loss": 0.6907, "step": 16967 }, { "epoch": 0.49540159411404044, "grad_norm": 0.5871667924397862, "learning_rate": 2.803406326034063e-05, "loss": 0.6734, "step": 16968 }, { "epoch": 0.4954307903418878, "grad_norm": 0.560230419412359, "learning_rate": 2.8032441200324412e-05, "loss": 0.6843, "step": 16969 }, { "epoch": 0.49545998656973517, "grad_norm": 0.5852306812947854, "learning_rate": 2.8030819140308197e-05, "loss": 0.6979, "step": 16970 }, { "epoch": 0.49548918279758253, "grad_norm": 0.5649904964641715, "learning_rate": 2.8029197080291975e-05, "loss": 0.6758, "step": 16971 }, { "epoch": 0.4955183790254299, "grad_norm": 0.49489454005509587, "learning_rate": 2.8027575020275754e-05, "loss": 0.5598, "step": 16972 }, { "epoch": 0.49554757525327725, "grad_norm": 0.47528042411502186, "learning_rate": 2.8025952960259532e-05, "loss": 0.5576, "step": 16973 }, { "epoch": 0.4955767714811246, "grad_norm": 0.5247867520927206, "learning_rate": 2.802433090024331e-05, "loss": 0.5386, "step": 16974 }, { "epoch": 0.49560596770897203, "grad_norm": 0.5891839512229461, "learning_rate": 2.8022708840227092e-05, "loss": 0.749, "step": 16975 }, { "epoch": 0.4956351639368194, "grad_norm": 0.5083933074339338, "learning_rate": 2.802108678021087e-05, "loss": 0.6018, "step": 16976 }, { "epoch": 0.49566436016466675, "grad_norm": 0.5398871585300796, "learning_rate": 2.801946472019465e-05, "loss": 0.6489, "step": 16977 }, { "epoch": 0.4956935563925141, "grad_norm": 0.5601190534391114, "learning_rate": 2.8017842660178427e-05, "loss": 0.6452, "step": 16978 }, { "epoch": 0.4957227526203615, "grad_norm": 0.5136061052664843, "learning_rate": 2.801622060016221e-05, "loss": 0.596, "step": 16979 }, { "epoch": 0.49575194884820883, "grad_norm": 0.5254523427769054, "learning_rate": 2.8014598540145987e-05, "loss": 0.665, "step": 16980 }, { "epoch": 0.4957811450760562, "grad_norm": 0.5460439138396385, "learning_rate": 2.8012976480129766e-05, "loss": 0.6832, "step": 16981 }, { "epoch": 0.49581034130390356, "grad_norm": 0.5311311399410299, "learning_rate": 2.8011354420113544e-05, "loss": 0.6141, "step": 16982 }, { "epoch": 0.4958395375317509, "grad_norm": 0.5182902191810141, "learning_rate": 2.8009732360097323e-05, "loss": 0.6288, "step": 16983 }, { "epoch": 0.4958687337595983, "grad_norm": 0.5157662180301986, "learning_rate": 2.8008110300081104e-05, "loss": 0.583, "step": 16984 }, { "epoch": 0.49589792998744564, "grad_norm": 0.5063507424951998, "learning_rate": 2.8006488240064883e-05, "loss": 0.5436, "step": 16985 }, { "epoch": 0.495927126215293, "grad_norm": 0.5270426077067173, "learning_rate": 2.800486618004866e-05, "loss": 0.6343, "step": 16986 }, { "epoch": 0.49595632244314036, "grad_norm": 0.4937273649018053, "learning_rate": 2.800324412003244e-05, "loss": 0.5656, "step": 16987 }, { "epoch": 0.4959855186709877, "grad_norm": 0.5116626735662578, "learning_rate": 2.8001622060016218e-05, "loss": 0.6092, "step": 16988 }, { "epoch": 0.4960147148988351, "grad_norm": 0.4973278224553166, "learning_rate": 2.8000000000000003e-05, "loss": 0.6283, "step": 16989 }, { "epoch": 0.49604391112668245, "grad_norm": 0.501458103547044, "learning_rate": 2.7998377939983785e-05, "loss": 0.5658, "step": 16990 }, { "epoch": 0.4960731073545298, "grad_norm": 0.5299627752640439, "learning_rate": 2.7996755879967563e-05, "loss": 0.61, "step": 16991 }, { "epoch": 0.49610230358237717, "grad_norm": 0.4955187009971098, "learning_rate": 2.799513381995134e-05, "loss": 0.5839, "step": 16992 }, { "epoch": 0.49613149981022453, "grad_norm": 0.5457999862755321, "learning_rate": 2.799351175993512e-05, "loss": 0.6523, "step": 16993 }, { "epoch": 0.4961606960380719, "grad_norm": 0.49609040267684296, "learning_rate": 2.7991889699918898e-05, "loss": 0.5595, "step": 16994 }, { "epoch": 0.49618989226591925, "grad_norm": 0.5164927381068586, "learning_rate": 2.799026763990268e-05, "loss": 0.6071, "step": 16995 }, { "epoch": 0.4962190884937666, "grad_norm": 0.5488944686023666, "learning_rate": 2.798864557988646e-05, "loss": 0.6824, "step": 16996 }, { "epoch": 0.496248284721614, "grad_norm": 0.4980906320012256, "learning_rate": 2.7987023519870237e-05, "loss": 0.5875, "step": 16997 }, { "epoch": 0.49627748094946134, "grad_norm": 0.5770316551816564, "learning_rate": 2.7985401459854015e-05, "loss": 0.7012, "step": 16998 }, { "epoch": 0.4963066771773087, "grad_norm": 0.49961847660163516, "learning_rate": 2.7983779399837793e-05, "loss": 0.5405, "step": 16999 }, { "epoch": 0.49633587340515606, "grad_norm": 0.5303229446145917, "learning_rate": 2.7982157339821575e-05, "loss": 0.6041, "step": 17000 }, { "epoch": 0.4963650696330034, "grad_norm": 0.548961676388096, "learning_rate": 2.7980535279805354e-05, "loss": 0.6653, "step": 17001 }, { "epoch": 0.4963942658608508, "grad_norm": 0.50658596029717, "learning_rate": 2.7978913219789132e-05, "loss": 0.5687, "step": 17002 }, { "epoch": 0.49642346208869814, "grad_norm": 0.5355933772487729, "learning_rate": 2.797729115977291e-05, "loss": 0.5962, "step": 17003 }, { "epoch": 0.4964526583165455, "grad_norm": 0.5201532892371958, "learning_rate": 2.7975669099756692e-05, "loss": 0.557, "step": 17004 }, { "epoch": 0.49648185454439286, "grad_norm": 0.4946235209298727, "learning_rate": 2.797404703974047e-05, "loss": 0.5506, "step": 17005 }, { "epoch": 0.4965110507722402, "grad_norm": 0.5129848848405411, "learning_rate": 2.797242497972425e-05, "loss": 0.6341, "step": 17006 }, { "epoch": 0.4965402470000876, "grad_norm": 0.5897685887366506, "learning_rate": 2.7970802919708027e-05, "loss": 0.7508, "step": 17007 }, { "epoch": 0.49656944322793495, "grad_norm": 0.5864798073129532, "learning_rate": 2.7969180859691812e-05, "loss": 0.6679, "step": 17008 }, { "epoch": 0.4965986394557823, "grad_norm": 0.57707680659522, "learning_rate": 2.796755879967559e-05, "loss": 0.7078, "step": 17009 }, { "epoch": 0.49662783568362967, "grad_norm": 0.5456844793532035, "learning_rate": 2.796593673965937e-05, "loss": 0.6494, "step": 17010 }, { "epoch": 0.49665703191147703, "grad_norm": 0.5462157525867191, "learning_rate": 2.796431467964315e-05, "loss": 0.6595, "step": 17011 }, { "epoch": 0.4966862281393244, "grad_norm": 0.5303651028325301, "learning_rate": 2.796269261962693e-05, "loss": 0.628, "step": 17012 }, { "epoch": 0.49671542436717175, "grad_norm": 0.5028700231154934, "learning_rate": 2.7961070559610708e-05, "loss": 0.5582, "step": 17013 }, { "epoch": 0.4967446205950191, "grad_norm": 0.5065615117276798, "learning_rate": 2.7959448499594486e-05, "loss": 0.5761, "step": 17014 }, { "epoch": 0.4967738168228665, "grad_norm": 0.5564732334502691, "learning_rate": 2.7957826439578268e-05, "loss": 0.6538, "step": 17015 }, { "epoch": 0.49680301305071384, "grad_norm": 0.5105809522160046, "learning_rate": 2.7956204379562046e-05, "loss": 0.5947, "step": 17016 }, { "epoch": 0.4968322092785612, "grad_norm": 0.5261172118522056, "learning_rate": 2.7954582319545825e-05, "loss": 0.5601, "step": 17017 }, { "epoch": 0.49686140550640856, "grad_norm": 0.5968096061898234, "learning_rate": 2.7952960259529603e-05, "loss": 0.7096, "step": 17018 }, { "epoch": 0.4968906017342559, "grad_norm": 0.5418056461031233, "learning_rate": 2.795133819951338e-05, "loss": 0.6391, "step": 17019 }, { "epoch": 0.4969197979621033, "grad_norm": 0.5178698283306078, "learning_rate": 2.7949716139497163e-05, "loss": 0.6472, "step": 17020 }, { "epoch": 0.49694899418995064, "grad_norm": 0.5631866117989127, "learning_rate": 2.794809407948094e-05, "loss": 0.6273, "step": 17021 }, { "epoch": 0.496978190417798, "grad_norm": 0.5447495135188143, "learning_rate": 2.794647201946472e-05, "loss": 0.6864, "step": 17022 }, { "epoch": 0.49700738664564537, "grad_norm": 0.5358794821292207, "learning_rate": 2.7944849959448498e-05, "loss": 0.6145, "step": 17023 }, { "epoch": 0.4970365828734927, "grad_norm": 0.5463232987208112, "learning_rate": 2.794322789943228e-05, "loss": 0.6718, "step": 17024 }, { "epoch": 0.4970657791013401, "grad_norm": 0.49478004190927444, "learning_rate": 2.794160583941606e-05, "loss": 0.539, "step": 17025 }, { "epoch": 0.49709497532918745, "grad_norm": 0.5580723173505387, "learning_rate": 2.7939983779399837e-05, "loss": 0.6779, "step": 17026 }, { "epoch": 0.4971241715570348, "grad_norm": 0.5689583335614341, "learning_rate": 2.7938361719383622e-05, "loss": 0.6636, "step": 17027 }, { "epoch": 0.49715336778488217, "grad_norm": 0.5392380119674473, "learning_rate": 2.79367396593674e-05, "loss": 0.6675, "step": 17028 }, { "epoch": 0.49718256401272953, "grad_norm": 0.5117960160535416, "learning_rate": 2.793511759935118e-05, "loss": 0.5745, "step": 17029 }, { "epoch": 0.4972117602405769, "grad_norm": 0.479303461321707, "learning_rate": 2.7933495539334957e-05, "loss": 0.5051, "step": 17030 }, { "epoch": 0.49724095646842426, "grad_norm": 0.5404583626068568, "learning_rate": 2.793187347931874e-05, "loss": 0.6423, "step": 17031 }, { "epoch": 0.4972701526962716, "grad_norm": 0.5485782584384626, "learning_rate": 2.7930251419302517e-05, "loss": 0.6553, "step": 17032 }, { "epoch": 0.497299348924119, "grad_norm": 0.5042616220786954, "learning_rate": 2.7928629359286295e-05, "loss": 0.5703, "step": 17033 }, { "epoch": 0.49732854515196634, "grad_norm": 0.4781304327728063, "learning_rate": 2.7927007299270074e-05, "loss": 0.5089, "step": 17034 }, { "epoch": 0.4973577413798137, "grad_norm": 0.4707459903464369, "learning_rate": 2.7925385239253856e-05, "loss": 0.4875, "step": 17035 }, { "epoch": 0.4973869376076611, "grad_norm": 0.5250733517205123, "learning_rate": 2.7923763179237634e-05, "loss": 0.6025, "step": 17036 }, { "epoch": 0.4974161338355085, "grad_norm": 0.5155041210567497, "learning_rate": 2.7922141119221412e-05, "loss": 0.58, "step": 17037 }, { "epoch": 0.49744533006335584, "grad_norm": 0.5092464178560387, "learning_rate": 2.792051905920519e-05, "loss": 0.6051, "step": 17038 }, { "epoch": 0.4974745262912032, "grad_norm": 0.500785389841955, "learning_rate": 2.791889699918897e-05, "loss": 0.534, "step": 17039 }, { "epoch": 0.49750372251905056, "grad_norm": 0.5105141073707256, "learning_rate": 2.791727493917275e-05, "loss": 0.5843, "step": 17040 }, { "epoch": 0.4975329187468979, "grad_norm": 0.5162473134808482, "learning_rate": 2.791565287915653e-05, "loss": 0.5932, "step": 17041 }, { "epoch": 0.4975621149747453, "grad_norm": 0.5617031614193404, "learning_rate": 2.7914030819140308e-05, "loss": 0.665, "step": 17042 }, { "epoch": 0.49759131120259265, "grad_norm": 0.5339124869622329, "learning_rate": 2.7912408759124086e-05, "loss": 0.637, "step": 17043 }, { "epoch": 0.49762050743044, "grad_norm": 0.5238529843347732, "learning_rate": 2.7910786699107864e-05, "loss": 0.5975, "step": 17044 }, { "epoch": 0.49764970365828737, "grad_norm": 0.5296441363989968, "learning_rate": 2.7909164639091646e-05, "loss": 0.6419, "step": 17045 }, { "epoch": 0.49767889988613473, "grad_norm": 0.5095065905534617, "learning_rate": 2.790754257907543e-05, "loss": 0.595, "step": 17046 }, { "epoch": 0.4977080961139821, "grad_norm": 0.5342216573725087, "learning_rate": 2.790592051905921e-05, "loss": 0.6686, "step": 17047 }, { "epoch": 0.49773729234182945, "grad_norm": 0.5652002452293031, "learning_rate": 2.7904298459042988e-05, "loss": 0.6552, "step": 17048 }, { "epoch": 0.4977664885696768, "grad_norm": 0.5082265424991362, "learning_rate": 2.7902676399026766e-05, "loss": 0.6052, "step": 17049 }, { "epoch": 0.4977956847975242, "grad_norm": 0.508718912270316, "learning_rate": 2.7901054339010545e-05, "loss": 0.5953, "step": 17050 }, { "epoch": 0.49782488102537154, "grad_norm": 0.5361043969520721, "learning_rate": 2.7899432278994327e-05, "loss": 0.617, "step": 17051 }, { "epoch": 0.4978540772532189, "grad_norm": 0.48798005399918404, "learning_rate": 2.7897810218978105e-05, "loss": 0.5761, "step": 17052 }, { "epoch": 0.49788327348106626, "grad_norm": 0.5475306660551265, "learning_rate": 2.7896188158961883e-05, "loss": 0.6838, "step": 17053 }, { "epoch": 0.4979124697089136, "grad_norm": 0.506864728300653, "learning_rate": 2.789456609894566e-05, "loss": 0.5649, "step": 17054 }, { "epoch": 0.497941665936761, "grad_norm": 0.5461066047660661, "learning_rate": 2.789294403892944e-05, "loss": 0.6999, "step": 17055 }, { "epoch": 0.49797086216460834, "grad_norm": 0.5400495540403958, "learning_rate": 2.7891321978913222e-05, "loss": 0.6446, "step": 17056 }, { "epoch": 0.4980000583924557, "grad_norm": 0.5429483191103994, "learning_rate": 2.7889699918897e-05, "loss": 0.6105, "step": 17057 }, { "epoch": 0.49802925462030306, "grad_norm": 0.5463780587006845, "learning_rate": 2.788807785888078e-05, "loss": 0.5538, "step": 17058 }, { "epoch": 0.4980584508481504, "grad_norm": 0.4919405935471704, "learning_rate": 2.7886455798864557e-05, "loss": 0.5471, "step": 17059 }, { "epoch": 0.4980876470759978, "grad_norm": 0.565500125883659, "learning_rate": 2.788483373884834e-05, "loss": 0.6754, "step": 17060 }, { "epoch": 0.49811684330384515, "grad_norm": 0.5928171737253102, "learning_rate": 2.7883211678832117e-05, "loss": 0.7165, "step": 17061 }, { "epoch": 0.4981460395316925, "grad_norm": 0.5145942517009957, "learning_rate": 2.7881589618815895e-05, "loss": 0.5519, "step": 17062 }, { "epoch": 0.49817523575953987, "grad_norm": 0.5056358883541561, "learning_rate": 2.7879967558799674e-05, "loss": 0.5477, "step": 17063 }, { "epoch": 0.49820443198738723, "grad_norm": 0.5207435168715088, "learning_rate": 2.7878345498783452e-05, "loss": 0.6103, "step": 17064 }, { "epoch": 0.4982336282152346, "grad_norm": 0.5040704153361127, "learning_rate": 2.7876723438767237e-05, "loss": 0.5689, "step": 17065 }, { "epoch": 0.49826282444308195, "grad_norm": 0.5847180096614732, "learning_rate": 2.7875101378751016e-05, "loss": 0.7185, "step": 17066 }, { "epoch": 0.4982920206709293, "grad_norm": 0.5004144672238964, "learning_rate": 2.7873479318734797e-05, "loss": 0.568, "step": 17067 }, { "epoch": 0.4983212168987767, "grad_norm": 0.525529104172215, "learning_rate": 2.7871857258718576e-05, "loss": 0.6011, "step": 17068 }, { "epoch": 0.49835041312662404, "grad_norm": 0.5451263237425208, "learning_rate": 2.7870235198702354e-05, "loss": 0.6848, "step": 17069 }, { "epoch": 0.4983796093544714, "grad_norm": 0.5038309346979016, "learning_rate": 2.7868613138686133e-05, "loss": 0.5955, "step": 17070 }, { "epoch": 0.49840880558231876, "grad_norm": 0.504611932631355, "learning_rate": 2.7866991078669914e-05, "loss": 0.5873, "step": 17071 }, { "epoch": 0.4984380018101661, "grad_norm": 0.5141334612007348, "learning_rate": 2.7865369018653693e-05, "loss": 0.6105, "step": 17072 }, { "epoch": 0.4984671980380135, "grad_norm": 0.5030893489140168, "learning_rate": 2.786374695863747e-05, "loss": 0.5253, "step": 17073 }, { "epoch": 0.49849639426586084, "grad_norm": 0.5469158504902574, "learning_rate": 2.786212489862125e-05, "loss": 0.6578, "step": 17074 }, { "epoch": 0.4985255904937082, "grad_norm": 0.492263341453447, "learning_rate": 2.7860502838605028e-05, "loss": 0.5694, "step": 17075 }, { "epoch": 0.49855478672155557, "grad_norm": 0.49913473386036816, "learning_rate": 2.785888077858881e-05, "loss": 0.534, "step": 17076 }, { "epoch": 0.4985839829494029, "grad_norm": 0.5494289563441256, "learning_rate": 2.7857258718572588e-05, "loss": 0.6773, "step": 17077 }, { "epoch": 0.4986131791772503, "grad_norm": 0.5495161747532343, "learning_rate": 2.7855636658556366e-05, "loss": 0.6606, "step": 17078 }, { "epoch": 0.49864237540509765, "grad_norm": 0.5193294966806367, "learning_rate": 2.7854014598540145e-05, "loss": 0.6095, "step": 17079 }, { "epoch": 0.498671571632945, "grad_norm": 0.5362603762513065, "learning_rate": 2.7852392538523926e-05, "loss": 0.6079, "step": 17080 }, { "epoch": 0.49870076786079237, "grad_norm": 0.5396545430696041, "learning_rate": 2.7850770478507705e-05, "loss": 0.6552, "step": 17081 }, { "epoch": 0.49872996408863973, "grad_norm": 0.5768934020383005, "learning_rate": 2.7849148418491483e-05, "loss": 0.7074, "step": 17082 }, { "epoch": 0.4987591603164871, "grad_norm": 0.5159645236481338, "learning_rate": 2.784752635847526e-05, "loss": 0.6199, "step": 17083 }, { "epoch": 0.49878835654433445, "grad_norm": 0.5248762747108067, "learning_rate": 2.7845904298459047e-05, "loss": 0.602, "step": 17084 }, { "epoch": 0.4988175527721818, "grad_norm": 0.5208161347071885, "learning_rate": 2.7844282238442825e-05, "loss": 0.5778, "step": 17085 }, { "epoch": 0.4988467490000292, "grad_norm": 0.5603933635198858, "learning_rate": 2.7842660178426603e-05, "loss": 0.6686, "step": 17086 }, { "epoch": 0.49887594522787654, "grad_norm": 0.4794822092447809, "learning_rate": 2.7841038118410385e-05, "loss": 0.5277, "step": 17087 }, { "epoch": 0.4989051414557239, "grad_norm": 0.5150473589256084, "learning_rate": 2.7839416058394164e-05, "loss": 0.6029, "step": 17088 }, { "epoch": 0.49893433768357126, "grad_norm": 0.5330455506700349, "learning_rate": 2.7837793998377942e-05, "loss": 0.5728, "step": 17089 }, { "epoch": 0.4989635339114186, "grad_norm": 0.5449962517579151, "learning_rate": 2.783617193836172e-05, "loss": 0.6666, "step": 17090 }, { "epoch": 0.498992730139266, "grad_norm": 0.4807403798615183, "learning_rate": 2.7834549878345502e-05, "loss": 0.5358, "step": 17091 }, { "epoch": 0.49902192636711334, "grad_norm": 0.5487384884666202, "learning_rate": 2.783292781832928e-05, "loss": 0.6596, "step": 17092 }, { "epoch": 0.4990511225949607, "grad_norm": 0.4860091470010998, "learning_rate": 2.783130575831306e-05, "loss": 0.514, "step": 17093 }, { "epoch": 0.49908031882280807, "grad_norm": 0.5123776533523938, "learning_rate": 2.7829683698296837e-05, "loss": 0.6033, "step": 17094 }, { "epoch": 0.49910951505065543, "grad_norm": 0.5593337406642029, "learning_rate": 2.7828061638280616e-05, "loss": 0.6622, "step": 17095 }, { "epoch": 0.49913871127850284, "grad_norm": 0.4867654153730626, "learning_rate": 2.7826439578264397e-05, "loss": 0.5518, "step": 17096 }, { "epoch": 0.4991679075063502, "grad_norm": 0.5224658596829284, "learning_rate": 2.7824817518248176e-05, "loss": 0.6128, "step": 17097 }, { "epoch": 0.49919710373419757, "grad_norm": 0.5301078059204712, "learning_rate": 2.7823195458231954e-05, "loss": 0.615, "step": 17098 }, { "epoch": 0.49922629996204493, "grad_norm": 0.5194325565251062, "learning_rate": 2.7821573398215733e-05, "loss": 0.5901, "step": 17099 }, { "epoch": 0.4992554961898923, "grad_norm": 0.5313867944319323, "learning_rate": 2.781995133819951e-05, "loss": 0.6151, "step": 17100 }, { "epoch": 0.49928469241773965, "grad_norm": 0.5124730560673187, "learning_rate": 2.7818329278183293e-05, "loss": 0.5768, "step": 17101 }, { "epoch": 0.499313888645587, "grad_norm": 0.5071443146042051, "learning_rate": 2.7816707218167078e-05, "loss": 0.5726, "step": 17102 }, { "epoch": 0.4993430848734344, "grad_norm": 0.5423177417441897, "learning_rate": 2.7815085158150856e-05, "loss": 0.6236, "step": 17103 }, { "epoch": 0.49937228110128173, "grad_norm": 0.5340217923048496, "learning_rate": 2.7813463098134635e-05, "loss": 0.6264, "step": 17104 }, { "epoch": 0.4994014773291291, "grad_norm": 0.5025034805391625, "learning_rate": 2.7811841038118413e-05, "loss": 0.5607, "step": 17105 }, { "epoch": 0.49943067355697646, "grad_norm": 0.6159935269195503, "learning_rate": 2.781021897810219e-05, "loss": 0.6318, "step": 17106 }, { "epoch": 0.4994598697848238, "grad_norm": 0.5072269158656307, "learning_rate": 2.7808596918085973e-05, "loss": 0.5809, "step": 17107 }, { "epoch": 0.4994890660126712, "grad_norm": 0.5457283804475661, "learning_rate": 2.780697485806975e-05, "loss": 0.6567, "step": 17108 }, { "epoch": 0.49951826224051854, "grad_norm": 0.5157077958157956, "learning_rate": 2.780535279805353e-05, "loss": 0.6003, "step": 17109 }, { "epoch": 0.4995474584683659, "grad_norm": 0.5293461533313482, "learning_rate": 2.7803730738037308e-05, "loss": 0.6016, "step": 17110 }, { "epoch": 0.49957665469621326, "grad_norm": 0.6015177297973687, "learning_rate": 2.7802108678021087e-05, "loss": 0.6288, "step": 17111 }, { "epoch": 0.4996058509240606, "grad_norm": 0.5220008175839264, "learning_rate": 2.780048661800487e-05, "loss": 0.6135, "step": 17112 }, { "epoch": 0.499635047151908, "grad_norm": 0.5331539735680707, "learning_rate": 2.7798864557988647e-05, "loss": 0.6587, "step": 17113 }, { "epoch": 0.49966424337975535, "grad_norm": 0.5491509309972569, "learning_rate": 2.7797242497972425e-05, "loss": 0.6257, "step": 17114 }, { "epoch": 0.4996934396076027, "grad_norm": 0.5011440846664107, "learning_rate": 2.7795620437956203e-05, "loss": 0.5599, "step": 17115 }, { "epoch": 0.49972263583545007, "grad_norm": 0.5364459401161847, "learning_rate": 2.7793998377939985e-05, "loss": 0.5917, "step": 17116 }, { "epoch": 0.49975183206329743, "grad_norm": 0.5065965959998491, "learning_rate": 2.7792376317923764e-05, "loss": 0.5762, "step": 17117 }, { "epoch": 0.4997810282911448, "grad_norm": 0.5359236867351498, "learning_rate": 2.7790754257907542e-05, "loss": 0.6402, "step": 17118 }, { "epoch": 0.49981022451899215, "grad_norm": 0.5396507762424503, "learning_rate": 2.778913219789132e-05, "loss": 0.6252, "step": 17119 }, { "epoch": 0.4998394207468395, "grad_norm": 0.5314940074892347, "learning_rate": 2.77875101378751e-05, "loss": 0.6514, "step": 17120 }, { "epoch": 0.4998686169746869, "grad_norm": 0.5564151895584997, "learning_rate": 2.7785888077858884e-05, "loss": 0.6305, "step": 17121 }, { "epoch": 0.49989781320253424, "grad_norm": 0.5602237660907873, "learning_rate": 2.7784266017842666e-05, "loss": 0.6464, "step": 17122 }, { "epoch": 0.4999270094303816, "grad_norm": 0.5238405878837783, "learning_rate": 2.7782643957826444e-05, "loss": 0.6332, "step": 17123 }, { "epoch": 0.49995620565822896, "grad_norm": 0.528890812424943, "learning_rate": 2.7781021897810222e-05, "loss": 0.6853, "step": 17124 }, { "epoch": 0.4999854018860763, "grad_norm": 0.5871384101617293, "learning_rate": 2.7779399837794e-05, "loss": 0.6198, "step": 17125 }, { "epoch": 0.5000145981139237, "grad_norm": 0.49163322818820626, "learning_rate": 2.777777777777778e-05, "loss": 0.5281, "step": 17126 }, { "epoch": 0.500043794341771, "grad_norm": 0.5404631138628394, "learning_rate": 2.777615571776156e-05, "loss": 0.6298, "step": 17127 }, { "epoch": 0.5000729905696184, "grad_norm": 0.517048265164883, "learning_rate": 2.777453365774534e-05, "loss": 0.628, "step": 17128 }, { "epoch": 0.5001021867974658, "grad_norm": 0.5070219813271433, "learning_rate": 2.7772911597729118e-05, "loss": 0.5978, "step": 17129 }, { "epoch": 0.5001313830253131, "grad_norm": 0.5328991429264472, "learning_rate": 2.7771289537712896e-05, "loss": 0.6466, "step": 17130 }, { "epoch": 0.5001605792531605, "grad_norm": 0.5147239018515681, "learning_rate": 2.7769667477696674e-05, "loss": 0.6544, "step": 17131 }, { "epoch": 0.5001897754810078, "grad_norm": 0.5317887677758438, "learning_rate": 2.7768045417680456e-05, "loss": 0.6079, "step": 17132 }, { "epoch": 0.5002189717088552, "grad_norm": 0.49841025866421185, "learning_rate": 2.7766423357664235e-05, "loss": 0.5607, "step": 17133 }, { "epoch": 0.5002481679367026, "grad_norm": 0.508673268939643, "learning_rate": 2.7764801297648013e-05, "loss": 0.605, "step": 17134 }, { "epoch": 0.5002773641645499, "grad_norm": 0.5493995907152777, "learning_rate": 2.776317923763179e-05, "loss": 0.6209, "step": 17135 }, { "epoch": 0.5003065603923973, "grad_norm": 0.5655670315897007, "learning_rate": 2.7761557177615573e-05, "loss": 0.6998, "step": 17136 }, { "epoch": 0.5003357566202447, "grad_norm": 0.5128779562617541, "learning_rate": 2.775993511759935e-05, "loss": 0.6268, "step": 17137 }, { "epoch": 0.500364952848092, "grad_norm": 0.5303186045531786, "learning_rate": 2.775831305758313e-05, "loss": 0.6229, "step": 17138 }, { "epoch": 0.5003941490759394, "grad_norm": 0.5199964677517506, "learning_rate": 2.7756690997566908e-05, "loss": 0.608, "step": 17139 }, { "epoch": 0.5004233453037867, "grad_norm": 0.5296794122408485, "learning_rate": 2.7755068937550693e-05, "loss": 0.6115, "step": 17140 }, { "epoch": 0.5004525415316341, "grad_norm": 0.5701646050032471, "learning_rate": 2.775344687753447e-05, "loss": 0.6867, "step": 17141 }, { "epoch": 0.5004817377594815, "grad_norm": 0.5076615165334979, "learning_rate": 2.775182481751825e-05, "loss": 0.595, "step": 17142 }, { "epoch": 0.5005109339873288, "grad_norm": 0.5335006314327698, "learning_rate": 2.7750202757502032e-05, "loss": 0.6229, "step": 17143 }, { "epoch": 0.5005401302151762, "grad_norm": 0.5309746870642784, "learning_rate": 2.774858069748581e-05, "loss": 0.6458, "step": 17144 }, { "epoch": 0.5005693264430235, "grad_norm": 0.5282021403924216, "learning_rate": 2.774695863746959e-05, "loss": 0.6097, "step": 17145 }, { "epoch": 0.5005985226708709, "grad_norm": 0.5016704717470153, "learning_rate": 2.7745336577453367e-05, "loss": 0.5976, "step": 17146 }, { "epoch": 0.5006277188987183, "grad_norm": 0.5248419792021184, "learning_rate": 2.774371451743715e-05, "loss": 0.6001, "step": 17147 }, { "epoch": 0.5006569151265656, "grad_norm": 0.5457049674526802, "learning_rate": 2.7742092457420927e-05, "loss": 0.6392, "step": 17148 }, { "epoch": 0.500686111354413, "grad_norm": 0.546770378260444, "learning_rate": 2.7740470397404705e-05, "loss": 0.6421, "step": 17149 }, { "epoch": 0.5007153075822603, "grad_norm": 0.6292412267092278, "learning_rate": 2.7738848337388484e-05, "loss": 0.6102, "step": 17150 }, { "epoch": 0.5007445038101077, "grad_norm": 0.5382528268636555, "learning_rate": 2.7737226277372262e-05, "loss": 0.6438, "step": 17151 }, { "epoch": 0.5007737000379551, "grad_norm": 0.5501573181500548, "learning_rate": 2.7735604217356044e-05, "loss": 0.6684, "step": 17152 }, { "epoch": 0.5008028962658024, "grad_norm": 0.5414391084743359, "learning_rate": 2.7733982157339822e-05, "loss": 0.663, "step": 17153 }, { "epoch": 0.5008320924936498, "grad_norm": 0.551809232617652, "learning_rate": 2.77323600973236e-05, "loss": 0.6063, "step": 17154 }, { "epoch": 0.5008612887214972, "grad_norm": 0.5071987422761556, "learning_rate": 2.773073803730738e-05, "loss": 0.6229, "step": 17155 }, { "epoch": 0.5008904849493445, "grad_norm": 0.5119563699262305, "learning_rate": 2.7729115977291157e-05, "loss": 0.5913, "step": 17156 }, { "epoch": 0.5009196811771919, "grad_norm": 0.52963955880062, "learning_rate": 2.772749391727494e-05, "loss": 0.6147, "step": 17157 }, { "epoch": 0.5009488774050392, "grad_norm": 0.5271241361980878, "learning_rate": 2.7725871857258718e-05, "loss": 0.5917, "step": 17158 }, { "epoch": 0.5009780736328866, "grad_norm": 0.5459588417144755, "learning_rate": 2.7724249797242503e-05, "loss": 0.6712, "step": 17159 }, { "epoch": 0.501007269860734, "grad_norm": 0.51202396412343, "learning_rate": 2.772262773722628e-05, "loss": 0.6134, "step": 17160 }, { "epoch": 0.5010364660885813, "grad_norm": 0.542641483049383, "learning_rate": 2.772100567721006e-05, "loss": 0.6148, "step": 17161 }, { "epoch": 0.5010656623164287, "grad_norm": 0.5767200241275666, "learning_rate": 2.7719383617193838e-05, "loss": 0.6359, "step": 17162 }, { "epoch": 0.501094858544276, "grad_norm": 0.5434888055003079, "learning_rate": 2.771776155717762e-05, "loss": 0.6718, "step": 17163 }, { "epoch": 0.5011240547721234, "grad_norm": 0.48923466521748027, "learning_rate": 2.7716139497161398e-05, "loss": 0.5335, "step": 17164 }, { "epoch": 0.5011532509999708, "grad_norm": 0.5445030707743801, "learning_rate": 2.7714517437145176e-05, "loss": 0.6181, "step": 17165 }, { "epoch": 0.5011824472278181, "grad_norm": 0.5335052214339051, "learning_rate": 2.7712895377128955e-05, "loss": 0.6575, "step": 17166 }, { "epoch": 0.5012116434556655, "grad_norm": 0.5495773445238639, "learning_rate": 2.7711273317112733e-05, "loss": 0.6338, "step": 17167 }, { "epoch": 0.5012408396835129, "grad_norm": 0.5345085529077319, "learning_rate": 2.7709651257096515e-05, "loss": 0.6237, "step": 17168 }, { "epoch": 0.5012700359113602, "grad_norm": 0.5074794585329321, "learning_rate": 2.7708029197080293e-05, "loss": 0.5782, "step": 17169 }, { "epoch": 0.5012992321392076, "grad_norm": 0.5446930452326997, "learning_rate": 2.770640713706407e-05, "loss": 0.5931, "step": 17170 }, { "epoch": 0.5013284283670549, "grad_norm": 0.5050007069493463, "learning_rate": 2.770478507704785e-05, "loss": 0.5917, "step": 17171 }, { "epoch": 0.5013576245949023, "grad_norm": 0.5632372377110475, "learning_rate": 2.7703163017031632e-05, "loss": 0.6772, "step": 17172 }, { "epoch": 0.5013868208227497, "grad_norm": 0.537828936003214, "learning_rate": 2.770154095701541e-05, "loss": 0.6333, "step": 17173 }, { "epoch": 0.501416017050597, "grad_norm": 0.5156054660234506, "learning_rate": 2.769991889699919e-05, "loss": 0.5742, "step": 17174 }, { "epoch": 0.5014452132784444, "grad_norm": 0.4940213566838814, "learning_rate": 2.7698296836982967e-05, "loss": 0.5685, "step": 17175 }, { "epoch": 0.5014744095062917, "grad_norm": 0.5177853506761851, "learning_rate": 2.7696674776966745e-05, "loss": 0.6212, "step": 17176 }, { "epoch": 0.5015036057341391, "grad_norm": 0.5530860034321462, "learning_rate": 2.7695052716950527e-05, "loss": 0.6559, "step": 17177 }, { "epoch": 0.5015328019619865, "grad_norm": 0.5421622447866382, "learning_rate": 2.7693430656934312e-05, "loss": 0.6421, "step": 17178 }, { "epoch": 0.5015619981898338, "grad_norm": 0.5384340522543437, "learning_rate": 2.769180859691809e-05, "loss": 0.5664, "step": 17179 }, { "epoch": 0.5015911944176812, "grad_norm": 0.517077819612424, "learning_rate": 2.769018653690187e-05, "loss": 0.5717, "step": 17180 }, { "epoch": 0.5016203906455285, "grad_norm": 0.5730895857281496, "learning_rate": 2.7688564476885647e-05, "loss": 0.5758, "step": 17181 }, { "epoch": 0.5016495868733759, "grad_norm": 0.5203515884389709, "learning_rate": 2.7686942416869426e-05, "loss": 0.6112, "step": 17182 }, { "epoch": 0.5016787831012233, "grad_norm": 0.5263632882858411, "learning_rate": 2.7685320356853207e-05, "loss": 0.6016, "step": 17183 }, { "epoch": 0.5017079793290706, "grad_norm": 0.5232582930563648, "learning_rate": 2.7683698296836986e-05, "loss": 0.5973, "step": 17184 }, { "epoch": 0.501737175556918, "grad_norm": 0.5362732844744811, "learning_rate": 2.7682076236820764e-05, "loss": 0.6495, "step": 17185 }, { "epoch": 0.5017663717847655, "grad_norm": 0.5448293559750312, "learning_rate": 2.7680454176804543e-05, "loss": 0.6502, "step": 17186 }, { "epoch": 0.5017955680126128, "grad_norm": 0.5587855809271447, "learning_rate": 2.767883211678832e-05, "loss": 0.6879, "step": 17187 }, { "epoch": 0.5018247642404602, "grad_norm": 0.5087962171810815, "learning_rate": 2.7677210056772103e-05, "loss": 0.642, "step": 17188 }, { "epoch": 0.5018539604683075, "grad_norm": 0.5432388385565363, "learning_rate": 2.767558799675588e-05, "loss": 0.6101, "step": 17189 }, { "epoch": 0.5018831566961549, "grad_norm": 0.4768416528333687, "learning_rate": 2.767396593673966e-05, "loss": 0.5517, "step": 17190 }, { "epoch": 0.5019123529240023, "grad_norm": 0.5045041303714463, "learning_rate": 2.7672343876723438e-05, "loss": 0.5833, "step": 17191 }, { "epoch": 0.5019415491518496, "grad_norm": 0.5204230659344563, "learning_rate": 2.767072181670722e-05, "loss": 0.6128, "step": 17192 }, { "epoch": 0.501970745379697, "grad_norm": 0.4869398430842161, "learning_rate": 2.7669099756690998e-05, "loss": 0.5488, "step": 17193 }, { "epoch": 0.5019999416075444, "grad_norm": 0.5045938683553418, "learning_rate": 2.7667477696674776e-05, "loss": 0.5248, "step": 17194 }, { "epoch": 0.5020291378353917, "grad_norm": 0.5287276998743395, "learning_rate": 2.7665855636658555e-05, "loss": 0.6132, "step": 17195 }, { "epoch": 0.5020583340632391, "grad_norm": 0.5303959337620697, "learning_rate": 2.7664233576642333e-05, "loss": 0.6304, "step": 17196 }, { "epoch": 0.5020875302910864, "grad_norm": 0.5166913419079172, "learning_rate": 2.7662611516626118e-05, "loss": 0.5784, "step": 17197 }, { "epoch": 0.5021167265189338, "grad_norm": 0.5038695343384129, "learning_rate": 2.7660989456609897e-05, "loss": 0.5711, "step": 17198 }, { "epoch": 0.5021459227467812, "grad_norm": 0.5373735136871582, "learning_rate": 2.765936739659368e-05, "loss": 0.6644, "step": 17199 }, { "epoch": 0.5021751189746285, "grad_norm": 0.5425892171262211, "learning_rate": 2.7657745336577457e-05, "loss": 0.6518, "step": 17200 }, { "epoch": 0.5022043152024759, "grad_norm": 0.5215411792242614, "learning_rate": 2.7656123276561235e-05, "loss": 0.6003, "step": 17201 }, { "epoch": 0.5022335114303232, "grad_norm": 0.5183470458141308, "learning_rate": 2.7654501216545013e-05, "loss": 0.5832, "step": 17202 }, { "epoch": 0.5022627076581706, "grad_norm": 0.5050577088892204, "learning_rate": 2.7652879156528795e-05, "loss": 0.586, "step": 17203 }, { "epoch": 0.502291903886018, "grad_norm": 0.5373068413567968, "learning_rate": 2.7651257096512574e-05, "loss": 0.6179, "step": 17204 }, { "epoch": 0.5023211001138653, "grad_norm": 0.5107109868152148, "learning_rate": 2.7649635036496352e-05, "loss": 0.5719, "step": 17205 }, { "epoch": 0.5023502963417127, "grad_norm": 0.5007513844674178, "learning_rate": 2.764801297648013e-05, "loss": 0.5737, "step": 17206 }, { "epoch": 0.50237949256956, "grad_norm": 0.5109864133470411, "learning_rate": 2.764639091646391e-05, "loss": 0.5872, "step": 17207 }, { "epoch": 0.5024086887974074, "grad_norm": 0.536556467426446, "learning_rate": 2.764476885644769e-05, "loss": 0.6999, "step": 17208 }, { "epoch": 0.5024378850252548, "grad_norm": 0.514012052007437, "learning_rate": 2.764314679643147e-05, "loss": 0.6122, "step": 17209 }, { "epoch": 0.5024670812531021, "grad_norm": 0.49032940565063493, "learning_rate": 2.7641524736415247e-05, "loss": 0.5474, "step": 17210 }, { "epoch": 0.5024962774809495, "grad_norm": 0.5825698169821465, "learning_rate": 2.7639902676399026e-05, "loss": 0.6885, "step": 17211 }, { "epoch": 0.5025254737087969, "grad_norm": 0.4979726052000027, "learning_rate": 2.7638280616382804e-05, "loss": 0.5388, "step": 17212 }, { "epoch": 0.5025546699366442, "grad_norm": 0.5346519498187755, "learning_rate": 2.7636658556366586e-05, "loss": 0.5799, "step": 17213 }, { "epoch": 0.5025838661644916, "grad_norm": 0.5325733503235176, "learning_rate": 2.7635036496350364e-05, "loss": 0.6366, "step": 17214 }, { "epoch": 0.5026130623923389, "grad_norm": 0.5114669366985991, "learning_rate": 2.7633414436334142e-05, "loss": 0.5917, "step": 17215 }, { "epoch": 0.5026422586201863, "grad_norm": 0.5903707225335125, "learning_rate": 2.7631792376317928e-05, "loss": 0.7688, "step": 17216 }, { "epoch": 0.5026714548480337, "grad_norm": 0.5459205850519371, "learning_rate": 2.7630170316301706e-05, "loss": 0.6744, "step": 17217 }, { "epoch": 0.502700651075881, "grad_norm": 0.5869376430272781, "learning_rate": 2.7628548256285484e-05, "loss": 0.6325, "step": 17218 }, { "epoch": 0.5027298473037284, "grad_norm": 0.5518571098819096, "learning_rate": 2.7626926196269266e-05, "loss": 0.6432, "step": 17219 }, { "epoch": 0.5027590435315757, "grad_norm": 0.7547996377045849, "learning_rate": 2.7625304136253045e-05, "loss": 0.64, "step": 17220 }, { "epoch": 0.5027882397594231, "grad_norm": 0.49876767889952806, "learning_rate": 2.7623682076236823e-05, "loss": 0.5191, "step": 17221 }, { "epoch": 0.5028174359872705, "grad_norm": 0.5549557459528071, "learning_rate": 2.76220600162206e-05, "loss": 0.6436, "step": 17222 }, { "epoch": 0.5028466322151178, "grad_norm": 0.5414801436184796, "learning_rate": 2.7620437956204383e-05, "loss": 0.6647, "step": 17223 }, { "epoch": 0.5028758284429652, "grad_norm": 0.530379716470093, "learning_rate": 2.761881589618816e-05, "loss": 0.6342, "step": 17224 }, { "epoch": 0.5029050246708126, "grad_norm": 0.5387071121145444, "learning_rate": 2.761719383617194e-05, "loss": 0.6178, "step": 17225 }, { "epoch": 0.5029342208986599, "grad_norm": 0.4976142309848625, "learning_rate": 2.7615571776155718e-05, "loss": 0.5785, "step": 17226 }, { "epoch": 0.5029634171265073, "grad_norm": 0.5325586040243471, "learning_rate": 2.7613949716139497e-05, "loss": 0.6503, "step": 17227 }, { "epoch": 0.5029926133543546, "grad_norm": 0.5500290328263266, "learning_rate": 2.7612327656123278e-05, "loss": 0.6784, "step": 17228 }, { "epoch": 0.503021809582202, "grad_norm": 0.5424012714092419, "learning_rate": 2.7610705596107057e-05, "loss": 0.6742, "step": 17229 }, { "epoch": 0.5030510058100494, "grad_norm": 0.5024950831156819, "learning_rate": 2.7609083536090835e-05, "loss": 0.5808, "step": 17230 }, { "epoch": 0.5030802020378967, "grad_norm": 0.5218227373094118, "learning_rate": 2.7607461476074613e-05, "loss": 0.64, "step": 17231 }, { "epoch": 0.5031093982657441, "grad_norm": 0.5267813543153167, "learning_rate": 2.7605839416058392e-05, "loss": 0.5883, "step": 17232 }, { "epoch": 0.5031385944935914, "grad_norm": 0.5422103240330656, "learning_rate": 2.7604217356042174e-05, "loss": 0.6425, "step": 17233 }, { "epoch": 0.5031677907214388, "grad_norm": 0.5335276394283127, "learning_rate": 2.7602595296025952e-05, "loss": 0.6354, "step": 17234 }, { "epoch": 0.5031969869492862, "grad_norm": 0.4969378243489914, "learning_rate": 2.7600973236009737e-05, "loss": 0.5714, "step": 17235 }, { "epoch": 0.5032261831771335, "grad_norm": 0.5354829285593045, "learning_rate": 2.7599351175993515e-05, "loss": 0.6409, "step": 17236 }, { "epoch": 0.5032553794049809, "grad_norm": 0.4788862955313094, "learning_rate": 2.7597729115977294e-05, "loss": 0.5362, "step": 17237 }, { "epoch": 0.5032845756328282, "grad_norm": 0.531658633221626, "learning_rate": 2.7596107055961072e-05, "loss": 0.6319, "step": 17238 }, { "epoch": 0.5033137718606756, "grad_norm": 0.5337374771863002, "learning_rate": 2.7594484995944854e-05, "loss": 0.5932, "step": 17239 }, { "epoch": 0.503342968088523, "grad_norm": 0.5137185110914864, "learning_rate": 2.7592862935928632e-05, "loss": 0.5769, "step": 17240 }, { "epoch": 0.5033721643163703, "grad_norm": 0.5486309309394204, "learning_rate": 2.759124087591241e-05, "loss": 0.6602, "step": 17241 }, { "epoch": 0.5034013605442177, "grad_norm": 0.4939521206430206, "learning_rate": 2.758961881589619e-05, "loss": 0.564, "step": 17242 }, { "epoch": 0.503430556772065, "grad_norm": 0.515381800476641, "learning_rate": 2.7587996755879967e-05, "loss": 0.6014, "step": 17243 }, { "epoch": 0.5034597529999124, "grad_norm": 0.5447649881932259, "learning_rate": 2.758637469586375e-05, "loss": 0.69, "step": 17244 }, { "epoch": 0.5034889492277598, "grad_norm": 0.5286980144695509, "learning_rate": 2.7584752635847528e-05, "loss": 0.6308, "step": 17245 }, { "epoch": 0.5035181454556071, "grad_norm": 0.48725213636501963, "learning_rate": 2.7583130575831306e-05, "loss": 0.5357, "step": 17246 }, { "epoch": 0.5035473416834545, "grad_norm": 0.6438588894344673, "learning_rate": 2.7581508515815084e-05, "loss": 0.6856, "step": 17247 }, { "epoch": 0.5035765379113019, "grad_norm": 0.5346755410576485, "learning_rate": 2.7579886455798866e-05, "loss": 0.6178, "step": 17248 }, { "epoch": 0.5036057341391492, "grad_norm": 0.5699964709078053, "learning_rate": 2.7578264395782644e-05, "loss": 0.7174, "step": 17249 }, { "epoch": 0.5036349303669966, "grad_norm": 0.49770191175474826, "learning_rate": 2.7576642335766423e-05, "loss": 0.5726, "step": 17250 }, { "epoch": 0.5036641265948439, "grad_norm": 0.5277001671283587, "learning_rate": 2.75750202757502e-05, "loss": 0.5977, "step": 17251 }, { "epoch": 0.5036933228226913, "grad_norm": 0.5221796184642149, "learning_rate": 2.757339821573398e-05, "loss": 0.5835, "step": 17252 }, { "epoch": 0.5037225190505387, "grad_norm": 0.5115790168743101, "learning_rate": 2.7571776155717765e-05, "loss": 0.5975, "step": 17253 }, { "epoch": 0.503751715278386, "grad_norm": 0.5030936826653937, "learning_rate": 2.7570154095701543e-05, "loss": 0.5915, "step": 17254 }, { "epoch": 0.5037809115062334, "grad_norm": 0.5298638851794868, "learning_rate": 2.7568532035685325e-05, "loss": 0.6576, "step": 17255 }, { "epoch": 0.5038101077340807, "grad_norm": 0.5272748226406683, "learning_rate": 2.7566909975669103e-05, "loss": 0.5956, "step": 17256 }, { "epoch": 0.5038393039619281, "grad_norm": 0.5689334838676958, "learning_rate": 2.756528791565288e-05, "loss": 0.679, "step": 17257 }, { "epoch": 0.5038685001897755, "grad_norm": 0.5063172983734583, "learning_rate": 2.756366585563666e-05, "loss": 0.5973, "step": 17258 }, { "epoch": 0.5038976964176228, "grad_norm": 0.5068455252900596, "learning_rate": 2.7562043795620442e-05, "loss": 0.5647, "step": 17259 }, { "epoch": 0.5039268926454702, "grad_norm": 0.5266076420342398, "learning_rate": 2.756042173560422e-05, "loss": 0.5879, "step": 17260 }, { "epoch": 0.5039560888733176, "grad_norm": 0.5956929548073763, "learning_rate": 2.7558799675588e-05, "loss": 0.6903, "step": 17261 }, { "epoch": 0.5039852851011649, "grad_norm": 0.5228585175035031, "learning_rate": 2.7557177615571777e-05, "loss": 0.6181, "step": 17262 }, { "epoch": 0.5040144813290123, "grad_norm": 0.496245557735301, "learning_rate": 2.7555555555555555e-05, "loss": 0.5636, "step": 17263 }, { "epoch": 0.5040436775568596, "grad_norm": 0.5599067535062816, "learning_rate": 2.7553933495539337e-05, "loss": 0.6517, "step": 17264 }, { "epoch": 0.504072873784707, "grad_norm": 0.5411111793335264, "learning_rate": 2.7552311435523115e-05, "loss": 0.6293, "step": 17265 }, { "epoch": 0.5041020700125544, "grad_norm": 0.511307676966337, "learning_rate": 2.7550689375506894e-05, "loss": 0.6203, "step": 17266 }, { "epoch": 0.5041312662404017, "grad_norm": 0.5298106737353889, "learning_rate": 2.7549067315490672e-05, "loss": 0.6294, "step": 17267 }, { "epoch": 0.5041604624682491, "grad_norm": 0.5070052646661096, "learning_rate": 2.7547445255474454e-05, "loss": 0.5509, "step": 17268 }, { "epoch": 0.5041896586960964, "grad_norm": 0.5032920254969665, "learning_rate": 2.7545823195458232e-05, "loss": 0.5598, "step": 17269 }, { "epoch": 0.5042188549239438, "grad_norm": 0.5686600883643642, "learning_rate": 2.754420113544201e-05, "loss": 0.6878, "step": 17270 }, { "epoch": 0.5042480511517912, "grad_norm": 0.5150072645752267, "learning_rate": 2.754257907542579e-05, "loss": 0.571, "step": 17271 }, { "epoch": 0.5042772473796385, "grad_norm": 0.5485825787242871, "learning_rate": 2.7540957015409574e-05, "loss": 0.6786, "step": 17272 }, { "epoch": 0.5043064436074859, "grad_norm": 0.5199542983759069, "learning_rate": 2.7539334955393353e-05, "loss": 0.5796, "step": 17273 }, { "epoch": 0.5043356398353332, "grad_norm": 0.5199125056911118, "learning_rate": 2.753771289537713e-05, "loss": 0.6428, "step": 17274 }, { "epoch": 0.5043648360631806, "grad_norm": 0.5372894540544941, "learning_rate": 2.7536090835360913e-05, "loss": 0.6327, "step": 17275 }, { "epoch": 0.504394032291028, "grad_norm": 0.5502705945950501, "learning_rate": 2.753446877534469e-05, "loss": 0.7045, "step": 17276 }, { "epoch": 0.5044232285188753, "grad_norm": 0.5056744136646539, "learning_rate": 2.753284671532847e-05, "loss": 0.6074, "step": 17277 }, { "epoch": 0.5044524247467227, "grad_norm": 0.5381160511190701, "learning_rate": 2.7531224655312248e-05, "loss": 0.6496, "step": 17278 }, { "epoch": 0.50448162097457, "grad_norm": 0.5472849401957917, "learning_rate": 2.752960259529603e-05, "loss": 0.6488, "step": 17279 }, { "epoch": 0.5045108172024174, "grad_norm": 0.5134824117573654, "learning_rate": 2.7527980535279808e-05, "loss": 0.652, "step": 17280 }, { "epoch": 0.5045400134302648, "grad_norm": 0.5242930549881927, "learning_rate": 2.7526358475263586e-05, "loss": 0.5878, "step": 17281 }, { "epoch": 0.5045692096581121, "grad_norm": 0.4983219434582157, "learning_rate": 2.7524736415247365e-05, "loss": 0.5783, "step": 17282 }, { "epoch": 0.5045984058859595, "grad_norm": 0.5332774551528495, "learning_rate": 2.7523114355231143e-05, "loss": 0.6089, "step": 17283 }, { "epoch": 0.5046276021138069, "grad_norm": 0.5158484539170918, "learning_rate": 2.7521492295214925e-05, "loss": 0.6424, "step": 17284 }, { "epoch": 0.5046567983416542, "grad_norm": 0.5089652027433834, "learning_rate": 2.7519870235198703e-05, "loss": 0.6051, "step": 17285 }, { "epoch": 0.5046859945695016, "grad_norm": 0.5087354660924832, "learning_rate": 2.751824817518248e-05, "loss": 0.5847, "step": 17286 }, { "epoch": 0.504715190797349, "grad_norm": 0.5600231540776963, "learning_rate": 2.751662611516626e-05, "loss": 0.6712, "step": 17287 }, { "epoch": 0.5047443870251963, "grad_norm": 0.5190751899279804, "learning_rate": 2.751500405515004e-05, "loss": 0.5813, "step": 17288 }, { "epoch": 0.5047735832530437, "grad_norm": 0.5521710166614253, "learning_rate": 2.751338199513382e-05, "loss": 0.6293, "step": 17289 }, { "epoch": 0.504802779480891, "grad_norm": 0.48393622036528106, "learning_rate": 2.75117599351176e-05, "loss": 0.5471, "step": 17290 }, { "epoch": 0.5048319757087384, "grad_norm": 0.47681239607978687, "learning_rate": 2.7510137875101384e-05, "loss": 0.5609, "step": 17291 }, { "epoch": 0.5048611719365858, "grad_norm": 0.5582922591493549, "learning_rate": 2.7508515815085162e-05, "loss": 0.6714, "step": 17292 }, { "epoch": 0.5048903681644331, "grad_norm": 0.5357104683067438, "learning_rate": 2.750689375506894e-05, "loss": 0.6215, "step": 17293 }, { "epoch": 0.5049195643922805, "grad_norm": 0.5169617334282518, "learning_rate": 2.750527169505272e-05, "loss": 0.5751, "step": 17294 }, { "epoch": 0.5049487606201278, "grad_norm": 0.496655348321992, "learning_rate": 2.75036496350365e-05, "loss": 0.5332, "step": 17295 }, { "epoch": 0.5049779568479752, "grad_norm": 0.552590306248368, "learning_rate": 2.750202757502028e-05, "loss": 0.7545, "step": 17296 }, { "epoch": 0.5050071530758226, "grad_norm": 0.5529078752829809, "learning_rate": 2.7500405515004057e-05, "loss": 0.6688, "step": 17297 }, { "epoch": 0.5050363493036699, "grad_norm": 0.48947471647720914, "learning_rate": 2.7498783454987836e-05, "loss": 0.5544, "step": 17298 }, { "epoch": 0.5050655455315173, "grad_norm": 0.47967061935637395, "learning_rate": 2.7497161394971614e-05, "loss": 0.5389, "step": 17299 }, { "epoch": 0.5050947417593646, "grad_norm": 0.4952129197962446, "learning_rate": 2.7495539334955396e-05, "loss": 0.5117, "step": 17300 }, { "epoch": 0.505123937987212, "grad_norm": 0.5023282109726083, "learning_rate": 2.7493917274939174e-05, "loss": 0.6038, "step": 17301 }, { "epoch": 0.5051531342150594, "grad_norm": 0.5397697196657785, "learning_rate": 2.7492295214922952e-05, "loss": 0.6261, "step": 17302 }, { "epoch": 0.5051823304429067, "grad_norm": 0.4925632717464796, "learning_rate": 2.749067315490673e-05, "loss": 0.5566, "step": 17303 }, { "epoch": 0.5052115266707541, "grad_norm": 0.5249675579030116, "learning_rate": 2.7489051094890513e-05, "loss": 0.6379, "step": 17304 }, { "epoch": 0.5052407228986014, "grad_norm": 0.5465040900571058, "learning_rate": 2.748742903487429e-05, "loss": 0.7125, "step": 17305 }, { "epoch": 0.5052699191264488, "grad_norm": 0.5308180993871758, "learning_rate": 2.748580697485807e-05, "loss": 0.6735, "step": 17306 }, { "epoch": 0.5052991153542963, "grad_norm": 0.5121172207285907, "learning_rate": 2.7484184914841848e-05, "loss": 0.6098, "step": 17307 }, { "epoch": 0.5053283115821436, "grad_norm": 0.5113078663848217, "learning_rate": 2.7482562854825626e-05, "loss": 0.563, "step": 17308 }, { "epoch": 0.505357507809991, "grad_norm": 0.5430455491181273, "learning_rate": 2.7480940794809408e-05, "loss": 0.6429, "step": 17309 }, { "epoch": 0.5053867040378384, "grad_norm": 0.5556961225687466, "learning_rate": 2.747931873479319e-05, "loss": 0.6591, "step": 17310 }, { "epoch": 0.5054159002656857, "grad_norm": 0.5466724068167004, "learning_rate": 2.747769667477697e-05, "loss": 0.6727, "step": 17311 }, { "epoch": 0.5054450964935331, "grad_norm": 0.48375830072496123, "learning_rate": 2.747607461476075e-05, "loss": 0.5283, "step": 17312 }, { "epoch": 0.5054742927213804, "grad_norm": 0.5009767479809752, "learning_rate": 2.7474452554744528e-05, "loss": 0.5721, "step": 17313 }, { "epoch": 0.5055034889492278, "grad_norm": 0.5103589808452521, "learning_rate": 2.7472830494728307e-05, "loss": 0.6022, "step": 17314 }, { "epoch": 0.5055326851770752, "grad_norm": 0.5193004131314859, "learning_rate": 2.7471208434712088e-05, "loss": 0.616, "step": 17315 }, { "epoch": 0.5055618814049225, "grad_norm": 0.5552595874410926, "learning_rate": 2.7469586374695867e-05, "loss": 0.675, "step": 17316 }, { "epoch": 0.5055910776327699, "grad_norm": 0.5416540597506111, "learning_rate": 2.7467964314679645e-05, "loss": 0.6299, "step": 17317 }, { "epoch": 0.5056202738606173, "grad_norm": 0.49930833282768267, "learning_rate": 2.7466342254663423e-05, "loss": 0.5712, "step": 17318 }, { "epoch": 0.5056494700884646, "grad_norm": 0.5191591002833483, "learning_rate": 2.7464720194647202e-05, "loss": 0.614, "step": 17319 }, { "epoch": 0.505678666316312, "grad_norm": 0.6141762524589278, "learning_rate": 2.7463098134630984e-05, "loss": 0.6629, "step": 17320 }, { "epoch": 0.5057078625441593, "grad_norm": 0.5437810797548902, "learning_rate": 2.7461476074614762e-05, "loss": 0.6503, "step": 17321 }, { "epoch": 0.5057370587720067, "grad_norm": 0.5424868410537979, "learning_rate": 2.745985401459854e-05, "loss": 0.6144, "step": 17322 }, { "epoch": 0.5057662549998541, "grad_norm": 0.4965031086117664, "learning_rate": 2.745823195458232e-05, "loss": 0.5616, "step": 17323 }, { "epoch": 0.5057954512277014, "grad_norm": 0.48877693915248444, "learning_rate": 2.74566098945661e-05, "loss": 0.5774, "step": 17324 }, { "epoch": 0.5058246474555488, "grad_norm": 0.526529434187753, "learning_rate": 2.745498783454988e-05, "loss": 0.604, "step": 17325 }, { "epoch": 0.5058538436833961, "grad_norm": 0.5163151667343681, "learning_rate": 2.7453365774533657e-05, "loss": 0.6242, "step": 17326 }, { "epoch": 0.5058830399112435, "grad_norm": 0.572779470282681, "learning_rate": 2.7451743714517436e-05, "loss": 0.6526, "step": 17327 }, { "epoch": 0.5059122361390909, "grad_norm": 0.555172043616022, "learning_rate": 2.7450121654501214e-05, "loss": 0.6664, "step": 17328 }, { "epoch": 0.5059414323669382, "grad_norm": 0.5286981639671177, "learning_rate": 2.7448499594485e-05, "loss": 0.6699, "step": 17329 }, { "epoch": 0.5059706285947856, "grad_norm": 0.531353285671383, "learning_rate": 2.7446877534468777e-05, "loss": 0.6403, "step": 17330 }, { "epoch": 0.505999824822633, "grad_norm": 0.5147082136231189, "learning_rate": 2.744525547445256e-05, "loss": 0.5827, "step": 17331 }, { "epoch": 0.5060290210504803, "grad_norm": 0.5080054679372515, "learning_rate": 2.7443633414436338e-05, "loss": 0.6051, "step": 17332 }, { "epoch": 0.5060582172783277, "grad_norm": 0.5025860870230221, "learning_rate": 2.7442011354420116e-05, "loss": 0.6037, "step": 17333 }, { "epoch": 0.506087413506175, "grad_norm": 0.5731785077113898, "learning_rate": 2.7440389294403894e-05, "loss": 0.6583, "step": 17334 }, { "epoch": 0.5061166097340224, "grad_norm": 0.524863819269714, "learning_rate": 2.7438767234387676e-05, "loss": 0.5773, "step": 17335 }, { "epoch": 0.5061458059618698, "grad_norm": 0.5478932614253611, "learning_rate": 2.7437145174371454e-05, "loss": 0.6848, "step": 17336 }, { "epoch": 0.5061750021897171, "grad_norm": 0.5078545288596809, "learning_rate": 2.7435523114355233e-05, "loss": 0.5922, "step": 17337 }, { "epoch": 0.5062041984175645, "grad_norm": 0.5393067489627841, "learning_rate": 2.743390105433901e-05, "loss": 0.6772, "step": 17338 }, { "epoch": 0.5062333946454118, "grad_norm": 0.4867661178794226, "learning_rate": 2.743227899432279e-05, "loss": 0.5672, "step": 17339 }, { "epoch": 0.5062625908732592, "grad_norm": 0.5016309629485195, "learning_rate": 2.743065693430657e-05, "loss": 0.5792, "step": 17340 }, { "epoch": 0.5062917871011066, "grad_norm": 0.5550392271247678, "learning_rate": 2.742903487429035e-05, "loss": 0.6224, "step": 17341 }, { "epoch": 0.5063209833289539, "grad_norm": 0.5591124014317762, "learning_rate": 2.7427412814274128e-05, "loss": 0.6637, "step": 17342 }, { "epoch": 0.5063501795568013, "grad_norm": 0.48787128628823195, "learning_rate": 2.7425790754257906e-05, "loss": 0.5578, "step": 17343 }, { "epoch": 0.5063793757846486, "grad_norm": 0.5015861373351287, "learning_rate": 2.7424168694241685e-05, "loss": 0.5533, "step": 17344 }, { "epoch": 0.506408572012496, "grad_norm": 0.4808654941830447, "learning_rate": 2.7422546634225467e-05, "loss": 0.5234, "step": 17345 }, { "epoch": 0.5064377682403434, "grad_norm": 0.5025737249264701, "learning_rate": 2.7420924574209245e-05, "loss": 0.5926, "step": 17346 }, { "epoch": 0.5064669644681907, "grad_norm": 0.5041353815070021, "learning_rate": 2.7419302514193023e-05, "loss": 0.5486, "step": 17347 }, { "epoch": 0.5064961606960381, "grad_norm": 0.5275737462307301, "learning_rate": 2.741768045417681e-05, "loss": 0.6273, "step": 17348 }, { "epoch": 0.5065253569238855, "grad_norm": 0.5603030821394219, "learning_rate": 2.7416058394160587e-05, "loss": 0.6671, "step": 17349 }, { "epoch": 0.5065545531517328, "grad_norm": 0.5049509979983935, "learning_rate": 2.7414436334144365e-05, "loss": 0.5839, "step": 17350 }, { "epoch": 0.5065837493795802, "grad_norm": 0.5515684591550865, "learning_rate": 2.7412814274128147e-05, "loss": 0.6742, "step": 17351 }, { "epoch": 0.5066129456074275, "grad_norm": 0.5398744220663276, "learning_rate": 2.7411192214111925e-05, "loss": 0.6192, "step": 17352 }, { "epoch": 0.5066421418352749, "grad_norm": 0.5048829148813532, "learning_rate": 2.7409570154095704e-05, "loss": 0.5173, "step": 17353 }, { "epoch": 0.5066713380631223, "grad_norm": 0.5489493398565741, "learning_rate": 2.7407948094079482e-05, "loss": 0.6668, "step": 17354 }, { "epoch": 0.5067005342909696, "grad_norm": 0.571995128104873, "learning_rate": 2.740632603406326e-05, "loss": 0.666, "step": 17355 }, { "epoch": 0.506729730518817, "grad_norm": 0.532022991703262, "learning_rate": 2.7404703974047042e-05, "loss": 0.6269, "step": 17356 }, { "epoch": 0.5067589267466643, "grad_norm": 0.5788431942598312, "learning_rate": 2.740308191403082e-05, "loss": 0.5443, "step": 17357 }, { "epoch": 0.5067881229745117, "grad_norm": 0.5324430649871121, "learning_rate": 2.74014598540146e-05, "loss": 0.6654, "step": 17358 }, { "epoch": 0.5068173192023591, "grad_norm": 0.576233639954909, "learning_rate": 2.7399837793998377e-05, "loss": 0.6901, "step": 17359 }, { "epoch": 0.5068465154302064, "grad_norm": 0.48717583889807997, "learning_rate": 2.739821573398216e-05, "loss": 0.5255, "step": 17360 }, { "epoch": 0.5068757116580538, "grad_norm": 0.5375256351313514, "learning_rate": 2.7396593673965938e-05, "loss": 0.6764, "step": 17361 }, { "epoch": 0.5069049078859011, "grad_norm": 0.5491841785171253, "learning_rate": 2.7394971613949716e-05, "loss": 0.6562, "step": 17362 }, { "epoch": 0.5069341041137485, "grad_norm": 0.5550288743666076, "learning_rate": 2.7393349553933494e-05, "loss": 0.6447, "step": 17363 }, { "epoch": 0.5069633003415959, "grad_norm": 0.5092720838068453, "learning_rate": 2.7391727493917273e-05, "loss": 0.5778, "step": 17364 }, { "epoch": 0.5069924965694432, "grad_norm": 0.5236144765418422, "learning_rate": 2.7390105433901054e-05, "loss": 0.6227, "step": 17365 }, { "epoch": 0.5070216927972906, "grad_norm": 0.5295628514821061, "learning_rate": 2.7388483373884833e-05, "loss": 0.641, "step": 17366 }, { "epoch": 0.507050889025138, "grad_norm": 0.5638287540751403, "learning_rate": 2.7386861313868618e-05, "loss": 0.6813, "step": 17367 }, { "epoch": 0.5070800852529853, "grad_norm": 0.5426823998610115, "learning_rate": 2.7385239253852396e-05, "loss": 0.6267, "step": 17368 }, { "epoch": 0.5071092814808327, "grad_norm": 0.5068044251451475, "learning_rate": 2.7383617193836175e-05, "loss": 0.5867, "step": 17369 }, { "epoch": 0.50713847770868, "grad_norm": 0.49540969326839823, "learning_rate": 2.7381995133819953e-05, "loss": 0.5845, "step": 17370 }, { "epoch": 0.5071676739365274, "grad_norm": 0.5165196441202933, "learning_rate": 2.7380373073803735e-05, "loss": 0.5992, "step": 17371 }, { "epoch": 0.5071968701643748, "grad_norm": 0.5414749248012882, "learning_rate": 2.7378751013787513e-05, "loss": 0.6533, "step": 17372 }, { "epoch": 0.5072260663922221, "grad_norm": 0.5745160422319338, "learning_rate": 2.737712895377129e-05, "loss": 0.665, "step": 17373 }, { "epoch": 0.5072552626200695, "grad_norm": 0.5256537002133073, "learning_rate": 2.737550689375507e-05, "loss": 0.6007, "step": 17374 }, { "epoch": 0.5072844588479168, "grad_norm": 0.5368799174884935, "learning_rate": 2.737388483373885e-05, "loss": 0.6484, "step": 17375 }, { "epoch": 0.5073136550757642, "grad_norm": 0.5029201891642033, "learning_rate": 2.737226277372263e-05, "loss": 0.5975, "step": 17376 }, { "epoch": 0.5073428513036116, "grad_norm": 0.4909052164701176, "learning_rate": 2.737064071370641e-05, "loss": 0.599, "step": 17377 }, { "epoch": 0.5073720475314589, "grad_norm": 0.5480402755802462, "learning_rate": 2.7369018653690187e-05, "loss": 0.6801, "step": 17378 }, { "epoch": 0.5074012437593063, "grad_norm": 0.5148856709507557, "learning_rate": 2.7367396593673965e-05, "loss": 0.5947, "step": 17379 }, { "epoch": 0.5074304399871536, "grad_norm": 0.5247838315454979, "learning_rate": 2.7365774533657747e-05, "loss": 0.5974, "step": 17380 }, { "epoch": 0.507459636215001, "grad_norm": 0.5855711880256964, "learning_rate": 2.7364152473641525e-05, "loss": 0.679, "step": 17381 }, { "epoch": 0.5074888324428484, "grad_norm": 0.5052413314041395, "learning_rate": 2.7362530413625304e-05, "loss": 0.6009, "step": 17382 }, { "epoch": 0.5075180286706957, "grad_norm": 0.48275139892467084, "learning_rate": 2.7360908353609082e-05, "loss": 0.5561, "step": 17383 }, { "epoch": 0.5075472248985431, "grad_norm": 0.4879798573209222, "learning_rate": 2.735928629359286e-05, "loss": 0.5488, "step": 17384 }, { "epoch": 0.5075764211263905, "grad_norm": 0.5151040522090745, "learning_rate": 2.7357664233576642e-05, "loss": 0.6379, "step": 17385 }, { "epoch": 0.5076056173542378, "grad_norm": 0.5043487698706817, "learning_rate": 2.7356042173560424e-05, "loss": 0.5743, "step": 17386 }, { "epoch": 0.5076348135820852, "grad_norm": 0.5318079440790293, "learning_rate": 2.7354420113544206e-05, "loss": 0.5703, "step": 17387 }, { "epoch": 0.5076640098099325, "grad_norm": 0.5221909412938065, "learning_rate": 2.7352798053527984e-05, "loss": 0.6284, "step": 17388 }, { "epoch": 0.5076932060377799, "grad_norm": 0.5482069014112578, "learning_rate": 2.7351175993511763e-05, "loss": 0.6138, "step": 17389 }, { "epoch": 0.5077224022656273, "grad_norm": 0.5531272609423978, "learning_rate": 2.734955393349554e-05, "loss": 0.616, "step": 17390 }, { "epoch": 0.5077515984934746, "grad_norm": 0.5036065334590778, "learning_rate": 2.7347931873479323e-05, "loss": 0.5864, "step": 17391 }, { "epoch": 0.507780794721322, "grad_norm": 0.5206603418212867, "learning_rate": 2.73463098134631e-05, "loss": 0.5697, "step": 17392 }, { "epoch": 0.5078099909491693, "grad_norm": 0.5421750712301908, "learning_rate": 2.734468775344688e-05, "loss": 0.6746, "step": 17393 }, { "epoch": 0.5078391871770167, "grad_norm": 0.5187223497263972, "learning_rate": 2.7343065693430658e-05, "loss": 0.5803, "step": 17394 }, { "epoch": 0.5078683834048641, "grad_norm": 0.498480353788795, "learning_rate": 2.7341443633414436e-05, "loss": 0.5846, "step": 17395 }, { "epoch": 0.5078975796327114, "grad_norm": 0.535021118789384, "learning_rate": 2.7339821573398218e-05, "loss": 0.6399, "step": 17396 }, { "epoch": 0.5079267758605588, "grad_norm": 0.554659945676189, "learning_rate": 2.7338199513381996e-05, "loss": 0.7114, "step": 17397 }, { "epoch": 0.5079559720884061, "grad_norm": 0.4775263598119779, "learning_rate": 2.7336577453365775e-05, "loss": 0.5562, "step": 17398 }, { "epoch": 0.5079851683162535, "grad_norm": 0.507573590886772, "learning_rate": 2.7334955393349553e-05, "loss": 0.5872, "step": 17399 }, { "epoch": 0.5080143645441009, "grad_norm": 0.49708353298280455, "learning_rate": 2.733333333333333e-05, "loss": 0.5472, "step": 17400 }, { "epoch": 0.5080435607719482, "grad_norm": 0.47993146117564084, "learning_rate": 2.7331711273317113e-05, "loss": 0.5371, "step": 17401 }, { "epoch": 0.5080727569997956, "grad_norm": 0.5167710309420366, "learning_rate": 2.733008921330089e-05, "loss": 0.6398, "step": 17402 }, { "epoch": 0.508101953227643, "grad_norm": 0.5425567558995502, "learning_rate": 2.732846715328467e-05, "loss": 0.6521, "step": 17403 }, { "epoch": 0.5081311494554903, "grad_norm": 0.5137931946269235, "learning_rate": 2.7326845093268455e-05, "loss": 0.6267, "step": 17404 }, { "epoch": 0.5081603456833377, "grad_norm": 0.5614392400693272, "learning_rate": 2.7325223033252233e-05, "loss": 0.6496, "step": 17405 }, { "epoch": 0.508189541911185, "grad_norm": 0.5058204793105108, "learning_rate": 2.7323600973236012e-05, "loss": 0.6093, "step": 17406 }, { "epoch": 0.5082187381390324, "grad_norm": 0.4813430753578251, "learning_rate": 2.7321978913219794e-05, "loss": 0.5136, "step": 17407 }, { "epoch": 0.5082479343668798, "grad_norm": 0.5296569744935002, "learning_rate": 2.7320356853203572e-05, "loss": 0.6077, "step": 17408 }, { "epoch": 0.5082771305947271, "grad_norm": 0.5386896061680994, "learning_rate": 2.731873479318735e-05, "loss": 0.6065, "step": 17409 }, { "epoch": 0.5083063268225745, "grad_norm": 0.6173649610246202, "learning_rate": 2.731711273317113e-05, "loss": 0.7216, "step": 17410 }, { "epoch": 0.5083355230504218, "grad_norm": 0.6204005962462813, "learning_rate": 2.7315490673154907e-05, "loss": 0.6466, "step": 17411 }, { "epoch": 0.5083647192782692, "grad_norm": 0.6975279585134525, "learning_rate": 2.731386861313869e-05, "loss": 0.7458, "step": 17412 }, { "epoch": 0.5083939155061166, "grad_norm": 0.5396826433183015, "learning_rate": 2.7312246553122467e-05, "loss": 0.7085, "step": 17413 }, { "epoch": 0.5084231117339639, "grad_norm": 0.5000827297953596, "learning_rate": 2.7310624493106246e-05, "loss": 0.572, "step": 17414 }, { "epoch": 0.5084523079618113, "grad_norm": 0.5095959286289384, "learning_rate": 2.7309002433090024e-05, "loss": 0.5969, "step": 17415 }, { "epoch": 0.5084815041896587, "grad_norm": 0.5241951003655957, "learning_rate": 2.7307380373073806e-05, "loss": 0.6198, "step": 17416 }, { "epoch": 0.508510700417506, "grad_norm": 0.5167327252903049, "learning_rate": 2.7305758313057584e-05, "loss": 0.5989, "step": 17417 }, { "epoch": 0.5085398966453534, "grad_norm": 0.5297702123610435, "learning_rate": 2.7304136253041362e-05, "loss": 0.5732, "step": 17418 }, { "epoch": 0.5085690928732007, "grad_norm": 0.6028853673113054, "learning_rate": 2.730251419302514e-05, "loss": 0.6533, "step": 17419 }, { "epoch": 0.5085982891010481, "grad_norm": 0.5091237146047987, "learning_rate": 2.730089213300892e-05, "loss": 0.5778, "step": 17420 }, { "epoch": 0.5086274853288955, "grad_norm": 0.5575432717851323, "learning_rate": 2.72992700729927e-05, "loss": 0.657, "step": 17421 }, { "epoch": 0.5086566815567428, "grad_norm": 0.536768241632614, "learning_rate": 2.729764801297648e-05, "loss": 0.6726, "step": 17422 }, { "epoch": 0.5086858777845902, "grad_norm": 0.5059773279082533, "learning_rate": 2.7296025952960264e-05, "loss": 0.5891, "step": 17423 }, { "epoch": 0.5087150740124375, "grad_norm": 0.49698006052202337, "learning_rate": 2.7294403892944043e-05, "loss": 0.5699, "step": 17424 }, { "epoch": 0.5087442702402849, "grad_norm": 0.5535090704882951, "learning_rate": 2.729278183292782e-05, "loss": 0.6788, "step": 17425 }, { "epoch": 0.5087734664681323, "grad_norm": 0.5242735920348899, "learning_rate": 2.72911597729116e-05, "loss": 0.5528, "step": 17426 }, { "epoch": 0.5088026626959796, "grad_norm": 0.5404598927627072, "learning_rate": 2.728953771289538e-05, "loss": 0.5964, "step": 17427 }, { "epoch": 0.5088318589238271, "grad_norm": 0.5477801104948424, "learning_rate": 2.728791565287916e-05, "loss": 0.6083, "step": 17428 }, { "epoch": 0.5088610551516745, "grad_norm": 0.536151085650438, "learning_rate": 2.7286293592862938e-05, "loss": 0.6296, "step": 17429 }, { "epoch": 0.5088902513795218, "grad_norm": 0.5185059062516122, "learning_rate": 2.7284671532846716e-05, "loss": 0.5996, "step": 17430 }, { "epoch": 0.5089194476073692, "grad_norm": 0.5365466609275231, "learning_rate": 2.7283049472830495e-05, "loss": 0.6292, "step": 17431 }, { "epoch": 0.5089486438352165, "grad_norm": 0.5520599198775012, "learning_rate": 2.7281427412814277e-05, "loss": 0.66, "step": 17432 }, { "epoch": 0.5089778400630639, "grad_norm": 0.5614318871605374, "learning_rate": 2.7279805352798055e-05, "loss": 0.6814, "step": 17433 }, { "epoch": 0.5090070362909113, "grad_norm": 0.5225924776767671, "learning_rate": 2.7278183292781833e-05, "loss": 0.6268, "step": 17434 }, { "epoch": 0.5090362325187586, "grad_norm": 0.5078607468707631, "learning_rate": 2.7276561232765612e-05, "loss": 0.6002, "step": 17435 }, { "epoch": 0.509065428746606, "grad_norm": 0.5220134413944579, "learning_rate": 2.7274939172749394e-05, "loss": 0.6098, "step": 17436 }, { "epoch": 0.5090946249744533, "grad_norm": 0.5746652915949916, "learning_rate": 2.7273317112733172e-05, "loss": 0.6943, "step": 17437 }, { "epoch": 0.5091238212023007, "grad_norm": 0.522826449067026, "learning_rate": 2.727169505271695e-05, "loss": 0.6161, "step": 17438 }, { "epoch": 0.5091530174301481, "grad_norm": 0.5401046171774475, "learning_rate": 2.727007299270073e-05, "loss": 0.6891, "step": 17439 }, { "epoch": 0.5091822136579954, "grad_norm": 0.5773009474294156, "learning_rate": 2.7268450932684507e-05, "loss": 0.558, "step": 17440 }, { "epoch": 0.5092114098858428, "grad_norm": 0.49874541264288785, "learning_rate": 2.726682887266829e-05, "loss": 0.5366, "step": 17441 }, { "epoch": 0.5092406061136902, "grad_norm": 0.553586372072774, "learning_rate": 2.726520681265207e-05, "loss": 0.6532, "step": 17442 }, { "epoch": 0.5092698023415375, "grad_norm": 0.5193324194681151, "learning_rate": 2.7263584752635852e-05, "loss": 0.6128, "step": 17443 }, { "epoch": 0.5092989985693849, "grad_norm": 0.5205535922232398, "learning_rate": 2.726196269261963e-05, "loss": 0.5774, "step": 17444 }, { "epoch": 0.5093281947972322, "grad_norm": 0.5198128362758115, "learning_rate": 2.726034063260341e-05, "loss": 0.6129, "step": 17445 }, { "epoch": 0.5093573910250796, "grad_norm": 0.5514347378583627, "learning_rate": 2.7258718572587187e-05, "loss": 0.6103, "step": 17446 }, { "epoch": 0.509386587252927, "grad_norm": 0.532873536653763, "learning_rate": 2.725709651257097e-05, "loss": 0.6109, "step": 17447 }, { "epoch": 0.5094157834807743, "grad_norm": 0.5609787359303506, "learning_rate": 2.7255474452554748e-05, "loss": 0.677, "step": 17448 }, { "epoch": 0.5094449797086217, "grad_norm": 0.49772482693961895, "learning_rate": 2.7253852392538526e-05, "loss": 0.5679, "step": 17449 }, { "epoch": 0.509474175936469, "grad_norm": 0.5627534995863466, "learning_rate": 2.7252230332522304e-05, "loss": 0.679, "step": 17450 }, { "epoch": 0.5095033721643164, "grad_norm": 0.5141793450829917, "learning_rate": 2.7250608272506083e-05, "loss": 0.5961, "step": 17451 }, { "epoch": 0.5095325683921638, "grad_norm": 0.5271370102640831, "learning_rate": 2.7248986212489864e-05, "loss": 0.6504, "step": 17452 }, { "epoch": 0.5095617646200111, "grad_norm": 0.49871520804354075, "learning_rate": 2.7247364152473643e-05, "loss": 0.5995, "step": 17453 }, { "epoch": 0.5095909608478585, "grad_norm": 0.5057840428407181, "learning_rate": 2.724574209245742e-05, "loss": 0.5627, "step": 17454 }, { "epoch": 0.5096201570757058, "grad_norm": 0.5464758439230855, "learning_rate": 2.72441200324412e-05, "loss": 0.664, "step": 17455 }, { "epoch": 0.5096493533035532, "grad_norm": 0.5339350091719672, "learning_rate": 2.7242497972424978e-05, "loss": 0.6558, "step": 17456 }, { "epoch": 0.5096785495314006, "grad_norm": 0.5149364938848798, "learning_rate": 2.724087591240876e-05, "loss": 0.5821, "step": 17457 }, { "epoch": 0.5097077457592479, "grad_norm": 0.4863378063127192, "learning_rate": 2.7239253852392538e-05, "loss": 0.5243, "step": 17458 }, { "epoch": 0.5097369419870953, "grad_norm": 0.567934980942088, "learning_rate": 2.7237631792376316e-05, "loss": 0.6867, "step": 17459 }, { "epoch": 0.5097661382149427, "grad_norm": 0.48928089744566133, "learning_rate": 2.7236009732360095e-05, "loss": 0.5672, "step": 17460 }, { "epoch": 0.50979533444279, "grad_norm": 0.5317992342779525, "learning_rate": 2.723438767234388e-05, "loss": 0.6345, "step": 17461 }, { "epoch": 0.5098245306706374, "grad_norm": 0.534920896086628, "learning_rate": 2.723276561232766e-05, "loss": 0.658, "step": 17462 }, { "epoch": 0.5098537268984847, "grad_norm": 0.5431012951673017, "learning_rate": 2.723114355231144e-05, "loss": 0.6586, "step": 17463 }, { "epoch": 0.5098829231263321, "grad_norm": 0.5460461239871428, "learning_rate": 2.722952149229522e-05, "loss": 0.6408, "step": 17464 }, { "epoch": 0.5099121193541795, "grad_norm": 0.5075042926396757, "learning_rate": 2.7227899432278997e-05, "loss": 0.6025, "step": 17465 }, { "epoch": 0.5099413155820268, "grad_norm": 0.5438620893510028, "learning_rate": 2.7226277372262775e-05, "loss": 0.6412, "step": 17466 }, { "epoch": 0.5099705118098742, "grad_norm": 0.510691263730572, "learning_rate": 2.7224655312246557e-05, "loss": 0.5867, "step": 17467 }, { "epoch": 0.5099997080377215, "grad_norm": 0.5324907141514532, "learning_rate": 2.7223033252230335e-05, "loss": 0.6839, "step": 17468 }, { "epoch": 0.5100289042655689, "grad_norm": 0.5096908680797418, "learning_rate": 2.7221411192214114e-05, "loss": 0.5822, "step": 17469 }, { "epoch": 0.5100581004934163, "grad_norm": 0.5110178673969945, "learning_rate": 2.7219789132197892e-05, "loss": 0.6218, "step": 17470 }, { "epoch": 0.5100872967212636, "grad_norm": 0.5415702279318162, "learning_rate": 2.721816707218167e-05, "loss": 0.6548, "step": 17471 }, { "epoch": 0.510116492949111, "grad_norm": 0.5228517124969325, "learning_rate": 2.7216545012165452e-05, "loss": 0.6693, "step": 17472 }, { "epoch": 0.5101456891769584, "grad_norm": 0.5491397416103522, "learning_rate": 2.721492295214923e-05, "loss": 0.6699, "step": 17473 }, { "epoch": 0.5101748854048057, "grad_norm": 0.5216050985237956, "learning_rate": 2.721330089213301e-05, "loss": 0.6497, "step": 17474 }, { "epoch": 0.5102040816326531, "grad_norm": 0.5686884094882103, "learning_rate": 2.7211678832116787e-05, "loss": 0.6899, "step": 17475 }, { "epoch": 0.5102332778605004, "grad_norm": 0.5027155260974568, "learning_rate": 2.7210056772100566e-05, "loss": 0.5799, "step": 17476 }, { "epoch": 0.5102624740883478, "grad_norm": 0.514441286039648, "learning_rate": 2.7208434712084347e-05, "loss": 0.5817, "step": 17477 }, { "epoch": 0.5102916703161952, "grad_norm": 0.5077476786601759, "learning_rate": 2.7206812652068126e-05, "loss": 0.6104, "step": 17478 }, { "epoch": 0.5103208665440425, "grad_norm": 0.5129654488289997, "learning_rate": 2.7205190592051904e-05, "loss": 0.582, "step": 17479 }, { "epoch": 0.5103500627718899, "grad_norm": 0.5175521032413961, "learning_rate": 2.720356853203569e-05, "loss": 0.5834, "step": 17480 }, { "epoch": 0.5103792589997372, "grad_norm": 0.5068956670935049, "learning_rate": 2.7201946472019468e-05, "loss": 0.5699, "step": 17481 }, { "epoch": 0.5104084552275846, "grad_norm": 0.4955124720153096, "learning_rate": 2.7200324412003246e-05, "loss": 0.5713, "step": 17482 }, { "epoch": 0.510437651455432, "grad_norm": 0.49410637210363695, "learning_rate": 2.7198702351987028e-05, "loss": 0.5754, "step": 17483 }, { "epoch": 0.5104668476832793, "grad_norm": 0.5567727369326703, "learning_rate": 2.7197080291970806e-05, "loss": 0.6831, "step": 17484 }, { "epoch": 0.5104960439111267, "grad_norm": 0.5876172536668376, "learning_rate": 2.7195458231954585e-05, "loss": 0.6906, "step": 17485 }, { "epoch": 0.510525240138974, "grad_norm": 0.5437961200230573, "learning_rate": 2.7193836171938363e-05, "loss": 0.6282, "step": 17486 }, { "epoch": 0.5105544363668214, "grad_norm": 0.4881182616926418, "learning_rate": 2.719221411192214e-05, "loss": 0.5661, "step": 17487 }, { "epoch": 0.5105836325946688, "grad_norm": 0.5456721591021139, "learning_rate": 2.7190592051905923e-05, "loss": 0.6263, "step": 17488 }, { "epoch": 0.5106128288225161, "grad_norm": 0.5755188944829094, "learning_rate": 2.71889699918897e-05, "loss": 0.7755, "step": 17489 }, { "epoch": 0.5106420250503635, "grad_norm": 0.5071024780201424, "learning_rate": 2.718734793187348e-05, "loss": 0.6133, "step": 17490 }, { "epoch": 0.5106712212782109, "grad_norm": 0.5543684262374504, "learning_rate": 2.7185725871857258e-05, "loss": 0.5818, "step": 17491 }, { "epoch": 0.5107004175060582, "grad_norm": 0.5176254095821783, "learning_rate": 2.718410381184104e-05, "loss": 0.6027, "step": 17492 }, { "epoch": 0.5107296137339056, "grad_norm": 0.5197050420280981, "learning_rate": 2.718248175182482e-05, "loss": 0.6587, "step": 17493 }, { "epoch": 0.5107588099617529, "grad_norm": 0.5303129423544892, "learning_rate": 2.7180859691808597e-05, "loss": 0.6312, "step": 17494 }, { "epoch": 0.5107880061896003, "grad_norm": 0.507198598478874, "learning_rate": 2.7179237631792375e-05, "loss": 0.5892, "step": 17495 }, { "epoch": 0.5108172024174477, "grad_norm": 0.5500629968084638, "learning_rate": 2.7177615571776154e-05, "loss": 0.6914, "step": 17496 }, { "epoch": 0.510846398645295, "grad_norm": 0.49824012858642835, "learning_rate": 2.7175993511759935e-05, "loss": 0.5676, "step": 17497 }, { "epoch": 0.5108755948731424, "grad_norm": 0.529113183650398, "learning_rate": 2.7174371451743714e-05, "loss": 0.6288, "step": 17498 }, { "epoch": 0.5109047911009897, "grad_norm": 0.5119784315364475, "learning_rate": 2.71727493917275e-05, "loss": 0.6042, "step": 17499 }, { "epoch": 0.5109339873288371, "grad_norm": 0.526206374722267, "learning_rate": 2.7171127331711277e-05, "loss": 0.6289, "step": 17500 }, { "epoch": 0.5109631835566845, "grad_norm": 0.4646799284655416, "learning_rate": 2.7169505271695056e-05, "loss": 0.5364, "step": 17501 }, { "epoch": 0.5109923797845318, "grad_norm": 0.5137485423413385, "learning_rate": 2.7167883211678834e-05, "loss": 0.6224, "step": 17502 }, { "epoch": 0.5110215760123792, "grad_norm": 0.5636308270068453, "learning_rate": 2.7166261151662616e-05, "loss": 0.718, "step": 17503 }, { "epoch": 0.5110507722402265, "grad_norm": 0.4695618158212903, "learning_rate": 2.7164639091646394e-05, "loss": 0.5418, "step": 17504 }, { "epoch": 0.5110799684680739, "grad_norm": 0.5154529017844675, "learning_rate": 2.7163017031630172e-05, "loss": 0.5999, "step": 17505 }, { "epoch": 0.5111091646959213, "grad_norm": 0.5350003368104075, "learning_rate": 2.716139497161395e-05, "loss": 0.5833, "step": 17506 }, { "epoch": 0.5111383609237686, "grad_norm": 0.6001675144695654, "learning_rate": 2.715977291159773e-05, "loss": 0.6353, "step": 17507 }, { "epoch": 0.511167557151616, "grad_norm": 0.49509148815180554, "learning_rate": 2.715815085158151e-05, "loss": 0.5998, "step": 17508 }, { "epoch": 0.5111967533794634, "grad_norm": 0.5260233247275063, "learning_rate": 2.715652879156529e-05, "loss": 0.6414, "step": 17509 }, { "epoch": 0.5112259496073107, "grad_norm": 0.5017510546928798, "learning_rate": 2.7154906731549068e-05, "loss": 0.5873, "step": 17510 }, { "epoch": 0.5112551458351581, "grad_norm": 0.5025627944199699, "learning_rate": 2.7153284671532846e-05, "loss": 0.6037, "step": 17511 }, { "epoch": 0.5112843420630054, "grad_norm": 0.48135176871872526, "learning_rate": 2.7151662611516624e-05, "loss": 0.5379, "step": 17512 }, { "epoch": 0.5113135382908528, "grad_norm": 0.4930435644466321, "learning_rate": 2.7150040551500406e-05, "loss": 0.5518, "step": 17513 }, { "epoch": 0.5113427345187002, "grad_norm": 0.5090551382220005, "learning_rate": 2.7148418491484185e-05, "loss": 0.5754, "step": 17514 }, { "epoch": 0.5113719307465475, "grad_norm": 0.4769225466208921, "learning_rate": 2.7146796431467963e-05, "loss": 0.5194, "step": 17515 }, { "epoch": 0.5114011269743949, "grad_norm": 0.510584297560028, "learning_rate": 2.714517437145174e-05, "loss": 0.5836, "step": 17516 }, { "epoch": 0.5114303232022422, "grad_norm": 0.526966644346983, "learning_rate": 2.7143552311435523e-05, "loss": 0.5569, "step": 17517 }, { "epoch": 0.5114595194300896, "grad_norm": 0.4995295076875658, "learning_rate": 2.7141930251419305e-05, "loss": 0.5813, "step": 17518 }, { "epoch": 0.511488715657937, "grad_norm": 0.4819434277018351, "learning_rate": 2.7140308191403087e-05, "loss": 0.5272, "step": 17519 }, { "epoch": 0.5115179118857843, "grad_norm": 0.49715217904134523, "learning_rate": 2.7138686131386865e-05, "loss": 0.583, "step": 17520 }, { "epoch": 0.5115471081136317, "grad_norm": 0.5795156554484153, "learning_rate": 2.7137064071370643e-05, "loss": 0.7088, "step": 17521 }, { "epoch": 0.511576304341479, "grad_norm": 0.5183223325494891, "learning_rate": 2.7135442011354422e-05, "loss": 0.6099, "step": 17522 }, { "epoch": 0.5116055005693264, "grad_norm": 0.6197888255520959, "learning_rate": 2.7133819951338204e-05, "loss": 0.6594, "step": 17523 }, { "epoch": 0.5116346967971738, "grad_norm": 0.5522809064359318, "learning_rate": 2.7132197891321982e-05, "loss": 0.6562, "step": 17524 }, { "epoch": 0.5116638930250211, "grad_norm": 0.5094357539158518, "learning_rate": 2.713057583130576e-05, "loss": 0.5711, "step": 17525 }, { "epoch": 0.5116930892528685, "grad_norm": 0.4932879068231291, "learning_rate": 2.712895377128954e-05, "loss": 0.5745, "step": 17526 }, { "epoch": 0.5117222854807159, "grad_norm": 0.5345438137360143, "learning_rate": 2.7127331711273317e-05, "loss": 0.6946, "step": 17527 }, { "epoch": 0.5117514817085632, "grad_norm": 0.5141152565058985, "learning_rate": 2.71257096512571e-05, "loss": 0.612, "step": 17528 }, { "epoch": 0.5117806779364106, "grad_norm": 0.5199198527660945, "learning_rate": 2.7124087591240877e-05, "loss": 0.6164, "step": 17529 }, { "epoch": 0.5118098741642579, "grad_norm": 0.4896290707632896, "learning_rate": 2.7122465531224656e-05, "loss": 0.5848, "step": 17530 }, { "epoch": 0.5118390703921053, "grad_norm": 0.5371225225853397, "learning_rate": 2.7120843471208434e-05, "loss": 0.6472, "step": 17531 }, { "epoch": 0.5118682666199527, "grad_norm": 0.4884315920447806, "learning_rate": 2.7119221411192212e-05, "loss": 0.5269, "step": 17532 }, { "epoch": 0.5118974628478, "grad_norm": 0.5272590861761688, "learning_rate": 2.7117599351175994e-05, "loss": 0.6079, "step": 17533 }, { "epoch": 0.5119266590756474, "grad_norm": 0.5502334219032646, "learning_rate": 2.7115977291159772e-05, "loss": 0.6358, "step": 17534 }, { "epoch": 0.5119558553034947, "grad_norm": 0.5456832861655437, "learning_rate": 2.711435523114355e-05, "loss": 0.6471, "step": 17535 }, { "epoch": 0.5119850515313421, "grad_norm": 0.5554221777377373, "learning_rate": 2.711273317112733e-05, "loss": 0.6925, "step": 17536 }, { "epoch": 0.5120142477591895, "grad_norm": 0.5392068118454522, "learning_rate": 2.7111111111111114e-05, "loss": 0.6364, "step": 17537 }, { "epoch": 0.5120434439870368, "grad_norm": 0.5532362821071504, "learning_rate": 2.7109489051094893e-05, "loss": 0.6486, "step": 17538 }, { "epoch": 0.5120726402148842, "grad_norm": 0.5076535932514702, "learning_rate": 2.7107866991078674e-05, "loss": 0.565, "step": 17539 }, { "epoch": 0.5121018364427316, "grad_norm": 0.5132922822510965, "learning_rate": 2.7106244931062453e-05, "loss": 0.6125, "step": 17540 }, { "epoch": 0.5121310326705789, "grad_norm": 0.4713842956451498, "learning_rate": 2.710462287104623e-05, "loss": 0.5121, "step": 17541 }, { "epoch": 0.5121602288984263, "grad_norm": 0.5414720327697147, "learning_rate": 2.710300081103001e-05, "loss": 0.654, "step": 17542 }, { "epoch": 0.5121894251262736, "grad_norm": 0.5137114063181363, "learning_rate": 2.7101378751013788e-05, "loss": 0.6064, "step": 17543 }, { "epoch": 0.512218621354121, "grad_norm": 0.5274987670366392, "learning_rate": 2.709975669099757e-05, "loss": 0.6437, "step": 17544 }, { "epoch": 0.5122478175819684, "grad_norm": 0.5173126840957769, "learning_rate": 2.7098134630981348e-05, "loss": 0.5914, "step": 17545 }, { "epoch": 0.5122770138098157, "grad_norm": 0.5818677498670619, "learning_rate": 2.7096512570965126e-05, "loss": 0.6536, "step": 17546 }, { "epoch": 0.5123062100376631, "grad_norm": 0.5018394297610483, "learning_rate": 2.7094890510948905e-05, "loss": 0.5776, "step": 17547 }, { "epoch": 0.5123354062655106, "grad_norm": 0.5348422351953047, "learning_rate": 2.7093268450932687e-05, "loss": 0.6579, "step": 17548 }, { "epoch": 0.5123646024933579, "grad_norm": 0.5291481637962104, "learning_rate": 2.7091646390916465e-05, "loss": 0.5918, "step": 17549 }, { "epoch": 0.5123937987212053, "grad_norm": 0.5216480489965635, "learning_rate": 2.7090024330900243e-05, "loss": 0.6297, "step": 17550 }, { "epoch": 0.5124229949490526, "grad_norm": 0.46375374497207345, "learning_rate": 2.7088402270884022e-05, "loss": 0.4992, "step": 17551 }, { "epoch": 0.5124521911769, "grad_norm": 0.5290899250213729, "learning_rate": 2.70867802108678e-05, "loss": 0.6671, "step": 17552 }, { "epoch": 0.5124813874047474, "grad_norm": 0.5891018593169219, "learning_rate": 2.7085158150851582e-05, "loss": 0.6462, "step": 17553 }, { "epoch": 0.5125105836325947, "grad_norm": 0.5229234996875332, "learning_rate": 2.708353609083536e-05, "loss": 0.6308, "step": 17554 }, { "epoch": 0.5125397798604421, "grad_norm": 0.5417702032522043, "learning_rate": 2.7081914030819145e-05, "loss": 0.6253, "step": 17555 }, { "epoch": 0.5125689760882894, "grad_norm": 0.5154296986778435, "learning_rate": 2.7080291970802924e-05, "loss": 0.5945, "step": 17556 }, { "epoch": 0.5125981723161368, "grad_norm": 0.5371179846663203, "learning_rate": 2.7078669910786702e-05, "loss": 0.6285, "step": 17557 }, { "epoch": 0.5126273685439842, "grad_norm": 0.5102653029613805, "learning_rate": 2.707704785077048e-05, "loss": 0.5691, "step": 17558 }, { "epoch": 0.5126565647718315, "grad_norm": 0.4936308120159557, "learning_rate": 2.7075425790754262e-05, "loss": 0.5784, "step": 17559 }, { "epoch": 0.5126857609996789, "grad_norm": 0.5217321298821633, "learning_rate": 2.707380373073804e-05, "loss": 0.647, "step": 17560 }, { "epoch": 0.5127149572275262, "grad_norm": 0.5068687704425572, "learning_rate": 2.707218167072182e-05, "loss": 0.5658, "step": 17561 }, { "epoch": 0.5127441534553736, "grad_norm": 0.49431201158053634, "learning_rate": 2.7070559610705597e-05, "loss": 0.5608, "step": 17562 }, { "epoch": 0.512773349683221, "grad_norm": 0.4876518569110912, "learning_rate": 2.7068937550689376e-05, "loss": 0.5283, "step": 17563 }, { "epoch": 0.5128025459110683, "grad_norm": 0.536059505502698, "learning_rate": 2.7067315490673158e-05, "loss": 0.6511, "step": 17564 }, { "epoch": 0.5128317421389157, "grad_norm": 0.5241544001231182, "learning_rate": 2.7065693430656936e-05, "loss": 0.6302, "step": 17565 }, { "epoch": 0.512860938366763, "grad_norm": 0.5062008072593126, "learning_rate": 2.7064071370640714e-05, "loss": 0.6051, "step": 17566 }, { "epoch": 0.5128901345946104, "grad_norm": 0.5464833268189991, "learning_rate": 2.7062449310624493e-05, "loss": 0.6825, "step": 17567 }, { "epoch": 0.5129193308224578, "grad_norm": 0.4958776470395963, "learning_rate": 2.7060827250608274e-05, "loss": 0.5914, "step": 17568 }, { "epoch": 0.5129485270503051, "grad_norm": 0.5443529081216213, "learning_rate": 2.7059205190592053e-05, "loss": 0.6199, "step": 17569 }, { "epoch": 0.5129777232781525, "grad_norm": 0.5250855660053548, "learning_rate": 2.705758313057583e-05, "loss": 0.6337, "step": 17570 }, { "epoch": 0.5130069195059999, "grad_norm": 0.5223677640428278, "learning_rate": 2.705596107055961e-05, "loss": 0.5788, "step": 17571 }, { "epoch": 0.5130361157338472, "grad_norm": 0.4939230756646236, "learning_rate": 2.7054339010543388e-05, "loss": 0.5762, "step": 17572 }, { "epoch": 0.5130653119616946, "grad_norm": 0.4786430916075806, "learning_rate": 2.705271695052717e-05, "loss": 0.5314, "step": 17573 }, { "epoch": 0.5130945081895419, "grad_norm": 0.5520026896846846, "learning_rate": 2.705109489051095e-05, "loss": 0.6842, "step": 17574 }, { "epoch": 0.5131237044173893, "grad_norm": 0.5571608231702052, "learning_rate": 2.7049472830494733e-05, "loss": 0.6388, "step": 17575 }, { "epoch": 0.5131529006452367, "grad_norm": 0.5650684175768187, "learning_rate": 2.704785077047851e-05, "loss": 0.6419, "step": 17576 }, { "epoch": 0.513182096873084, "grad_norm": 0.489269937378053, "learning_rate": 2.704622871046229e-05, "loss": 0.6007, "step": 17577 }, { "epoch": 0.5132112931009314, "grad_norm": 0.5163293314222086, "learning_rate": 2.7044606650446068e-05, "loss": 0.6223, "step": 17578 }, { "epoch": 0.5132404893287787, "grad_norm": 0.47959433884832037, "learning_rate": 2.704298459042985e-05, "loss": 0.5295, "step": 17579 }, { "epoch": 0.5132696855566261, "grad_norm": 0.5080872524473654, "learning_rate": 2.704136253041363e-05, "loss": 0.6373, "step": 17580 }, { "epoch": 0.5132988817844735, "grad_norm": 0.5400548213169147, "learning_rate": 2.7039740470397407e-05, "loss": 0.6812, "step": 17581 }, { "epoch": 0.5133280780123208, "grad_norm": 0.49747123407260757, "learning_rate": 2.7038118410381185e-05, "loss": 0.5633, "step": 17582 }, { "epoch": 0.5133572742401682, "grad_norm": 0.5289966600195053, "learning_rate": 2.7036496350364964e-05, "loss": 0.5735, "step": 17583 }, { "epoch": 0.5133864704680156, "grad_norm": 0.4980914222908482, "learning_rate": 2.7034874290348745e-05, "loss": 0.5836, "step": 17584 }, { "epoch": 0.5134156666958629, "grad_norm": 0.52387544193589, "learning_rate": 2.7033252230332524e-05, "loss": 0.6147, "step": 17585 }, { "epoch": 0.5134448629237103, "grad_norm": 0.49988980769913766, "learning_rate": 2.7031630170316302e-05, "loss": 0.5685, "step": 17586 }, { "epoch": 0.5134740591515576, "grad_norm": 0.5523983624646114, "learning_rate": 2.703000811030008e-05, "loss": 0.6465, "step": 17587 }, { "epoch": 0.513503255379405, "grad_norm": 0.48513529580807346, "learning_rate": 2.702838605028386e-05, "loss": 0.5443, "step": 17588 }, { "epoch": 0.5135324516072524, "grad_norm": 0.5285284694040552, "learning_rate": 2.702676399026764e-05, "loss": 0.6113, "step": 17589 }, { "epoch": 0.5135616478350997, "grad_norm": 0.5389246376034565, "learning_rate": 2.702514193025142e-05, "loss": 0.6416, "step": 17590 }, { "epoch": 0.5135908440629471, "grad_norm": 0.5727850421522805, "learning_rate": 2.7023519870235197e-05, "loss": 0.6883, "step": 17591 }, { "epoch": 0.5136200402907944, "grad_norm": 0.5348899639560305, "learning_rate": 2.7021897810218976e-05, "loss": 0.5613, "step": 17592 }, { "epoch": 0.5136492365186418, "grad_norm": 0.5335094954310529, "learning_rate": 2.702027575020276e-05, "loss": 0.6158, "step": 17593 }, { "epoch": 0.5136784327464892, "grad_norm": 0.5587558512828957, "learning_rate": 2.701865369018654e-05, "loss": 0.6411, "step": 17594 }, { "epoch": 0.5137076289743365, "grad_norm": 0.5190926449443448, "learning_rate": 2.701703163017032e-05, "loss": 0.5922, "step": 17595 }, { "epoch": 0.5137368252021839, "grad_norm": 0.5337322998966456, "learning_rate": 2.70154095701541e-05, "loss": 0.6025, "step": 17596 }, { "epoch": 0.5137660214300313, "grad_norm": 0.5168778897884605, "learning_rate": 2.7013787510137878e-05, "loss": 0.5787, "step": 17597 }, { "epoch": 0.5137952176578786, "grad_norm": 0.5354244328545299, "learning_rate": 2.7012165450121656e-05, "loss": 0.6519, "step": 17598 }, { "epoch": 0.513824413885726, "grad_norm": 0.4954098674691526, "learning_rate": 2.7010543390105434e-05, "loss": 0.5405, "step": 17599 }, { "epoch": 0.5138536101135733, "grad_norm": 0.4809664727280817, "learning_rate": 2.7008921330089216e-05, "loss": 0.5301, "step": 17600 }, { "epoch": 0.5138828063414207, "grad_norm": 0.523219832246262, "learning_rate": 2.7007299270072995e-05, "loss": 0.6154, "step": 17601 }, { "epoch": 0.5139120025692681, "grad_norm": 0.5725939915409904, "learning_rate": 2.7005677210056773e-05, "loss": 0.7113, "step": 17602 }, { "epoch": 0.5139411987971154, "grad_norm": 0.5254464770640724, "learning_rate": 2.700405515004055e-05, "loss": 0.5723, "step": 17603 }, { "epoch": 0.5139703950249628, "grad_norm": 0.5880809745705915, "learning_rate": 2.7002433090024333e-05, "loss": 0.6776, "step": 17604 }, { "epoch": 0.5139995912528101, "grad_norm": 0.48605812606168347, "learning_rate": 2.700081103000811e-05, "loss": 0.548, "step": 17605 }, { "epoch": 0.5140287874806575, "grad_norm": 0.5157923051822411, "learning_rate": 2.699918896999189e-05, "loss": 0.5973, "step": 17606 }, { "epoch": 0.5140579837085049, "grad_norm": 0.5161353448824721, "learning_rate": 2.6997566909975668e-05, "loss": 0.5904, "step": 17607 }, { "epoch": 0.5140871799363522, "grad_norm": 0.49276864587762953, "learning_rate": 2.6995944849959447e-05, "loss": 0.5494, "step": 17608 }, { "epoch": 0.5141163761641996, "grad_norm": 0.506184791075013, "learning_rate": 2.699432278994323e-05, "loss": 0.5698, "step": 17609 }, { "epoch": 0.514145572392047, "grad_norm": 0.5348653528726267, "learning_rate": 2.6992700729927007e-05, "loss": 0.6179, "step": 17610 }, { "epoch": 0.5141747686198943, "grad_norm": 0.5320268119778913, "learning_rate": 2.6991078669910785e-05, "loss": 0.6501, "step": 17611 }, { "epoch": 0.5142039648477417, "grad_norm": 0.5297074488621093, "learning_rate": 2.698945660989457e-05, "loss": 0.6414, "step": 17612 }, { "epoch": 0.514233161075589, "grad_norm": 0.535386657087703, "learning_rate": 2.698783454987835e-05, "loss": 0.5467, "step": 17613 }, { "epoch": 0.5142623573034364, "grad_norm": 0.5291141680433393, "learning_rate": 2.6986212489862127e-05, "loss": 0.653, "step": 17614 }, { "epoch": 0.5142915535312838, "grad_norm": 0.5379741835551733, "learning_rate": 2.698459042984591e-05, "loss": 0.625, "step": 17615 }, { "epoch": 0.5143207497591311, "grad_norm": 0.4936614864369024, "learning_rate": 2.6982968369829687e-05, "loss": 0.5972, "step": 17616 }, { "epoch": 0.5143499459869785, "grad_norm": 0.5270192601723477, "learning_rate": 2.6981346309813466e-05, "loss": 0.6026, "step": 17617 }, { "epoch": 0.5143791422148258, "grad_norm": 0.5024427705091408, "learning_rate": 2.6979724249797244e-05, "loss": 0.5726, "step": 17618 }, { "epoch": 0.5144083384426732, "grad_norm": 0.5314962347716975, "learning_rate": 2.6978102189781022e-05, "loss": 0.6267, "step": 17619 }, { "epoch": 0.5144375346705206, "grad_norm": 0.49252463010551367, "learning_rate": 2.6976480129764804e-05, "loss": 0.5386, "step": 17620 }, { "epoch": 0.5144667308983679, "grad_norm": 0.5440963667134494, "learning_rate": 2.6974858069748582e-05, "loss": 0.7088, "step": 17621 }, { "epoch": 0.5144959271262153, "grad_norm": 0.49683523992717155, "learning_rate": 2.697323600973236e-05, "loss": 0.5615, "step": 17622 }, { "epoch": 0.5145251233540626, "grad_norm": 0.5406232515170616, "learning_rate": 2.697161394971614e-05, "loss": 0.6226, "step": 17623 }, { "epoch": 0.51455431958191, "grad_norm": 0.4973868203378918, "learning_rate": 2.696999188969992e-05, "loss": 0.5863, "step": 17624 }, { "epoch": 0.5145835158097574, "grad_norm": 0.6317181612348897, "learning_rate": 2.69683698296837e-05, "loss": 0.7371, "step": 17625 }, { "epoch": 0.5146127120376047, "grad_norm": 0.5044634701977156, "learning_rate": 2.6966747769667478e-05, "loss": 0.5454, "step": 17626 }, { "epoch": 0.5146419082654521, "grad_norm": 0.49788372996434804, "learning_rate": 2.6965125709651256e-05, "loss": 0.5852, "step": 17627 }, { "epoch": 0.5146711044932994, "grad_norm": 0.506729520893507, "learning_rate": 2.6963503649635034e-05, "loss": 0.5917, "step": 17628 }, { "epoch": 0.5147003007211468, "grad_norm": 0.5065092471772126, "learning_rate": 2.6961881589618816e-05, "loss": 0.5679, "step": 17629 }, { "epoch": 0.5147294969489942, "grad_norm": 0.525883570475891, "learning_rate": 2.6960259529602595e-05, "loss": 0.6177, "step": 17630 }, { "epoch": 0.5147586931768415, "grad_norm": 0.5064873100548402, "learning_rate": 2.695863746958638e-05, "loss": 0.6172, "step": 17631 }, { "epoch": 0.5147878894046889, "grad_norm": 0.5028059464705433, "learning_rate": 2.6957015409570158e-05, "loss": 0.5582, "step": 17632 }, { "epoch": 0.5148170856325363, "grad_norm": 0.49710995231369076, "learning_rate": 2.6955393349553936e-05, "loss": 0.571, "step": 17633 }, { "epoch": 0.5148462818603836, "grad_norm": 0.5123108581153811, "learning_rate": 2.6953771289537715e-05, "loss": 0.5739, "step": 17634 }, { "epoch": 0.514875478088231, "grad_norm": 0.5066453324808056, "learning_rate": 2.6952149229521497e-05, "loss": 0.5959, "step": 17635 }, { "epoch": 0.5149046743160783, "grad_norm": 0.5027051286031423, "learning_rate": 2.6950527169505275e-05, "loss": 0.5884, "step": 17636 }, { "epoch": 0.5149338705439257, "grad_norm": 0.5289547362145317, "learning_rate": 2.6948905109489053e-05, "loss": 0.6271, "step": 17637 }, { "epoch": 0.5149630667717731, "grad_norm": 0.509251468229532, "learning_rate": 2.6947283049472832e-05, "loss": 0.5896, "step": 17638 }, { "epoch": 0.5149922629996204, "grad_norm": 0.5569129742348389, "learning_rate": 2.694566098945661e-05, "loss": 0.6609, "step": 17639 }, { "epoch": 0.5150214592274678, "grad_norm": 0.5468986086569988, "learning_rate": 2.6944038929440392e-05, "loss": 0.6229, "step": 17640 }, { "epoch": 0.5150506554553151, "grad_norm": 0.5326296447943163, "learning_rate": 2.694241686942417e-05, "loss": 0.6414, "step": 17641 }, { "epoch": 0.5150798516831625, "grad_norm": 0.5530064310896947, "learning_rate": 2.694079480940795e-05, "loss": 0.6277, "step": 17642 }, { "epoch": 0.5151090479110099, "grad_norm": 0.5356976832815293, "learning_rate": 2.6939172749391727e-05, "loss": 0.6265, "step": 17643 }, { "epoch": 0.5151382441388572, "grad_norm": 0.4763479559532144, "learning_rate": 2.6937550689375505e-05, "loss": 0.552, "step": 17644 }, { "epoch": 0.5151674403667046, "grad_norm": 0.5118573605216522, "learning_rate": 2.6935928629359287e-05, "loss": 0.5817, "step": 17645 }, { "epoch": 0.515196636594552, "grad_norm": 0.5230092602396468, "learning_rate": 2.6934306569343065e-05, "loss": 0.5749, "step": 17646 }, { "epoch": 0.5152258328223993, "grad_norm": 0.47730439070990577, "learning_rate": 2.6932684509326844e-05, "loss": 0.5656, "step": 17647 }, { "epoch": 0.5152550290502467, "grad_norm": 0.5216908765435867, "learning_rate": 2.6931062449310622e-05, "loss": 0.5767, "step": 17648 }, { "epoch": 0.515284225278094, "grad_norm": 0.5274043946954441, "learning_rate": 2.6929440389294404e-05, "loss": 0.6479, "step": 17649 }, { "epoch": 0.5153134215059414, "grad_norm": 0.5348202072427561, "learning_rate": 2.6927818329278186e-05, "loss": 0.6264, "step": 17650 }, { "epoch": 0.5153426177337888, "grad_norm": 0.5506466670099321, "learning_rate": 2.6926196269261968e-05, "loss": 0.6288, "step": 17651 }, { "epoch": 0.5153718139616361, "grad_norm": 0.49397283901328165, "learning_rate": 2.6924574209245746e-05, "loss": 0.5726, "step": 17652 }, { "epoch": 0.5154010101894835, "grad_norm": 0.46503841163183035, "learning_rate": 2.6922952149229524e-05, "loss": 0.5155, "step": 17653 }, { "epoch": 0.5154302064173308, "grad_norm": 0.534731311163149, "learning_rate": 2.6921330089213303e-05, "loss": 0.6407, "step": 17654 }, { "epoch": 0.5154594026451782, "grad_norm": 0.5597587669155142, "learning_rate": 2.691970802919708e-05, "loss": 0.5571, "step": 17655 }, { "epoch": 0.5154885988730256, "grad_norm": 0.553990515019105, "learning_rate": 2.6918085969180863e-05, "loss": 0.664, "step": 17656 }, { "epoch": 0.5155177951008729, "grad_norm": 0.5269384873978756, "learning_rate": 2.691646390916464e-05, "loss": 0.6321, "step": 17657 }, { "epoch": 0.5155469913287203, "grad_norm": 0.5222435005542523, "learning_rate": 2.691484184914842e-05, "loss": 0.6339, "step": 17658 }, { "epoch": 0.5155761875565676, "grad_norm": 0.5163835440159349, "learning_rate": 2.6913219789132198e-05, "loss": 0.6056, "step": 17659 }, { "epoch": 0.515605383784415, "grad_norm": 0.5602722005265003, "learning_rate": 2.691159772911598e-05, "loss": 0.6995, "step": 17660 }, { "epoch": 0.5156345800122624, "grad_norm": 0.5001019109869269, "learning_rate": 2.6909975669099758e-05, "loss": 0.5716, "step": 17661 }, { "epoch": 0.5156637762401097, "grad_norm": 0.5197655459798531, "learning_rate": 2.6908353609083536e-05, "loss": 0.6441, "step": 17662 }, { "epoch": 0.5156929724679571, "grad_norm": 0.4986309272901289, "learning_rate": 2.6906731549067315e-05, "loss": 0.5776, "step": 17663 }, { "epoch": 0.5157221686958045, "grad_norm": 0.5527530206963523, "learning_rate": 2.6905109489051093e-05, "loss": 0.6521, "step": 17664 }, { "epoch": 0.5157513649236518, "grad_norm": 0.5381065406255017, "learning_rate": 2.6903487429034875e-05, "loss": 0.6106, "step": 17665 }, { "epoch": 0.5157805611514992, "grad_norm": 0.5112836432974575, "learning_rate": 2.6901865369018653e-05, "loss": 0.5872, "step": 17666 }, { "epoch": 0.5158097573793465, "grad_norm": 0.5324197491306185, "learning_rate": 2.690024330900243e-05, "loss": 0.6104, "step": 17667 }, { "epoch": 0.5158389536071939, "grad_norm": 0.5460852490282909, "learning_rate": 2.689862124898621e-05, "loss": 0.7099, "step": 17668 }, { "epoch": 0.5158681498350414, "grad_norm": 0.5095672806713439, "learning_rate": 2.6896999188969995e-05, "loss": 0.5756, "step": 17669 }, { "epoch": 0.5158973460628887, "grad_norm": 0.5152921337469628, "learning_rate": 2.6895377128953774e-05, "loss": 0.6034, "step": 17670 }, { "epoch": 0.5159265422907361, "grad_norm": 0.5058720604132638, "learning_rate": 2.6893755068937555e-05, "loss": 0.5696, "step": 17671 }, { "epoch": 0.5159557385185835, "grad_norm": 0.4948003185540838, "learning_rate": 2.6892133008921334e-05, "loss": 0.5269, "step": 17672 }, { "epoch": 0.5159849347464308, "grad_norm": 0.5833727190392614, "learning_rate": 2.6890510948905112e-05, "loss": 0.6961, "step": 17673 }, { "epoch": 0.5160141309742782, "grad_norm": 0.56338248485468, "learning_rate": 2.688888888888889e-05, "loss": 0.6609, "step": 17674 }, { "epoch": 0.5160433272021255, "grad_norm": 0.5428658833882662, "learning_rate": 2.688726682887267e-05, "loss": 0.6744, "step": 17675 }, { "epoch": 0.5160725234299729, "grad_norm": 0.4853368513917136, "learning_rate": 2.688564476885645e-05, "loss": 0.5597, "step": 17676 }, { "epoch": 0.5161017196578203, "grad_norm": 0.49955829663323714, "learning_rate": 2.688402270884023e-05, "loss": 0.5483, "step": 17677 }, { "epoch": 0.5161309158856676, "grad_norm": 0.5039154229118882, "learning_rate": 2.6882400648824007e-05, "loss": 0.5771, "step": 17678 }, { "epoch": 0.516160112113515, "grad_norm": 0.5523245737624635, "learning_rate": 2.6880778588807786e-05, "loss": 0.6915, "step": 17679 }, { "epoch": 0.5161893083413623, "grad_norm": 0.563751863213241, "learning_rate": 2.6879156528791567e-05, "loss": 0.6757, "step": 17680 }, { "epoch": 0.5162185045692097, "grad_norm": 0.4859051276918781, "learning_rate": 2.6877534468775346e-05, "loss": 0.5435, "step": 17681 }, { "epoch": 0.5162477007970571, "grad_norm": 0.47970101488147965, "learning_rate": 2.6875912408759124e-05, "loss": 0.5692, "step": 17682 }, { "epoch": 0.5162768970249044, "grad_norm": 0.49106683243144017, "learning_rate": 2.6874290348742903e-05, "loss": 0.5662, "step": 17683 }, { "epoch": 0.5163060932527518, "grad_norm": 0.5067474811513827, "learning_rate": 2.687266828872668e-05, "loss": 0.5806, "step": 17684 }, { "epoch": 0.5163352894805991, "grad_norm": 0.5058971074324711, "learning_rate": 2.6871046228710463e-05, "loss": 0.625, "step": 17685 }, { "epoch": 0.5163644857084465, "grad_norm": 0.5281787709951814, "learning_rate": 2.686942416869424e-05, "loss": 0.6094, "step": 17686 }, { "epoch": 0.5163936819362939, "grad_norm": 0.523707075183957, "learning_rate": 2.686780210867802e-05, "loss": 0.6278, "step": 17687 }, { "epoch": 0.5164228781641412, "grad_norm": 0.552638835891776, "learning_rate": 2.6866180048661805e-05, "loss": 0.6512, "step": 17688 }, { "epoch": 0.5164520743919886, "grad_norm": 0.5042457756939012, "learning_rate": 2.6864557988645583e-05, "loss": 0.5622, "step": 17689 }, { "epoch": 0.516481270619836, "grad_norm": 0.514504791182648, "learning_rate": 2.686293592862936e-05, "loss": 0.6175, "step": 17690 }, { "epoch": 0.5165104668476833, "grad_norm": 0.5443014114370681, "learning_rate": 2.6861313868613143e-05, "loss": 0.655, "step": 17691 }, { "epoch": 0.5165396630755307, "grad_norm": 0.4580451356602807, "learning_rate": 2.685969180859692e-05, "loss": 0.4875, "step": 17692 }, { "epoch": 0.516568859303378, "grad_norm": 0.5077633026797543, "learning_rate": 2.68580697485807e-05, "loss": 0.5788, "step": 17693 }, { "epoch": 0.5165980555312254, "grad_norm": 0.5001399822944905, "learning_rate": 2.6856447688564478e-05, "loss": 0.5732, "step": 17694 }, { "epoch": 0.5166272517590728, "grad_norm": 0.5152563075278451, "learning_rate": 2.6854825628548257e-05, "loss": 0.6252, "step": 17695 }, { "epoch": 0.5166564479869201, "grad_norm": 0.5357703351213025, "learning_rate": 2.685320356853204e-05, "loss": 0.6152, "step": 17696 }, { "epoch": 0.5166856442147675, "grad_norm": 0.5757595095040635, "learning_rate": 2.6851581508515817e-05, "loss": 0.6791, "step": 17697 }, { "epoch": 0.5167148404426148, "grad_norm": 0.5270889289304146, "learning_rate": 2.6849959448499595e-05, "loss": 0.6148, "step": 17698 }, { "epoch": 0.5167440366704622, "grad_norm": 0.526317821503914, "learning_rate": 2.6848337388483374e-05, "loss": 0.5963, "step": 17699 }, { "epoch": 0.5167732328983096, "grad_norm": 0.5518896514977287, "learning_rate": 2.6846715328467152e-05, "loss": 0.6955, "step": 17700 }, { "epoch": 0.5168024291261569, "grad_norm": 0.5445390235180934, "learning_rate": 2.6845093268450934e-05, "loss": 0.676, "step": 17701 }, { "epoch": 0.5168316253540043, "grad_norm": 0.49655927289969154, "learning_rate": 2.6843471208434712e-05, "loss": 0.5942, "step": 17702 }, { "epoch": 0.5168608215818516, "grad_norm": 0.5367299564167239, "learning_rate": 2.684184914841849e-05, "loss": 0.6128, "step": 17703 }, { "epoch": 0.516890017809699, "grad_norm": 0.5621642626188402, "learning_rate": 2.684022708840227e-05, "loss": 0.6871, "step": 17704 }, { "epoch": 0.5169192140375464, "grad_norm": 0.5474150927833352, "learning_rate": 2.683860502838605e-05, "loss": 0.692, "step": 17705 }, { "epoch": 0.5169484102653937, "grad_norm": 0.49518944554364336, "learning_rate": 2.6836982968369832e-05, "loss": 0.5874, "step": 17706 }, { "epoch": 0.5169776064932411, "grad_norm": 0.5100303659179363, "learning_rate": 2.6835360908353614e-05, "loss": 0.6003, "step": 17707 }, { "epoch": 0.5170068027210885, "grad_norm": 0.5256536730098892, "learning_rate": 2.6833738848337392e-05, "loss": 0.6014, "step": 17708 }, { "epoch": 0.5170359989489358, "grad_norm": 0.5040135909593947, "learning_rate": 2.683211678832117e-05, "loss": 0.5838, "step": 17709 }, { "epoch": 0.5170651951767832, "grad_norm": 0.5463239345501693, "learning_rate": 2.683049472830495e-05, "loss": 0.6343, "step": 17710 }, { "epoch": 0.5170943914046305, "grad_norm": 0.5221704239933611, "learning_rate": 2.682887266828873e-05, "loss": 0.5806, "step": 17711 }, { "epoch": 0.5171235876324779, "grad_norm": 0.5355411479220403, "learning_rate": 2.682725060827251e-05, "loss": 0.5587, "step": 17712 }, { "epoch": 0.5171527838603253, "grad_norm": 0.563705629503771, "learning_rate": 2.6825628548256288e-05, "loss": 0.682, "step": 17713 }, { "epoch": 0.5171819800881726, "grad_norm": 0.4845537411471105, "learning_rate": 2.6824006488240066e-05, "loss": 0.5557, "step": 17714 }, { "epoch": 0.51721117631602, "grad_norm": 0.524169645148023, "learning_rate": 2.6822384428223844e-05, "loss": 0.6201, "step": 17715 }, { "epoch": 0.5172403725438673, "grad_norm": 0.5212749417691893, "learning_rate": 2.6820762368207626e-05, "loss": 0.5972, "step": 17716 }, { "epoch": 0.5172695687717147, "grad_norm": 0.5147399432510362, "learning_rate": 2.6819140308191405e-05, "loss": 0.6117, "step": 17717 }, { "epoch": 0.5172987649995621, "grad_norm": 0.5599569513919648, "learning_rate": 2.6817518248175183e-05, "loss": 0.7069, "step": 17718 }, { "epoch": 0.5173279612274094, "grad_norm": 0.5436904892994698, "learning_rate": 2.681589618815896e-05, "loss": 0.6574, "step": 17719 }, { "epoch": 0.5173571574552568, "grad_norm": 0.5141961076671681, "learning_rate": 2.681427412814274e-05, "loss": 0.6063, "step": 17720 }, { "epoch": 0.5173863536831041, "grad_norm": 0.49515559620230637, "learning_rate": 2.681265206812652e-05, "loss": 0.5443, "step": 17721 }, { "epoch": 0.5174155499109515, "grad_norm": 0.5051685556041744, "learning_rate": 2.68110300081103e-05, "loss": 0.5753, "step": 17722 }, { "epoch": 0.5174447461387989, "grad_norm": 0.5106505055278736, "learning_rate": 2.6809407948094078e-05, "loss": 0.579, "step": 17723 }, { "epoch": 0.5174739423666462, "grad_norm": 0.49952061743224774, "learning_rate": 2.6807785888077857e-05, "loss": 0.564, "step": 17724 }, { "epoch": 0.5175031385944936, "grad_norm": 0.5159968734478493, "learning_rate": 2.6806163828061642e-05, "loss": 0.6087, "step": 17725 }, { "epoch": 0.517532334822341, "grad_norm": 0.5322137075278718, "learning_rate": 2.680454176804542e-05, "loss": 0.5875, "step": 17726 }, { "epoch": 0.5175615310501883, "grad_norm": 0.4983653235644286, "learning_rate": 2.6802919708029202e-05, "loss": 0.5576, "step": 17727 }, { "epoch": 0.5175907272780357, "grad_norm": 0.5714894588654851, "learning_rate": 2.680129764801298e-05, "loss": 0.7045, "step": 17728 }, { "epoch": 0.517619923505883, "grad_norm": 0.5410537048669665, "learning_rate": 2.679967558799676e-05, "loss": 0.6412, "step": 17729 }, { "epoch": 0.5176491197337304, "grad_norm": 0.5542385815054313, "learning_rate": 2.6798053527980537e-05, "loss": 0.605, "step": 17730 }, { "epoch": 0.5176783159615778, "grad_norm": 0.5136887211753997, "learning_rate": 2.6796431467964315e-05, "loss": 0.5653, "step": 17731 }, { "epoch": 0.5177075121894251, "grad_norm": 0.5460098310795063, "learning_rate": 2.6794809407948097e-05, "loss": 0.6319, "step": 17732 }, { "epoch": 0.5177367084172725, "grad_norm": 0.5054349489577928, "learning_rate": 2.6793187347931875e-05, "loss": 0.5541, "step": 17733 }, { "epoch": 0.5177659046451198, "grad_norm": 0.5303711845485035, "learning_rate": 2.6791565287915654e-05, "loss": 0.6416, "step": 17734 }, { "epoch": 0.5177951008729672, "grad_norm": 0.5121971479218687, "learning_rate": 2.6789943227899432e-05, "loss": 0.6071, "step": 17735 }, { "epoch": 0.5178242971008146, "grad_norm": 0.5535593744093141, "learning_rate": 2.6788321167883214e-05, "loss": 0.6805, "step": 17736 }, { "epoch": 0.5178534933286619, "grad_norm": 0.5047572837554586, "learning_rate": 2.6786699107866992e-05, "loss": 0.5836, "step": 17737 }, { "epoch": 0.5178826895565093, "grad_norm": 0.5326188660430649, "learning_rate": 2.678507704785077e-05, "loss": 0.6382, "step": 17738 }, { "epoch": 0.5179118857843567, "grad_norm": 0.559001530744115, "learning_rate": 2.678345498783455e-05, "loss": 0.6457, "step": 17739 }, { "epoch": 0.517941082012204, "grad_norm": 0.53548140212038, "learning_rate": 2.6781832927818328e-05, "loss": 0.6474, "step": 17740 }, { "epoch": 0.5179702782400514, "grad_norm": 0.5301385015849248, "learning_rate": 2.678021086780211e-05, "loss": 0.6612, "step": 17741 }, { "epoch": 0.5179994744678987, "grad_norm": 0.5263056194706709, "learning_rate": 2.6778588807785888e-05, "loss": 0.6441, "step": 17742 }, { "epoch": 0.5180286706957461, "grad_norm": 0.4888707096330124, "learning_rate": 2.6776966747769666e-05, "loss": 0.5551, "step": 17743 }, { "epoch": 0.5180578669235935, "grad_norm": 0.5659639783931616, "learning_rate": 2.677534468775345e-05, "loss": 0.6412, "step": 17744 }, { "epoch": 0.5180870631514408, "grad_norm": 0.4771373127139971, "learning_rate": 2.677372262773723e-05, "loss": 0.5488, "step": 17745 }, { "epoch": 0.5181162593792882, "grad_norm": 0.5313317950918851, "learning_rate": 2.6772100567721008e-05, "loss": 0.6263, "step": 17746 }, { "epoch": 0.5181454556071355, "grad_norm": 0.5353895147492489, "learning_rate": 2.677047850770479e-05, "loss": 0.6156, "step": 17747 }, { "epoch": 0.5181746518349829, "grad_norm": 0.5700183100037606, "learning_rate": 2.6768856447688568e-05, "loss": 0.6776, "step": 17748 }, { "epoch": 0.5182038480628303, "grad_norm": 0.5224683092577252, "learning_rate": 2.6767234387672346e-05, "loss": 0.5894, "step": 17749 }, { "epoch": 0.5182330442906776, "grad_norm": 0.5025485998972419, "learning_rate": 2.6765612327656125e-05, "loss": 0.4935, "step": 17750 }, { "epoch": 0.518262240518525, "grad_norm": 0.5272719954895562, "learning_rate": 2.6763990267639903e-05, "loss": 0.6272, "step": 17751 }, { "epoch": 0.5182914367463723, "grad_norm": 0.5163982219910052, "learning_rate": 2.6762368207623685e-05, "loss": 0.585, "step": 17752 }, { "epoch": 0.5183206329742197, "grad_norm": 0.5368375227880291, "learning_rate": 2.6760746147607463e-05, "loss": 0.5827, "step": 17753 }, { "epoch": 0.5183498292020671, "grad_norm": 0.5447594196735768, "learning_rate": 2.675912408759124e-05, "loss": 0.7051, "step": 17754 }, { "epoch": 0.5183790254299144, "grad_norm": 0.5524894129675375, "learning_rate": 2.675750202757502e-05, "loss": 0.6729, "step": 17755 }, { "epoch": 0.5184082216577618, "grad_norm": 0.5270514224073378, "learning_rate": 2.67558799675588e-05, "loss": 0.6089, "step": 17756 }, { "epoch": 0.5184374178856092, "grad_norm": 0.5303158229814438, "learning_rate": 2.675425790754258e-05, "loss": 0.6115, "step": 17757 }, { "epoch": 0.5184666141134565, "grad_norm": 0.5365224065711085, "learning_rate": 2.675263584752636e-05, "loss": 0.6459, "step": 17758 }, { "epoch": 0.5184958103413039, "grad_norm": 0.5093267640691579, "learning_rate": 2.6751013787510137e-05, "loss": 0.6142, "step": 17759 }, { "epoch": 0.5185250065691512, "grad_norm": 0.555511767704749, "learning_rate": 2.6749391727493915e-05, "loss": 0.673, "step": 17760 }, { "epoch": 0.5185542027969986, "grad_norm": 0.5181020461753745, "learning_rate": 2.6747769667477697e-05, "loss": 0.607, "step": 17761 }, { "epoch": 0.518583399024846, "grad_norm": 0.5754816009926627, "learning_rate": 2.6746147607461475e-05, "loss": 0.604, "step": 17762 }, { "epoch": 0.5186125952526933, "grad_norm": 0.5226476328801455, "learning_rate": 2.674452554744526e-05, "loss": 0.6489, "step": 17763 }, { "epoch": 0.5186417914805407, "grad_norm": 0.5286934919033711, "learning_rate": 2.674290348742904e-05, "loss": 0.6146, "step": 17764 }, { "epoch": 0.518670987708388, "grad_norm": 0.4996819744422972, "learning_rate": 2.6741281427412817e-05, "loss": 0.5619, "step": 17765 }, { "epoch": 0.5187001839362354, "grad_norm": 0.5570680579975413, "learning_rate": 2.6739659367396596e-05, "loss": 0.7334, "step": 17766 }, { "epoch": 0.5187293801640828, "grad_norm": 0.48921319815823316, "learning_rate": 2.6738037307380377e-05, "loss": 0.4878, "step": 17767 }, { "epoch": 0.5187585763919301, "grad_norm": 0.5586007703234646, "learning_rate": 2.6736415247364156e-05, "loss": 0.6752, "step": 17768 }, { "epoch": 0.5187877726197775, "grad_norm": 0.5229482870433259, "learning_rate": 2.6734793187347934e-05, "loss": 0.621, "step": 17769 }, { "epoch": 0.5188169688476248, "grad_norm": 0.5317005376198902, "learning_rate": 2.6733171127331713e-05, "loss": 0.6595, "step": 17770 }, { "epoch": 0.5188461650754722, "grad_norm": 0.538107063491384, "learning_rate": 2.673154906731549e-05, "loss": 0.6569, "step": 17771 }, { "epoch": 0.5188753613033196, "grad_norm": 0.5921212774697023, "learning_rate": 2.6729927007299273e-05, "loss": 0.7114, "step": 17772 }, { "epoch": 0.5189045575311669, "grad_norm": 0.5133558503036965, "learning_rate": 2.672830494728305e-05, "loss": 0.5957, "step": 17773 }, { "epoch": 0.5189337537590143, "grad_norm": 0.5089069258521126, "learning_rate": 2.672668288726683e-05, "loss": 0.6001, "step": 17774 }, { "epoch": 0.5189629499868617, "grad_norm": 0.5491174757786881, "learning_rate": 2.6725060827250608e-05, "loss": 0.6705, "step": 17775 }, { "epoch": 0.518992146214709, "grad_norm": 0.5304889596025373, "learning_rate": 2.6723438767234386e-05, "loss": 0.6288, "step": 17776 }, { "epoch": 0.5190213424425564, "grad_norm": 0.5579353730287769, "learning_rate": 2.6721816707218168e-05, "loss": 0.699, "step": 17777 }, { "epoch": 0.5190505386704037, "grad_norm": 0.5508021747898244, "learning_rate": 2.6720194647201946e-05, "loss": 0.6392, "step": 17778 }, { "epoch": 0.5190797348982511, "grad_norm": 0.5179042821627108, "learning_rate": 2.6718572587185725e-05, "loss": 0.6148, "step": 17779 }, { "epoch": 0.5191089311260985, "grad_norm": 0.48714344219697675, "learning_rate": 2.6716950527169503e-05, "loss": 0.5413, "step": 17780 }, { "epoch": 0.5191381273539458, "grad_norm": 0.5413199333500145, "learning_rate": 2.6715328467153285e-05, "loss": 0.6396, "step": 17781 }, { "epoch": 0.5191673235817932, "grad_norm": 0.5272744834717619, "learning_rate": 2.6713706407137067e-05, "loss": 0.6144, "step": 17782 }, { "epoch": 0.5191965198096405, "grad_norm": 0.5100085783382189, "learning_rate": 2.671208434712085e-05, "loss": 0.5511, "step": 17783 }, { "epoch": 0.5192257160374879, "grad_norm": 0.4993579066182441, "learning_rate": 2.6710462287104627e-05, "loss": 0.5463, "step": 17784 }, { "epoch": 0.5192549122653353, "grad_norm": 0.5267342882222381, "learning_rate": 2.6708840227088405e-05, "loss": 0.6149, "step": 17785 }, { "epoch": 0.5192841084931826, "grad_norm": 0.5449561738251968, "learning_rate": 2.6707218167072184e-05, "loss": 0.6731, "step": 17786 }, { "epoch": 0.51931330472103, "grad_norm": 0.598569150023352, "learning_rate": 2.6705596107055962e-05, "loss": 0.617, "step": 17787 }, { "epoch": 0.5193425009488774, "grad_norm": 0.5703285379433749, "learning_rate": 2.6703974047039744e-05, "loss": 0.7612, "step": 17788 }, { "epoch": 0.5193716971767248, "grad_norm": 0.5003512733930687, "learning_rate": 2.6702351987023522e-05, "loss": 0.606, "step": 17789 }, { "epoch": 0.5194008934045722, "grad_norm": 0.47374508102908774, "learning_rate": 2.67007299270073e-05, "loss": 0.5526, "step": 17790 }, { "epoch": 0.5194300896324195, "grad_norm": 0.5826348241561317, "learning_rate": 2.669910786699108e-05, "loss": 0.7199, "step": 17791 }, { "epoch": 0.5194592858602669, "grad_norm": 0.5177578725930764, "learning_rate": 2.669748580697486e-05, "loss": 0.641, "step": 17792 }, { "epoch": 0.5194884820881143, "grad_norm": 0.5350282683090867, "learning_rate": 2.669586374695864e-05, "loss": 0.6022, "step": 17793 }, { "epoch": 0.5195176783159616, "grad_norm": 0.574277040842001, "learning_rate": 2.6694241686942417e-05, "loss": 0.7281, "step": 17794 }, { "epoch": 0.519546874543809, "grad_norm": 0.5481438471332897, "learning_rate": 2.6692619626926196e-05, "loss": 0.6827, "step": 17795 }, { "epoch": 0.5195760707716564, "grad_norm": 0.5316919853781447, "learning_rate": 2.6690997566909974e-05, "loss": 0.6602, "step": 17796 }, { "epoch": 0.5196052669995037, "grad_norm": 0.5908453461289297, "learning_rate": 2.6689375506893756e-05, "loss": 0.703, "step": 17797 }, { "epoch": 0.5196344632273511, "grad_norm": 0.5261697555136038, "learning_rate": 2.6687753446877534e-05, "loss": 0.6291, "step": 17798 }, { "epoch": 0.5196636594551984, "grad_norm": 0.5029050115203838, "learning_rate": 2.6686131386861313e-05, "loss": 0.577, "step": 17799 }, { "epoch": 0.5196928556830458, "grad_norm": 0.5198228713327743, "learning_rate": 2.668450932684509e-05, "loss": 0.6073, "step": 17800 }, { "epoch": 0.5197220519108932, "grad_norm": 0.5703713026967493, "learning_rate": 2.6682887266828876e-05, "loss": 0.6239, "step": 17801 }, { "epoch": 0.5197512481387405, "grad_norm": 0.5395880932378695, "learning_rate": 2.6681265206812654e-05, "loss": 0.6374, "step": 17802 }, { "epoch": 0.5197804443665879, "grad_norm": 0.538205909377737, "learning_rate": 2.6679643146796436e-05, "loss": 0.6445, "step": 17803 }, { "epoch": 0.5198096405944352, "grad_norm": 0.5204255324872709, "learning_rate": 2.6678021086780215e-05, "loss": 0.5776, "step": 17804 }, { "epoch": 0.5198388368222826, "grad_norm": 0.5986085066416256, "learning_rate": 2.6676399026763993e-05, "loss": 0.6937, "step": 17805 }, { "epoch": 0.51986803305013, "grad_norm": 0.5195397964228394, "learning_rate": 2.667477696674777e-05, "loss": 0.6207, "step": 17806 }, { "epoch": 0.5198972292779773, "grad_norm": 0.529540798825777, "learning_rate": 2.667315490673155e-05, "loss": 0.6119, "step": 17807 }, { "epoch": 0.5199264255058247, "grad_norm": 0.5171065129430906, "learning_rate": 2.667153284671533e-05, "loss": 0.5723, "step": 17808 }, { "epoch": 0.519955621733672, "grad_norm": 0.5561340097440681, "learning_rate": 2.666991078669911e-05, "loss": 0.5989, "step": 17809 }, { "epoch": 0.5199848179615194, "grad_norm": 0.504676940252611, "learning_rate": 2.6668288726682888e-05, "loss": 0.5886, "step": 17810 }, { "epoch": 0.5200140141893668, "grad_norm": 0.5018355287449652, "learning_rate": 2.6666666666666667e-05, "loss": 0.5338, "step": 17811 }, { "epoch": 0.5200432104172141, "grad_norm": 0.539140695940694, "learning_rate": 2.666504460665045e-05, "loss": 0.6121, "step": 17812 }, { "epoch": 0.5200724066450615, "grad_norm": 0.540331212303838, "learning_rate": 2.6663422546634227e-05, "loss": 0.6401, "step": 17813 }, { "epoch": 0.5201016028729089, "grad_norm": 0.5424027706560874, "learning_rate": 2.6661800486618005e-05, "loss": 0.6416, "step": 17814 }, { "epoch": 0.5201307991007562, "grad_norm": 0.5286092608689895, "learning_rate": 2.6660178426601783e-05, "loss": 0.632, "step": 17815 }, { "epoch": 0.5201599953286036, "grad_norm": 0.5241184188185433, "learning_rate": 2.6658556366585562e-05, "loss": 0.5968, "step": 17816 }, { "epoch": 0.5201891915564509, "grad_norm": 0.5756081055897905, "learning_rate": 2.6656934306569344e-05, "loss": 0.6, "step": 17817 }, { "epoch": 0.5202183877842983, "grad_norm": 0.5842878430348954, "learning_rate": 2.6655312246553122e-05, "loss": 0.669, "step": 17818 }, { "epoch": 0.5202475840121457, "grad_norm": 0.5227368961049969, "learning_rate": 2.66536901865369e-05, "loss": 0.6498, "step": 17819 }, { "epoch": 0.520276780239993, "grad_norm": 0.5869598436359779, "learning_rate": 2.6652068126520686e-05, "loss": 0.6602, "step": 17820 }, { "epoch": 0.5203059764678404, "grad_norm": 0.5014990515423704, "learning_rate": 2.6650446066504464e-05, "loss": 0.5831, "step": 17821 }, { "epoch": 0.5203351726956877, "grad_norm": 0.5258690792441074, "learning_rate": 2.6648824006488242e-05, "loss": 0.631, "step": 17822 }, { "epoch": 0.5203643689235351, "grad_norm": 0.5298136536637768, "learning_rate": 2.6647201946472024e-05, "loss": 0.6167, "step": 17823 }, { "epoch": 0.5203935651513825, "grad_norm": 0.5116161513559386, "learning_rate": 2.6645579886455802e-05, "loss": 0.6094, "step": 17824 }, { "epoch": 0.5204227613792298, "grad_norm": 0.5038811424559384, "learning_rate": 2.664395782643958e-05, "loss": 0.5611, "step": 17825 }, { "epoch": 0.5204519576070772, "grad_norm": 0.5714201147924902, "learning_rate": 2.664233576642336e-05, "loss": 0.6764, "step": 17826 }, { "epoch": 0.5204811538349245, "grad_norm": 0.5339096706892853, "learning_rate": 2.6640713706407138e-05, "loss": 0.6334, "step": 17827 }, { "epoch": 0.5205103500627719, "grad_norm": 0.5425282440784936, "learning_rate": 2.663909164639092e-05, "loss": 0.6743, "step": 17828 }, { "epoch": 0.5205395462906193, "grad_norm": 0.5353066396566879, "learning_rate": 2.6637469586374698e-05, "loss": 0.6407, "step": 17829 }, { "epoch": 0.5205687425184666, "grad_norm": 0.48097753491897616, "learning_rate": 2.6635847526358476e-05, "loss": 0.5266, "step": 17830 }, { "epoch": 0.520597938746314, "grad_norm": 0.5352649862647239, "learning_rate": 2.6634225466342254e-05, "loss": 0.6619, "step": 17831 }, { "epoch": 0.5206271349741614, "grad_norm": 0.5224227469724454, "learning_rate": 2.6632603406326033e-05, "loss": 0.6504, "step": 17832 }, { "epoch": 0.5206563312020087, "grad_norm": 0.5203322779682652, "learning_rate": 2.6630981346309815e-05, "loss": 0.548, "step": 17833 }, { "epoch": 0.5206855274298561, "grad_norm": 0.5510434462532193, "learning_rate": 2.6629359286293593e-05, "loss": 0.6094, "step": 17834 }, { "epoch": 0.5207147236577034, "grad_norm": 0.5779236221272155, "learning_rate": 2.662773722627737e-05, "loss": 0.665, "step": 17835 }, { "epoch": 0.5207439198855508, "grad_norm": 0.5363093000520952, "learning_rate": 2.662611516626115e-05, "loss": 0.6049, "step": 17836 }, { "epoch": 0.5207731161133982, "grad_norm": 0.558623016748157, "learning_rate": 2.662449310624493e-05, "loss": 0.6582, "step": 17837 }, { "epoch": 0.5208023123412455, "grad_norm": 0.5574727185222095, "learning_rate": 2.662287104622871e-05, "loss": 0.6605, "step": 17838 }, { "epoch": 0.5208315085690929, "grad_norm": 0.49475400992318663, "learning_rate": 2.6621248986212495e-05, "loss": 0.5765, "step": 17839 }, { "epoch": 0.5208607047969402, "grad_norm": 0.5071588656585411, "learning_rate": 2.6619626926196273e-05, "loss": 0.6247, "step": 17840 }, { "epoch": 0.5208899010247876, "grad_norm": 0.48603812902188953, "learning_rate": 2.661800486618005e-05, "loss": 0.567, "step": 17841 }, { "epoch": 0.520919097252635, "grad_norm": 0.5384989009774871, "learning_rate": 2.661638280616383e-05, "loss": 0.6051, "step": 17842 }, { "epoch": 0.5209482934804823, "grad_norm": 0.49739682035847216, "learning_rate": 2.661476074614761e-05, "loss": 0.5853, "step": 17843 }, { "epoch": 0.5209774897083297, "grad_norm": 0.5698824871172579, "learning_rate": 2.661313868613139e-05, "loss": 0.7172, "step": 17844 }, { "epoch": 0.521006685936177, "grad_norm": 0.5189775582147967, "learning_rate": 2.661151662611517e-05, "loss": 0.6252, "step": 17845 }, { "epoch": 0.5210358821640244, "grad_norm": 0.5258535685432986, "learning_rate": 2.6609894566098947e-05, "loss": 0.6347, "step": 17846 }, { "epoch": 0.5210650783918718, "grad_norm": 0.5428898437510097, "learning_rate": 2.6608272506082725e-05, "loss": 0.6303, "step": 17847 }, { "epoch": 0.5210942746197191, "grad_norm": 0.5020773126793738, "learning_rate": 2.6606650446066507e-05, "loss": 0.5607, "step": 17848 }, { "epoch": 0.5211234708475665, "grad_norm": 0.5379965439833709, "learning_rate": 2.6605028386050285e-05, "loss": 0.6243, "step": 17849 }, { "epoch": 0.5211526670754139, "grad_norm": 0.5506670273476462, "learning_rate": 2.6603406326034064e-05, "loss": 0.5775, "step": 17850 }, { "epoch": 0.5211818633032612, "grad_norm": 0.4928296629873311, "learning_rate": 2.6601784266017842e-05, "loss": 0.5694, "step": 17851 }, { "epoch": 0.5212110595311086, "grad_norm": 0.5104270856226178, "learning_rate": 2.660016220600162e-05, "loss": 0.548, "step": 17852 }, { "epoch": 0.5212402557589559, "grad_norm": 0.5035147401016651, "learning_rate": 2.6598540145985402e-05, "loss": 0.5662, "step": 17853 }, { "epoch": 0.5212694519868033, "grad_norm": 0.5269662601545353, "learning_rate": 2.659691808596918e-05, "loss": 0.6475, "step": 17854 }, { "epoch": 0.5212986482146507, "grad_norm": 0.5078539485501428, "learning_rate": 2.659529602595296e-05, "loss": 0.6184, "step": 17855 }, { "epoch": 0.521327844442498, "grad_norm": 0.5017641447305164, "learning_rate": 2.6593673965936737e-05, "loss": 0.5516, "step": 17856 }, { "epoch": 0.5213570406703454, "grad_norm": 0.5354880844054534, "learning_rate": 2.6592051905920523e-05, "loss": 0.5946, "step": 17857 }, { "epoch": 0.5213862368981927, "grad_norm": 0.5120043013691885, "learning_rate": 2.65904298459043e-05, "loss": 0.6049, "step": 17858 }, { "epoch": 0.5214154331260401, "grad_norm": 0.5451935365550569, "learning_rate": 2.6588807785888083e-05, "loss": 0.6626, "step": 17859 }, { "epoch": 0.5214446293538875, "grad_norm": 0.5475812869990994, "learning_rate": 2.658718572587186e-05, "loss": 0.6652, "step": 17860 }, { "epoch": 0.5214738255817348, "grad_norm": 0.5592988002104989, "learning_rate": 2.658556366585564e-05, "loss": 0.7062, "step": 17861 }, { "epoch": 0.5215030218095822, "grad_norm": 0.551697815811138, "learning_rate": 2.6583941605839418e-05, "loss": 0.701, "step": 17862 }, { "epoch": 0.5215322180374296, "grad_norm": 0.4921266725301012, "learning_rate": 2.6582319545823196e-05, "loss": 0.5374, "step": 17863 }, { "epoch": 0.5215614142652769, "grad_norm": 0.5522342140076608, "learning_rate": 2.6580697485806978e-05, "loss": 0.6523, "step": 17864 }, { "epoch": 0.5215906104931243, "grad_norm": 0.5292685565123771, "learning_rate": 2.6579075425790756e-05, "loss": 0.6613, "step": 17865 }, { "epoch": 0.5216198067209716, "grad_norm": 0.5315318730582741, "learning_rate": 2.6577453365774535e-05, "loss": 0.6472, "step": 17866 }, { "epoch": 0.521649002948819, "grad_norm": 0.5396359617425047, "learning_rate": 2.6575831305758313e-05, "loss": 0.6549, "step": 17867 }, { "epoch": 0.5216781991766664, "grad_norm": 0.4925132543402344, "learning_rate": 2.6574209245742095e-05, "loss": 0.5255, "step": 17868 }, { "epoch": 0.5217073954045137, "grad_norm": 0.5479553748320162, "learning_rate": 2.6572587185725873e-05, "loss": 0.6479, "step": 17869 }, { "epoch": 0.5217365916323611, "grad_norm": 0.5092657878303115, "learning_rate": 2.657096512570965e-05, "loss": 0.5815, "step": 17870 }, { "epoch": 0.5217657878602084, "grad_norm": 0.5992010894599898, "learning_rate": 2.656934306569343e-05, "loss": 0.5884, "step": 17871 }, { "epoch": 0.5217949840880558, "grad_norm": 0.5701166062691566, "learning_rate": 2.656772100567721e-05, "loss": 0.6758, "step": 17872 }, { "epoch": 0.5218241803159032, "grad_norm": 0.47584708688392546, "learning_rate": 2.656609894566099e-05, "loss": 0.4974, "step": 17873 }, { "epoch": 0.5218533765437505, "grad_norm": 0.5191462163528157, "learning_rate": 2.656447688564477e-05, "loss": 0.66, "step": 17874 }, { "epoch": 0.5218825727715979, "grad_norm": 0.5694185048949836, "learning_rate": 2.6562854825628547e-05, "loss": 0.6809, "step": 17875 }, { "epoch": 0.5219117689994452, "grad_norm": 0.5099602399578113, "learning_rate": 2.6561232765612332e-05, "loss": 0.6151, "step": 17876 }, { "epoch": 0.5219409652272926, "grad_norm": 0.5766556794203641, "learning_rate": 2.655961070559611e-05, "loss": 0.7777, "step": 17877 }, { "epoch": 0.52197016145514, "grad_norm": 0.47388167754684507, "learning_rate": 2.655798864557989e-05, "loss": 0.541, "step": 17878 }, { "epoch": 0.5219993576829873, "grad_norm": 0.514578655300012, "learning_rate": 2.655636658556367e-05, "loss": 0.6187, "step": 17879 }, { "epoch": 0.5220285539108347, "grad_norm": 0.5171644282332034, "learning_rate": 2.655474452554745e-05, "loss": 0.6093, "step": 17880 }, { "epoch": 0.522057750138682, "grad_norm": 0.5234843402837585, "learning_rate": 2.6553122465531227e-05, "loss": 0.6022, "step": 17881 }, { "epoch": 0.5220869463665294, "grad_norm": 0.5534291587895375, "learning_rate": 2.6551500405515006e-05, "loss": 0.6457, "step": 17882 }, { "epoch": 0.5221161425943768, "grad_norm": 0.5034227784374332, "learning_rate": 2.6549878345498784e-05, "loss": 0.5643, "step": 17883 }, { "epoch": 0.5221453388222241, "grad_norm": 0.4950893283896473, "learning_rate": 2.6548256285482566e-05, "loss": 0.5648, "step": 17884 }, { "epoch": 0.5221745350500715, "grad_norm": 0.5130563331150206, "learning_rate": 2.6546634225466344e-05, "loss": 0.6555, "step": 17885 }, { "epoch": 0.5222037312779189, "grad_norm": 0.5167212559783788, "learning_rate": 2.6545012165450123e-05, "loss": 0.5631, "step": 17886 }, { "epoch": 0.5222329275057662, "grad_norm": 0.5046841822704644, "learning_rate": 2.65433901054339e-05, "loss": 0.5867, "step": 17887 }, { "epoch": 0.5222621237336136, "grad_norm": 0.49460143034999166, "learning_rate": 2.654176804541768e-05, "loss": 0.5836, "step": 17888 }, { "epoch": 0.5222913199614609, "grad_norm": 0.5409378226629578, "learning_rate": 2.654014598540146e-05, "loss": 0.6545, "step": 17889 }, { "epoch": 0.5223205161893083, "grad_norm": 0.5155479012157823, "learning_rate": 2.653852392538524e-05, "loss": 0.6013, "step": 17890 }, { "epoch": 0.5223497124171557, "grad_norm": 0.5358695808083684, "learning_rate": 2.6536901865369018e-05, "loss": 0.5927, "step": 17891 }, { "epoch": 0.522378908645003, "grad_norm": 0.5288506864912011, "learning_rate": 2.6535279805352796e-05, "loss": 0.63, "step": 17892 }, { "epoch": 0.5224081048728504, "grad_norm": 0.5307764160653521, "learning_rate": 2.6533657745336578e-05, "loss": 0.6168, "step": 17893 }, { "epoch": 0.5224373011006977, "grad_norm": 0.4773055930213435, "learning_rate": 2.6532035685320356e-05, "loss": 0.5367, "step": 17894 }, { "epoch": 0.5224664973285451, "grad_norm": 0.5510423470355211, "learning_rate": 2.653041362530414e-05, "loss": 0.6638, "step": 17895 }, { "epoch": 0.5224956935563925, "grad_norm": 0.50356353860053, "learning_rate": 2.652879156528792e-05, "loss": 0.6009, "step": 17896 }, { "epoch": 0.5225248897842398, "grad_norm": 0.5414660543899161, "learning_rate": 2.6527169505271698e-05, "loss": 0.6372, "step": 17897 }, { "epoch": 0.5225540860120872, "grad_norm": 0.513437858532963, "learning_rate": 2.6525547445255477e-05, "loss": 0.6225, "step": 17898 }, { "epoch": 0.5225832822399346, "grad_norm": 0.568911762146105, "learning_rate": 2.6523925385239255e-05, "loss": 0.6632, "step": 17899 }, { "epoch": 0.5226124784677819, "grad_norm": 0.5255658328270807, "learning_rate": 2.6522303325223037e-05, "loss": 0.6135, "step": 17900 }, { "epoch": 0.5226416746956293, "grad_norm": 0.5579917047225764, "learning_rate": 2.6520681265206815e-05, "loss": 0.5857, "step": 17901 }, { "epoch": 0.5226708709234766, "grad_norm": 0.5168801195803823, "learning_rate": 2.6519059205190593e-05, "loss": 0.6041, "step": 17902 }, { "epoch": 0.522700067151324, "grad_norm": 0.501173901458174, "learning_rate": 2.6517437145174372e-05, "loss": 0.5448, "step": 17903 }, { "epoch": 0.5227292633791714, "grad_norm": 0.5077502346219541, "learning_rate": 2.6515815085158154e-05, "loss": 0.5901, "step": 17904 }, { "epoch": 0.5227584596070187, "grad_norm": 0.5209495457596184, "learning_rate": 2.6514193025141932e-05, "loss": 0.5938, "step": 17905 }, { "epoch": 0.5227876558348661, "grad_norm": 0.5166778787862885, "learning_rate": 2.651257096512571e-05, "loss": 0.6027, "step": 17906 }, { "epoch": 0.5228168520627134, "grad_norm": 0.5264153264470658, "learning_rate": 2.651094890510949e-05, "loss": 0.6236, "step": 17907 }, { "epoch": 0.5228460482905608, "grad_norm": 0.5435769611786403, "learning_rate": 2.6509326845093267e-05, "loss": 0.6392, "step": 17908 }, { "epoch": 0.5228752445184082, "grad_norm": 0.5029992707477, "learning_rate": 2.650770478507705e-05, "loss": 0.5751, "step": 17909 }, { "epoch": 0.5229044407462556, "grad_norm": 0.5332655093153121, "learning_rate": 2.6506082725060827e-05, "loss": 0.6538, "step": 17910 }, { "epoch": 0.522933636974103, "grad_norm": 0.5095051178215799, "learning_rate": 2.6504460665044606e-05, "loss": 0.5729, "step": 17911 }, { "epoch": 0.5229628332019504, "grad_norm": 0.4980731126462855, "learning_rate": 2.6502838605028384e-05, "loss": 0.5542, "step": 17912 }, { "epoch": 0.5229920294297977, "grad_norm": 0.4669333727248244, "learning_rate": 2.6501216545012166e-05, "loss": 0.5075, "step": 17913 }, { "epoch": 0.5230212256576451, "grad_norm": 0.5226712567801084, "learning_rate": 2.6499594484995948e-05, "loss": 0.6057, "step": 17914 }, { "epoch": 0.5230504218854924, "grad_norm": 0.5835545692406239, "learning_rate": 2.649797242497973e-05, "loss": 0.7802, "step": 17915 }, { "epoch": 0.5230796181133398, "grad_norm": 0.503016644334242, "learning_rate": 2.6496350364963508e-05, "loss": 0.6164, "step": 17916 }, { "epoch": 0.5231088143411872, "grad_norm": 0.5108937197664579, "learning_rate": 2.6494728304947286e-05, "loss": 0.5423, "step": 17917 }, { "epoch": 0.5231380105690345, "grad_norm": 0.5264491452342709, "learning_rate": 2.6493106244931064e-05, "loss": 0.5957, "step": 17918 }, { "epoch": 0.5231672067968819, "grad_norm": 0.5057915702247634, "learning_rate": 2.6491484184914843e-05, "loss": 0.5716, "step": 17919 }, { "epoch": 0.5231964030247293, "grad_norm": 0.5550812376331422, "learning_rate": 2.6489862124898625e-05, "loss": 0.6579, "step": 17920 }, { "epoch": 0.5232255992525766, "grad_norm": 0.5372099082402502, "learning_rate": 2.6488240064882403e-05, "loss": 0.6499, "step": 17921 }, { "epoch": 0.523254795480424, "grad_norm": 0.4757973730143669, "learning_rate": 2.648661800486618e-05, "loss": 0.5191, "step": 17922 }, { "epoch": 0.5232839917082713, "grad_norm": 0.5972426759971948, "learning_rate": 2.648499594484996e-05, "loss": 0.6599, "step": 17923 }, { "epoch": 0.5233131879361187, "grad_norm": 0.5169451226186372, "learning_rate": 2.648337388483374e-05, "loss": 0.5926, "step": 17924 }, { "epoch": 0.5233423841639661, "grad_norm": 0.525026825124284, "learning_rate": 2.648175182481752e-05, "loss": 0.628, "step": 17925 }, { "epoch": 0.5233715803918134, "grad_norm": 0.5139092833248954, "learning_rate": 2.6480129764801298e-05, "loss": 0.5852, "step": 17926 }, { "epoch": 0.5234007766196608, "grad_norm": 0.4918162202903885, "learning_rate": 2.6478507704785077e-05, "loss": 0.5482, "step": 17927 }, { "epoch": 0.5234299728475081, "grad_norm": 0.5177838796471965, "learning_rate": 2.6476885644768855e-05, "loss": 0.6148, "step": 17928 }, { "epoch": 0.5234591690753555, "grad_norm": 0.5296616051315601, "learning_rate": 2.6475263584752637e-05, "loss": 0.6471, "step": 17929 }, { "epoch": 0.5234883653032029, "grad_norm": 0.5119334751159964, "learning_rate": 2.6473641524736415e-05, "loss": 0.6159, "step": 17930 }, { "epoch": 0.5235175615310502, "grad_norm": 0.5504823560495901, "learning_rate": 2.6472019464720193e-05, "loss": 0.6341, "step": 17931 }, { "epoch": 0.5235467577588976, "grad_norm": 0.5456438050905075, "learning_rate": 2.6470397404703972e-05, "loss": 0.6729, "step": 17932 }, { "epoch": 0.523575953986745, "grad_norm": 0.5167373832232703, "learning_rate": 2.6468775344687757e-05, "loss": 0.592, "step": 17933 }, { "epoch": 0.5236051502145923, "grad_norm": 0.5176177103525823, "learning_rate": 2.6467153284671535e-05, "loss": 0.558, "step": 17934 }, { "epoch": 0.5236343464424397, "grad_norm": 0.5328450359783203, "learning_rate": 2.6465531224655317e-05, "loss": 0.639, "step": 17935 }, { "epoch": 0.523663542670287, "grad_norm": 0.5639051305583264, "learning_rate": 2.6463909164639095e-05, "loss": 0.7211, "step": 17936 }, { "epoch": 0.5236927388981344, "grad_norm": 0.5468490239243252, "learning_rate": 2.6462287104622874e-05, "loss": 0.655, "step": 17937 }, { "epoch": 0.5237219351259818, "grad_norm": 0.529127841117322, "learning_rate": 2.6460665044606652e-05, "loss": 0.6546, "step": 17938 }, { "epoch": 0.5237511313538291, "grad_norm": 0.5166437690929027, "learning_rate": 2.645904298459043e-05, "loss": 0.5759, "step": 17939 }, { "epoch": 0.5237803275816765, "grad_norm": 0.5500004334592551, "learning_rate": 2.6457420924574212e-05, "loss": 0.6636, "step": 17940 }, { "epoch": 0.5238095238095238, "grad_norm": 0.559802903522701, "learning_rate": 2.645579886455799e-05, "loss": 0.6956, "step": 17941 }, { "epoch": 0.5238387200373712, "grad_norm": 0.5082460999965333, "learning_rate": 2.645417680454177e-05, "loss": 0.5602, "step": 17942 }, { "epoch": 0.5238679162652186, "grad_norm": 0.47749141030266234, "learning_rate": 2.6452554744525547e-05, "loss": 0.5335, "step": 17943 }, { "epoch": 0.5238971124930659, "grad_norm": 0.5227770579802254, "learning_rate": 2.6450932684509326e-05, "loss": 0.6093, "step": 17944 }, { "epoch": 0.5239263087209133, "grad_norm": 0.5046709698896548, "learning_rate": 2.6449310624493108e-05, "loss": 0.6011, "step": 17945 }, { "epoch": 0.5239555049487606, "grad_norm": 0.5450167544936139, "learning_rate": 2.6447688564476886e-05, "loss": 0.6617, "step": 17946 }, { "epoch": 0.523984701176608, "grad_norm": 0.5191613481168975, "learning_rate": 2.6446066504460664e-05, "loss": 0.6116, "step": 17947 }, { "epoch": 0.5240138974044554, "grad_norm": 0.5502770739913281, "learning_rate": 2.6444444444444443e-05, "loss": 0.689, "step": 17948 }, { "epoch": 0.5240430936323027, "grad_norm": 0.49764466041003613, "learning_rate": 2.6442822384428224e-05, "loss": 0.5989, "step": 17949 }, { "epoch": 0.5240722898601501, "grad_norm": 0.5333447904685348, "learning_rate": 2.6441200324412003e-05, "loss": 0.6414, "step": 17950 }, { "epoch": 0.5241014860879974, "grad_norm": 0.5679422341130071, "learning_rate": 2.643957826439578e-05, "loss": 0.6799, "step": 17951 }, { "epoch": 0.5241306823158448, "grad_norm": 0.5657955680322067, "learning_rate": 2.6437956204379566e-05, "loss": 0.7213, "step": 17952 }, { "epoch": 0.5241598785436922, "grad_norm": 0.5344387075572581, "learning_rate": 2.6436334144363345e-05, "loss": 0.6373, "step": 17953 }, { "epoch": 0.5241890747715395, "grad_norm": 0.5156744120096167, "learning_rate": 2.6434712084347123e-05, "loss": 0.623, "step": 17954 }, { "epoch": 0.5242182709993869, "grad_norm": 0.4945532356814797, "learning_rate": 2.64330900243309e-05, "loss": 0.5434, "step": 17955 }, { "epoch": 0.5242474672272343, "grad_norm": 0.5049720068589935, "learning_rate": 2.6431467964314683e-05, "loss": 0.5912, "step": 17956 }, { "epoch": 0.5242766634550816, "grad_norm": 0.521872443341238, "learning_rate": 2.642984590429846e-05, "loss": 0.6247, "step": 17957 }, { "epoch": 0.524305859682929, "grad_norm": 0.5710637837353213, "learning_rate": 2.642822384428224e-05, "loss": 0.6713, "step": 17958 }, { "epoch": 0.5243350559107763, "grad_norm": 0.5497364757138699, "learning_rate": 2.642660178426602e-05, "loss": 0.6802, "step": 17959 }, { "epoch": 0.5243642521386237, "grad_norm": 0.5093054451704312, "learning_rate": 2.64249797242498e-05, "loss": 0.5875, "step": 17960 }, { "epoch": 0.5243934483664711, "grad_norm": 0.5227892713409628, "learning_rate": 2.642335766423358e-05, "loss": 0.615, "step": 17961 }, { "epoch": 0.5244226445943184, "grad_norm": 0.5439105925378963, "learning_rate": 2.6421735604217357e-05, "loss": 0.6499, "step": 17962 }, { "epoch": 0.5244518408221658, "grad_norm": 0.5070620489945222, "learning_rate": 2.6420113544201135e-05, "loss": 0.5989, "step": 17963 }, { "epoch": 0.5244810370500131, "grad_norm": 0.5202092575869941, "learning_rate": 2.6418491484184914e-05, "loss": 0.5257, "step": 17964 }, { "epoch": 0.5245102332778605, "grad_norm": 0.525364719179849, "learning_rate": 2.6416869424168695e-05, "loss": 0.6043, "step": 17965 }, { "epoch": 0.5245394295057079, "grad_norm": 0.5228228835100941, "learning_rate": 2.6415247364152474e-05, "loss": 0.6271, "step": 17966 }, { "epoch": 0.5245686257335552, "grad_norm": 0.5182905142737377, "learning_rate": 2.6413625304136252e-05, "loss": 0.6108, "step": 17967 }, { "epoch": 0.5245978219614026, "grad_norm": 0.4893481110853535, "learning_rate": 2.641200324412003e-05, "loss": 0.5804, "step": 17968 }, { "epoch": 0.52462701818925, "grad_norm": 0.501597544816103, "learning_rate": 2.6410381184103812e-05, "loss": 0.5623, "step": 17969 }, { "epoch": 0.5246562144170973, "grad_norm": 0.6145528738942553, "learning_rate": 2.640875912408759e-05, "loss": 0.6785, "step": 17970 }, { "epoch": 0.5246854106449447, "grad_norm": 0.5445069658409812, "learning_rate": 2.6407137064071376e-05, "loss": 0.6355, "step": 17971 }, { "epoch": 0.524714606872792, "grad_norm": 0.5318027962329097, "learning_rate": 2.6405515004055154e-05, "loss": 0.6349, "step": 17972 }, { "epoch": 0.5247438031006394, "grad_norm": 0.5258504786569159, "learning_rate": 2.6403892944038933e-05, "loss": 0.5947, "step": 17973 }, { "epoch": 0.5247729993284868, "grad_norm": 0.522760381334964, "learning_rate": 2.640227088402271e-05, "loss": 0.6054, "step": 17974 }, { "epoch": 0.5248021955563341, "grad_norm": 0.5142917208713874, "learning_rate": 2.640064882400649e-05, "loss": 0.6104, "step": 17975 }, { "epoch": 0.5248313917841815, "grad_norm": 0.5271586840353617, "learning_rate": 2.639902676399027e-05, "loss": 0.6036, "step": 17976 }, { "epoch": 0.5248605880120288, "grad_norm": 0.4972326177841131, "learning_rate": 2.639740470397405e-05, "loss": 0.5201, "step": 17977 }, { "epoch": 0.5248897842398762, "grad_norm": 0.5940317613050992, "learning_rate": 2.6395782643957828e-05, "loss": 0.7253, "step": 17978 }, { "epoch": 0.5249189804677236, "grad_norm": 0.4895911273986577, "learning_rate": 2.6394160583941606e-05, "loss": 0.5637, "step": 17979 }, { "epoch": 0.5249481766955709, "grad_norm": 0.5304566173017595, "learning_rate": 2.6392538523925388e-05, "loss": 0.6488, "step": 17980 }, { "epoch": 0.5249773729234183, "grad_norm": 0.5351470471411445, "learning_rate": 2.6390916463909166e-05, "loss": 0.6502, "step": 17981 }, { "epoch": 0.5250065691512656, "grad_norm": 0.5185278962789122, "learning_rate": 2.6389294403892945e-05, "loss": 0.6077, "step": 17982 }, { "epoch": 0.525035765379113, "grad_norm": 0.5423811168162712, "learning_rate": 2.6387672343876723e-05, "loss": 0.6452, "step": 17983 }, { "epoch": 0.5250649616069604, "grad_norm": 0.5213751406639588, "learning_rate": 2.63860502838605e-05, "loss": 0.6313, "step": 17984 }, { "epoch": 0.5250941578348077, "grad_norm": 0.4856230541967284, "learning_rate": 2.6384428223844283e-05, "loss": 0.5256, "step": 17985 }, { "epoch": 0.5251233540626551, "grad_norm": 0.5358261125327207, "learning_rate": 2.638280616382806e-05, "loss": 0.6774, "step": 17986 }, { "epoch": 0.5251525502905025, "grad_norm": 0.4979631229308845, "learning_rate": 2.638118410381184e-05, "loss": 0.6154, "step": 17987 }, { "epoch": 0.5251817465183498, "grad_norm": 0.5544397645142376, "learning_rate": 2.637956204379562e-05, "loss": 0.6966, "step": 17988 }, { "epoch": 0.5252109427461972, "grad_norm": 0.5148613749416505, "learning_rate": 2.6377939983779397e-05, "loss": 0.6224, "step": 17989 }, { "epoch": 0.5252401389740445, "grad_norm": 0.5398164434402712, "learning_rate": 2.6376317923763182e-05, "loss": 0.6258, "step": 17990 }, { "epoch": 0.5252693352018919, "grad_norm": 0.5294620319604759, "learning_rate": 2.6374695863746964e-05, "loss": 0.6444, "step": 17991 }, { "epoch": 0.5252985314297393, "grad_norm": 0.5199689854085195, "learning_rate": 2.6373073803730742e-05, "loss": 0.6348, "step": 17992 }, { "epoch": 0.5253277276575866, "grad_norm": 0.5072109371189755, "learning_rate": 2.637145174371452e-05, "loss": 0.5955, "step": 17993 }, { "epoch": 0.525356923885434, "grad_norm": 0.553312575926852, "learning_rate": 2.63698296836983e-05, "loss": 0.6966, "step": 17994 }, { "epoch": 0.5253861201132813, "grad_norm": 0.5008954116758779, "learning_rate": 2.6368207623682077e-05, "loss": 0.59, "step": 17995 }, { "epoch": 0.5254153163411287, "grad_norm": 0.5137871886394134, "learning_rate": 2.636658556366586e-05, "loss": 0.6014, "step": 17996 }, { "epoch": 0.5254445125689761, "grad_norm": 0.5134535409532583, "learning_rate": 2.6364963503649637e-05, "loss": 0.6163, "step": 17997 }, { "epoch": 0.5254737087968234, "grad_norm": 0.5469953170102412, "learning_rate": 2.6363341443633416e-05, "loss": 0.6517, "step": 17998 }, { "epoch": 0.5255029050246708, "grad_norm": 0.4894295022386297, "learning_rate": 2.6361719383617194e-05, "loss": 0.558, "step": 17999 }, { "epoch": 0.5255321012525181, "grad_norm": 0.5371336077686221, "learning_rate": 2.6360097323600972e-05, "loss": 0.6346, "step": 18000 }, { "epoch": 0.5255612974803655, "grad_norm": 0.5293115385575108, "learning_rate": 2.6358475263584754e-05, "loss": 0.6455, "step": 18001 }, { "epoch": 0.5255904937082129, "grad_norm": 0.5227127333459569, "learning_rate": 2.6356853203568533e-05, "loss": 0.6283, "step": 18002 }, { "epoch": 0.5256196899360602, "grad_norm": 0.49639958142459667, "learning_rate": 2.635523114355231e-05, "loss": 0.5343, "step": 18003 }, { "epoch": 0.5256488861639076, "grad_norm": 0.5078806515121685, "learning_rate": 2.635360908353609e-05, "loss": 0.5922, "step": 18004 }, { "epoch": 0.525678082391755, "grad_norm": 0.4933028714044022, "learning_rate": 2.635198702351987e-05, "loss": 0.5669, "step": 18005 }, { "epoch": 0.5257072786196023, "grad_norm": 0.5812804365387629, "learning_rate": 2.635036496350365e-05, "loss": 0.7078, "step": 18006 }, { "epoch": 0.5257364748474497, "grad_norm": 0.5740441451270755, "learning_rate": 2.6348742903487428e-05, "loss": 0.7509, "step": 18007 }, { "epoch": 0.525765671075297, "grad_norm": 0.5099183771780073, "learning_rate": 2.6347120843471213e-05, "loss": 0.5694, "step": 18008 }, { "epoch": 0.5257948673031444, "grad_norm": 0.529337639141785, "learning_rate": 2.634549878345499e-05, "loss": 0.6506, "step": 18009 }, { "epoch": 0.5258240635309918, "grad_norm": 0.5630296141425295, "learning_rate": 2.634387672343877e-05, "loss": 0.706, "step": 18010 }, { "epoch": 0.5258532597588391, "grad_norm": 0.4907201147264379, "learning_rate": 2.634225466342255e-05, "loss": 0.5287, "step": 18011 }, { "epoch": 0.5258824559866865, "grad_norm": 0.47867561841533457, "learning_rate": 2.634063260340633e-05, "loss": 0.5051, "step": 18012 }, { "epoch": 0.5259116522145338, "grad_norm": 0.5471677513943425, "learning_rate": 2.6339010543390108e-05, "loss": 0.6727, "step": 18013 }, { "epoch": 0.5259408484423812, "grad_norm": 0.5011478846300076, "learning_rate": 2.6337388483373887e-05, "loss": 0.5508, "step": 18014 }, { "epoch": 0.5259700446702286, "grad_norm": 0.5424493106749209, "learning_rate": 2.6335766423357665e-05, "loss": 0.6495, "step": 18015 }, { "epoch": 0.5259992408980759, "grad_norm": 0.524160582260496, "learning_rate": 2.6334144363341447e-05, "loss": 0.6294, "step": 18016 }, { "epoch": 0.5260284371259233, "grad_norm": 0.5312202177960262, "learning_rate": 2.6332522303325225e-05, "loss": 0.612, "step": 18017 }, { "epoch": 0.5260576333537706, "grad_norm": 0.5171747667441845, "learning_rate": 2.6330900243309003e-05, "loss": 0.6176, "step": 18018 }, { "epoch": 0.526086829581618, "grad_norm": 0.5329013458744782, "learning_rate": 2.6329278183292782e-05, "loss": 0.6338, "step": 18019 }, { "epoch": 0.5261160258094654, "grad_norm": 0.5201592071374536, "learning_rate": 2.632765612327656e-05, "loss": 0.5889, "step": 18020 }, { "epoch": 0.5261452220373127, "grad_norm": 0.5386092279984787, "learning_rate": 2.6326034063260342e-05, "loss": 0.6458, "step": 18021 }, { "epoch": 0.5261744182651601, "grad_norm": 0.46831704687893383, "learning_rate": 2.632441200324412e-05, "loss": 0.4973, "step": 18022 }, { "epoch": 0.5262036144930075, "grad_norm": 0.5065511744301083, "learning_rate": 2.63227899432279e-05, "loss": 0.5813, "step": 18023 }, { "epoch": 0.5262328107208548, "grad_norm": 0.5226006054704464, "learning_rate": 2.6321167883211677e-05, "loss": 0.5966, "step": 18024 }, { "epoch": 0.5262620069487022, "grad_norm": 0.5026035586372508, "learning_rate": 2.631954582319546e-05, "loss": 0.5767, "step": 18025 }, { "epoch": 0.5262912031765495, "grad_norm": 0.5592248244972973, "learning_rate": 2.6317923763179237e-05, "loss": 0.6327, "step": 18026 }, { "epoch": 0.5263203994043969, "grad_norm": 0.4921847863033139, "learning_rate": 2.6316301703163022e-05, "loss": 0.5394, "step": 18027 }, { "epoch": 0.5263495956322443, "grad_norm": 0.48230347238112115, "learning_rate": 2.63146796431468e-05, "loss": 0.5102, "step": 18028 }, { "epoch": 0.5263787918600916, "grad_norm": 0.503180306108998, "learning_rate": 2.631305758313058e-05, "loss": 0.5856, "step": 18029 }, { "epoch": 0.526407988087939, "grad_norm": 0.516449523869087, "learning_rate": 2.6311435523114357e-05, "loss": 0.5831, "step": 18030 }, { "epoch": 0.5264371843157865, "grad_norm": 0.5297466620729036, "learning_rate": 2.6309813463098136e-05, "loss": 0.591, "step": 18031 }, { "epoch": 0.5264663805436338, "grad_norm": 0.509620950120912, "learning_rate": 2.6308191403081918e-05, "loss": 0.618, "step": 18032 }, { "epoch": 0.5264955767714812, "grad_norm": 0.5238185561797327, "learning_rate": 2.6306569343065696e-05, "loss": 0.5673, "step": 18033 }, { "epoch": 0.5265247729993285, "grad_norm": 0.5096080026495793, "learning_rate": 2.6304947283049474e-05, "loss": 0.6037, "step": 18034 }, { "epoch": 0.5265539692271759, "grad_norm": 0.5724207591428576, "learning_rate": 2.6303325223033253e-05, "loss": 0.7137, "step": 18035 }, { "epoch": 0.5265831654550233, "grad_norm": 0.568674297858754, "learning_rate": 2.6301703163017035e-05, "loss": 0.7135, "step": 18036 }, { "epoch": 0.5266123616828706, "grad_norm": 0.5732666932918699, "learning_rate": 2.6300081103000813e-05, "loss": 0.7207, "step": 18037 }, { "epoch": 0.526641557910718, "grad_norm": 0.5356499929227907, "learning_rate": 2.629845904298459e-05, "loss": 0.6183, "step": 18038 }, { "epoch": 0.5266707541385653, "grad_norm": 0.5508019246434195, "learning_rate": 2.629683698296837e-05, "loss": 0.6572, "step": 18039 }, { "epoch": 0.5266999503664127, "grad_norm": 0.4853161581098047, "learning_rate": 2.6295214922952148e-05, "loss": 0.5103, "step": 18040 }, { "epoch": 0.5267291465942601, "grad_norm": 0.5033988388159022, "learning_rate": 2.629359286293593e-05, "loss": 0.5708, "step": 18041 }, { "epoch": 0.5267583428221074, "grad_norm": 0.5001442797724411, "learning_rate": 2.6291970802919708e-05, "loss": 0.555, "step": 18042 }, { "epoch": 0.5267875390499548, "grad_norm": 0.4913159204010239, "learning_rate": 2.6290348742903487e-05, "loss": 0.5497, "step": 18043 }, { "epoch": 0.5268167352778022, "grad_norm": 0.502278437414815, "learning_rate": 2.6288726682887265e-05, "loss": 0.5647, "step": 18044 }, { "epoch": 0.5268459315056495, "grad_norm": 0.5580397529485592, "learning_rate": 2.6287104622871043e-05, "loss": 0.6618, "step": 18045 }, { "epoch": 0.5268751277334969, "grad_norm": 0.6007296318333031, "learning_rate": 2.628548256285483e-05, "loss": 0.7044, "step": 18046 }, { "epoch": 0.5269043239613442, "grad_norm": 0.4823217387773516, "learning_rate": 2.628386050283861e-05, "loss": 0.55, "step": 18047 }, { "epoch": 0.5269335201891916, "grad_norm": 0.523087252001756, "learning_rate": 2.628223844282239e-05, "loss": 0.5613, "step": 18048 }, { "epoch": 0.526962716417039, "grad_norm": 0.5626618430089899, "learning_rate": 2.6280616382806167e-05, "loss": 0.6371, "step": 18049 }, { "epoch": 0.5269919126448863, "grad_norm": 0.5470232462408009, "learning_rate": 2.6278994322789945e-05, "loss": 0.6453, "step": 18050 }, { "epoch": 0.5270211088727337, "grad_norm": 0.5029484161156603, "learning_rate": 2.6277372262773724e-05, "loss": 0.5956, "step": 18051 }, { "epoch": 0.527050305100581, "grad_norm": 0.5470613965911468, "learning_rate": 2.6275750202757505e-05, "loss": 0.6746, "step": 18052 }, { "epoch": 0.5270795013284284, "grad_norm": 0.5284419135845095, "learning_rate": 2.6274128142741284e-05, "loss": 0.64, "step": 18053 }, { "epoch": 0.5271086975562758, "grad_norm": 0.5924465563573953, "learning_rate": 2.6272506082725062e-05, "loss": 0.7323, "step": 18054 }, { "epoch": 0.5271378937841231, "grad_norm": 0.4966128964564806, "learning_rate": 2.627088402270884e-05, "loss": 0.5685, "step": 18055 }, { "epoch": 0.5271670900119705, "grad_norm": 0.5105972064860963, "learning_rate": 2.6269261962692622e-05, "loss": 0.5835, "step": 18056 }, { "epoch": 0.5271962862398178, "grad_norm": 0.5612343702451997, "learning_rate": 2.62676399026764e-05, "loss": 0.663, "step": 18057 }, { "epoch": 0.5272254824676652, "grad_norm": 0.4880745634830343, "learning_rate": 2.626601784266018e-05, "loss": 0.5369, "step": 18058 }, { "epoch": 0.5272546786955126, "grad_norm": 0.5065652185305768, "learning_rate": 2.6264395782643957e-05, "loss": 0.5513, "step": 18059 }, { "epoch": 0.5272838749233599, "grad_norm": 0.507408619052544, "learning_rate": 2.6262773722627736e-05, "loss": 0.5772, "step": 18060 }, { "epoch": 0.5273130711512073, "grad_norm": 0.47871772693162856, "learning_rate": 2.6261151662611518e-05, "loss": 0.5178, "step": 18061 }, { "epoch": 0.5273422673790547, "grad_norm": 0.5301083663108304, "learning_rate": 2.6259529602595296e-05, "loss": 0.6236, "step": 18062 }, { "epoch": 0.527371463606902, "grad_norm": 0.505578401226111, "learning_rate": 2.6257907542579074e-05, "loss": 0.5996, "step": 18063 }, { "epoch": 0.5274006598347494, "grad_norm": 0.5270734012661931, "learning_rate": 2.6256285482562853e-05, "loss": 0.6253, "step": 18064 }, { "epoch": 0.5274298560625967, "grad_norm": 0.5125376215907677, "learning_rate": 2.6254663422546638e-05, "loss": 0.5808, "step": 18065 }, { "epoch": 0.5274590522904441, "grad_norm": 0.5308750600636227, "learning_rate": 2.6253041362530416e-05, "loss": 0.6165, "step": 18066 }, { "epoch": 0.5274882485182915, "grad_norm": 0.5935328511251953, "learning_rate": 2.6251419302514198e-05, "loss": 0.6061, "step": 18067 }, { "epoch": 0.5275174447461388, "grad_norm": 0.5210893515905174, "learning_rate": 2.6249797242497976e-05, "loss": 0.6129, "step": 18068 }, { "epoch": 0.5275466409739862, "grad_norm": 0.5242633265136879, "learning_rate": 2.6248175182481755e-05, "loss": 0.5699, "step": 18069 }, { "epoch": 0.5275758372018335, "grad_norm": 0.49019140347570406, "learning_rate": 2.6246553122465533e-05, "loss": 0.5343, "step": 18070 }, { "epoch": 0.5276050334296809, "grad_norm": 0.5026482133203193, "learning_rate": 2.624493106244931e-05, "loss": 0.5567, "step": 18071 }, { "epoch": 0.5276342296575283, "grad_norm": 0.49767065833069224, "learning_rate": 2.6243309002433093e-05, "loss": 0.5505, "step": 18072 }, { "epoch": 0.5276634258853756, "grad_norm": 0.5100601375084739, "learning_rate": 2.624168694241687e-05, "loss": 0.5708, "step": 18073 }, { "epoch": 0.527692622113223, "grad_norm": 0.5325699875915086, "learning_rate": 2.624006488240065e-05, "loss": 0.644, "step": 18074 }, { "epoch": 0.5277218183410703, "grad_norm": 0.5736950927368294, "learning_rate": 2.623844282238443e-05, "loss": 0.6855, "step": 18075 }, { "epoch": 0.5277510145689177, "grad_norm": 0.5117761474813693, "learning_rate": 2.6236820762368207e-05, "loss": 0.5665, "step": 18076 }, { "epoch": 0.5277802107967651, "grad_norm": 0.5373520545442819, "learning_rate": 2.623519870235199e-05, "loss": 0.6307, "step": 18077 }, { "epoch": 0.5278094070246124, "grad_norm": 0.48738427035095977, "learning_rate": 2.6233576642335767e-05, "loss": 0.5327, "step": 18078 }, { "epoch": 0.5278386032524598, "grad_norm": 0.54564445659747, "learning_rate": 2.6231954582319545e-05, "loss": 0.6611, "step": 18079 }, { "epoch": 0.5278677994803072, "grad_norm": 0.5041266717778218, "learning_rate": 2.6230332522303324e-05, "loss": 0.5787, "step": 18080 }, { "epoch": 0.5278969957081545, "grad_norm": 0.5124269469057426, "learning_rate": 2.6228710462287105e-05, "loss": 0.5931, "step": 18081 }, { "epoch": 0.5279261919360019, "grad_norm": 0.5373474605267697, "learning_rate": 2.6227088402270884e-05, "loss": 0.6568, "step": 18082 }, { "epoch": 0.5279553881638492, "grad_norm": 0.5049015522683111, "learning_rate": 2.6225466342254662e-05, "loss": 0.5918, "step": 18083 }, { "epoch": 0.5279845843916966, "grad_norm": 0.5570069125294342, "learning_rate": 2.6223844282238447e-05, "loss": 0.6769, "step": 18084 }, { "epoch": 0.528013780619544, "grad_norm": 0.5403284779284218, "learning_rate": 2.6222222222222226e-05, "loss": 0.5617, "step": 18085 }, { "epoch": 0.5280429768473913, "grad_norm": 0.5055271606519238, "learning_rate": 2.6220600162206004e-05, "loss": 0.5795, "step": 18086 }, { "epoch": 0.5280721730752387, "grad_norm": 0.5152496525483878, "learning_rate": 2.6218978102189782e-05, "loss": 0.5971, "step": 18087 }, { "epoch": 0.528101369303086, "grad_norm": 0.5689389714251055, "learning_rate": 2.6217356042173564e-05, "loss": 0.557, "step": 18088 }, { "epoch": 0.5281305655309334, "grad_norm": 0.5248615120071763, "learning_rate": 2.6215733982157343e-05, "loss": 0.6516, "step": 18089 }, { "epoch": 0.5281597617587808, "grad_norm": 0.5302819509327871, "learning_rate": 2.621411192214112e-05, "loss": 0.6116, "step": 18090 }, { "epoch": 0.5281889579866281, "grad_norm": 0.5439448588179748, "learning_rate": 2.62124898621249e-05, "loss": 0.6492, "step": 18091 }, { "epoch": 0.5282181542144755, "grad_norm": 0.5076125665111685, "learning_rate": 2.621086780210868e-05, "loss": 0.6021, "step": 18092 }, { "epoch": 0.5282473504423228, "grad_norm": 0.5609567444130884, "learning_rate": 2.620924574209246e-05, "loss": 0.6832, "step": 18093 }, { "epoch": 0.5282765466701702, "grad_norm": 0.5341992044699506, "learning_rate": 2.6207623682076238e-05, "loss": 0.6477, "step": 18094 }, { "epoch": 0.5283057428980176, "grad_norm": 0.5567435885938595, "learning_rate": 2.6206001622060016e-05, "loss": 0.6569, "step": 18095 }, { "epoch": 0.5283349391258649, "grad_norm": 0.5293212437432978, "learning_rate": 2.6204379562043795e-05, "loss": 0.6555, "step": 18096 }, { "epoch": 0.5283641353537123, "grad_norm": 0.5423442262815117, "learning_rate": 2.6202757502027576e-05, "loss": 0.6938, "step": 18097 }, { "epoch": 0.5283933315815597, "grad_norm": 0.5167952297754441, "learning_rate": 2.6201135442011355e-05, "loss": 0.6416, "step": 18098 }, { "epoch": 0.528422527809407, "grad_norm": 0.5403784514231935, "learning_rate": 2.6199513381995133e-05, "loss": 0.6413, "step": 18099 }, { "epoch": 0.5284517240372544, "grad_norm": 0.5198519282797659, "learning_rate": 2.619789132197891e-05, "loss": 0.5859, "step": 18100 }, { "epoch": 0.5284809202651017, "grad_norm": 0.6818722352635196, "learning_rate": 2.6196269261962693e-05, "loss": 0.6422, "step": 18101 }, { "epoch": 0.5285101164929491, "grad_norm": 0.5520185517333702, "learning_rate": 2.619464720194647e-05, "loss": 0.6609, "step": 18102 }, { "epoch": 0.5285393127207965, "grad_norm": 0.525695435693724, "learning_rate": 2.6193025141930257e-05, "loss": 0.5804, "step": 18103 }, { "epoch": 0.5285685089486438, "grad_norm": 0.5193528769977054, "learning_rate": 2.6191403081914035e-05, "loss": 0.5753, "step": 18104 }, { "epoch": 0.5285977051764912, "grad_norm": 0.5291129740128707, "learning_rate": 2.6189781021897813e-05, "loss": 0.6309, "step": 18105 }, { "epoch": 0.5286269014043385, "grad_norm": 0.4996381949239219, "learning_rate": 2.6188158961881592e-05, "loss": 0.5447, "step": 18106 }, { "epoch": 0.5286560976321859, "grad_norm": 0.49950374147797943, "learning_rate": 2.618653690186537e-05, "loss": 0.5556, "step": 18107 }, { "epoch": 0.5286852938600333, "grad_norm": 0.5233778138750982, "learning_rate": 2.6184914841849152e-05, "loss": 0.612, "step": 18108 }, { "epoch": 0.5287144900878806, "grad_norm": 0.5303606424534476, "learning_rate": 2.618329278183293e-05, "loss": 0.5656, "step": 18109 }, { "epoch": 0.528743686315728, "grad_norm": 0.5398191737078142, "learning_rate": 2.618167072181671e-05, "loss": 0.6541, "step": 18110 }, { "epoch": 0.5287728825435754, "grad_norm": 0.5712822850476345, "learning_rate": 2.6180048661800487e-05, "loss": 0.7196, "step": 18111 }, { "epoch": 0.5288020787714227, "grad_norm": 0.5170020697930534, "learning_rate": 2.617842660178427e-05, "loss": 0.5866, "step": 18112 }, { "epoch": 0.5288312749992701, "grad_norm": 0.5151785172523216, "learning_rate": 2.6176804541768047e-05, "loss": 0.6344, "step": 18113 }, { "epoch": 0.5288604712271174, "grad_norm": 0.5440335395988931, "learning_rate": 2.6175182481751826e-05, "loss": 0.6327, "step": 18114 }, { "epoch": 0.5288896674549648, "grad_norm": 0.5538656347472823, "learning_rate": 2.6173560421735604e-05, "loss": 0.6943, "step": 18115 }, { "epoch": 0.5289188636828122, "grad_norm": 0.5404000502209595, "learning_rate": 2.6171938361719382e-05, "loss": 0.6246, "step": 18116 }, { "epoch": 0.5289480599106595, "grad_norm": 0.5232374351609179, "learning_rate": 2.6170316301703164e-05, "loss": 0.5901, "step": 18117 }, { "epoch": 0.5289772561385069, "grad_norm": 0.518435810755917, "learning_rate": 2.6168694241686942e-05, "loss": 0.5825, "step": 18118 }, { "epoch": 0.5290064523663542, "grad_norm": 0.5042334911406664, "learning_rate": 2.616707218167072e-05, "loss": 0.5873, "step": 18119 }, { "epoch": 0.5290356485942016, "grad_norm": 0.5224110438082546, "learning_rate": 2.61654501216545e-05, "loss": 0.6115, "step": 18120 }, { "epoch": 0.529064844822049, "grad_norm": 0.521960853621984, "learning_rate": 2.6163828061638278e-05, "loss": 0.6621, "step": 18121 }, { "epoch": 0.5290940410498963, "grad_norm": 0.48721763070705165, "learning_rate": 2.6162206001622063e-05, "loss": 0.5602, "step": 18122 }, { "epoch": 0.5291232372777437, "grad_norm": 0.5156269350033187, "learning_rate": 2.6160583941605845e-05, "loss": 0.5568, "step": 18123 }, { "epoch": 0.529152433505591, "grad_norm": 0.5090525092308033, "learning_rate": 2.6158961881589623e-05, "loss": 0.6089, "step": 18124 }, { "epoch": 0.5291816297334384, "grad_norm": 0.5120613899188857, "learning_rate": 2.61573398215734e-05, "loss": 0.593, "step": 18125 }, { "epoch": 0.5292108259612858, "grad_norm": 0.48867697795954157, "learning_rate": 2.615571776155718e-05, "loss": 0.563, "step": 18126 }, { "epoch": 0.5292400221891331, "grad_norm": 0.5327979021254665, "learning_rate": 2.6154095701540958e-05, "loss": 0.6158, "step": 18127 }, { "epoch": 0.5292692184169805, "grad_norm": 0.5220305653352036, "learning_rate": 2.615247364152474e-05, "loss": 0.6367, "step": 18128 }, { "epoch": 0.5292984146448279, "grad_norm": 0.6056513037956627, "learning_rate": 2.6150851581508518e-05, "loss": 0.6943, "step": 18129 }, { "epoch": 0.5293276108726752, "grad_norm": 0.5300576914750883, "learning_rate": 2.6149229521492297e-05, "loss": 0.6073, "step": 18130 }, { "epoch": 0.5293568071005226, "grad_norm": 0.49525551526323963, "learning_rate": 2.6147607461476075e-05, "loss": 0.5657, "step": 18131 }, { "epoch": 0.5293860033283699, "grad_norm": 0.7196907741996862, "learning_rate": 2.6145985401459853e-05, "loss": 0.6831, "step": 18132 }, { "epoch": 0.5294151995562173, "grad_norm": 0.5169997987361467, "learning_rate": 2.6144363341443635e-05, "loss": 0.5981, "step": 18133 }, { "epoch": 0.5294443957840647, "grad_norm": 0.49056771329358606, "learning_rate": 2.6142741281427413e-05, "loss": 0.5263, "step": 18134 }, { "epoch": 0.529473592011912, "grad_norm": 0.5063929295794515, "learning_rate": 2.6141119221411192e-05, "loss": 0.5927, "step": 18135 }, { "epoch": 0.5295027882397594, "grad_norm": 0.4889106493348355, "learning_rate": 2.613949716139497e-05, "loss": 0.5635, "step": 18136 }, { "epoch": 0.5295319844676067, "grad_norm": 0.5135557441680366, "learning_rate": 2.6137875101378752e-05, "loss": 0.5791, "step": 18137 }, { "epoch": 0.5295611806954541, "grad_norm": 0.5312870034505995, "learning_rate": 2.613625304136253e-05, "loss": 0.6598, "step": 18138 }, { "epoch": 0.5295903769233015, "grad_norm": 0.5145041795963273, "learning_rate": 2.613463098134631e-05, "loss": 0.5999, "step": 18139 }, { "epoch": 0.5296195731511488, "grad_norm": 0.5566776800944492, "learning_rate": 2.6133008921330087e-05, "loss": 0.6983, "step": 18140 }, { "epoch": 0.5296487693789962, "grad_norm": 0.48423105069928574, "learning_rate": 2.6131386861313872e-05, "loss": 0.5241, "step": 18141 }, { "epoch": 0.5296779656068435, "grad_norm": 0.4976993495405352, "learning_rate": 2.612976480129765e-05, "loss": 0.5396, "step": 18142 }, { "epoch": 0.5297071618346909, "grad_norm": 0.528480368955401, "learning_rate": 2.612814274128143e-05, "loss": 0.6242, "step": 18143 }, { "epoch": 0.5297363580625383, "grad_norm": 0.4928101498552884, "learning_rate": 2.612652068126521e-05, "loss": 0.5588, "step": 18144 }, { "epoch": 0.5297655542903856, "grad_norm": 0.5320578825645141, "learning_rate": 2.612489862124899e-05, "loss": 0.6163, "step": 18145 }, { "epoch": 0.529794750518233, "grad_norm": 0.5358521294593803, "learning_rate": 2.6123276561232767e-05, "loss": 0.6225, "step": 18146 }, { "epoch": 0.5298239467460804, "grad_norm": 0.530196223660669, "learning_rate": 2.6121654501216546e-05, "loss": 0.6611, "step": 18147 }, { "epoch": 0.5298531429739277, "grad_norm": 0.5227584585779264, "learning_rate": 2.6120032441200328e-05, "loss": 0.6457, "step": 18148 }, { "epoch": 0.5298823392017751, "grad_norm": 0.5599139086817257, "learning_rate": 2.6118410381184106e-05, "loss": 0.5893, "step": 18149 }, { "epoch": 0.5299115354296224, "grad_norm": 0.5068314288520372, "learning_rate": 2.6116788321167884e-05, "loss": 0.6026, "step": 18150 }, { "epoch": 0.5299407316574699, "grad_norm": 0.5219654612665595, "learning_rate": 2.6115166261151663e-05, "loss": 0.5997, "step": 18151 }, { "epoch": 0.5299699278853173, "grad_norm": 0.5223400980845843, "learning_rate": 2.611354420113544e-05, "loss": 0.5867, "step": 18152 }, { "epoch": 0.5299991241131646, "grad_norm": 0.5340426166332672, "learning_rate": 2.6111922141119223e-05, "loss": 0.6257, "step": 18153 }, { "epoch": 0.530028320341012, "grad_norm": 0.519654881139919, "learning_rate": 2.6110300081103e-05, "loss": 0.6218, "step": 18154 }, { "epoch": 0.5300575165688594, "grad_norm": 0.4647157823357978, "learning_rate": 2.610867802108678e-05, "loss": 0.5047, "step": 18155 }, { "epoch": 0.5300867127967067, "grad_norm": 0.4999725225879921, "learning_rate": 2.6107055961070558e-05, "loss": 0.5589, "step": 18156 }, { "epoch": 0.5301159090245541, "grad_norm": 0.5152686818274944, "learning_rate": 2.610543390105434e-05, "loss": 0.6027, "step": 18157 }, { "epoch": 0.5301451052524014, "grad_norm": 0.5322643194110671, "learning_rate": 2.6103811841038118e-05, "loss": 0.63, "step": 18158 }, { "epoch": 0.5301743014802488, "grad_norm": 0.5169487858229974, "learning_rate": 2.6102189781021903e-05, "loss": 0.6275, "step": 18159 }, { "epoch": 0.5302034977080962, "grad_norm": 0.508064568417365, "learning_rate": 2.610056772100568e-05, "loss": 0.5589, "step": 18160 }, { "epoch": 0.5302326939359435, "grad_norm": 0.5208950947923633, "learning_rate": 2.609894566098946e-05, "loss": 0.6021, "step": 18161 }, { "epoch": 0.5302618901637909, "grad_norm": 0.50244002293957, "learning_rate": 2.609732360097324e-05, "loss": 0.5744, "step": 18162 }, { "epoch": 0.5302910863916382, "grad_norm": 0.5508300426857331, "learning_rate": 2.6095701540957017e-05, "loss": 0.6846, "step": 18163 }, { "epoch": 0.5303202826194856, "grad_norm": 0.5722630138583362, "learning_rate": 2.60940794809408e-05, "loss": 0.6904, "step": 18164 }, { "epoch": 0.530349478847333, "grad_norm": 0.5011198154675476, "learning_rate": 2.6092457420924577e-05, "loss": 0.5663, "step": 18165 }, { "epoch": 0.5303786750751803, "grad_norm": 0.5312030745383183, "learning_rate": 2.6090835360908355e-05, "loss": 0.6555, "step": 18166 }, { "epoch": 0.5304078713030277, "grad_norm": 0.531454213996165, "learning_rate": 2.6089213300892134e-05, "loss": 0.6411, "step": 18167 }, { "epoch": 0.530437067530875, "grad_norm": 0.5475567228298642, "learning_rate": 2.6087591240875915e-05, "loss": 0.5994, "step": 18168 }, { "epoch": 0.5304662637587224, "grad_norm": 0.5627066486125765, "learning_rate": 2.6085969180859694e-05, "loss": 0.6924, "step": 18169 }, { "epoch": 0.5304954599865698, "grad_norm": 0.5254015710322624, "learning_rate": 2.6084347120843472e-05, "loss": 0.5983, "step": 18170 }, { "epoch": 0.5305246562144171, "grad_norm": 0.5290832458043961, "learning_rate": 2.608272506082725e-05, "loss": 0.6222, "step": 18171 }, { "epoch": 0.5305538524422645, "grad_norm": 0.47298816837368834, "learning_rate": 2.608110300081103e-05, "loss": 0.518, "step": 18172 }, { "epoch": 0.5305830486701119, "grad_norm": 0.517986158529295, "learning_rate": 2.607948094079481e-05, "loss": 0.4958, "step": 18173 }, { "epoch": 0.5306122448979592, "grad_norm": 0.45926047761047506, "learning_rate": 2.607785888077859e-05, "loss": 0.5048, "step": 18174 }, { "epoch": 0.5306414411258066, "grad_norm": 0.5349190908252616, "learning_rate": 2.6076236820762367e-05, "loss": 0.6438, "step": 18175 }, { "epoch": 0.5306706373536539, "grad_norm": 0.5501810352495835, "learning_rate": 2.6074614760746146e-05, "loss": 0.5935, "step": 18176 }, { "epoch": 0.5306998335815013, "grad_norm": 0.5537490333761573, "learning_rate": 2.6072992700729924e-05, "loss": 0.654, "step": 18177 }, { "epoch": 0.5307290298093487, "grad_norm": 0.5067650635592476, "learning_rate": 2.607137064071371e-05, "loss": 0.5564, "step": 18178 }, { "epoch": 0.530758226037196, "grad_norm": 0.5540859678658556, "learning_rate": 2.606974858069749e-05, "loss": 0.6255, "step": 18179 }, { "epoch": 0.5307874222650434, "grad_norm": 0.5222623178874608, "learning_rate": 2.606812652068127e-05, "loss": 0.5808, "step": 18180 }, { "epoch": 0.5308166184928907, "grad_norm": 0.4924615464303519, "learning_rate": 2.6066504460665048e-05, "loss": 0.5365, "step": 18181 }, { "epoch": 0.5308458147207381, "grad_norm": 0.5692866919572783, "learning_rate": 2.6064882400648826e-05, "loss": 0.6995, "step": 18182 }, { "epoch": 0.5308750109485855, "grad_norm": 0.5523752149554346, "learning_rate": 2.6063260340632605e-05, "loss": 0.6298, "step": 18183 }, { "epoch": 0.5309042071764328, "grad_norm": 0.4979576339402664, "learning_rate": 2.6061638280616386e-05, "loss": 0.5757, "step": 18184 }, { "epoch": 0.5309334034042802, "grad_norm": 0.4945869773794214, "learning_rate": 2.6060016220600165e-05, "loss": 0.5298, "step": 18185 }, { "epoch": 0.5309625996321276, "grad_norm": 0.5093873418910738, "learning_rate": 2.6058394160583943e-05, "loss": 0.584, "step": 18186 }, { "epoch": 0.5309917958599749, "grad_norm": 0.5239166249945607, "learning_rate": 2.605677210056772e-05, "loss": 0.6023, "step": 18187 }, { "epoch": 0.5310209920878223, "grad_norm": 0.5357228379445497, "learning_rate": 2.60551500405515e-05, "loss": 0.6425, "step": 18188 }, { "epoch": 0.5310501883156696, "grad_norm": 0.5351933717391192, "learning_rate": 2.605352798053528e-05, "loss": 0.6276, "step": 18189 }, { "epoch": 0.531079384543517, "grad_norm": 0.5188094971923541, "learning_rate": 2.605190592051906e-05, "loss": 0.5856, "step": 18190 }, { "epoch": 0.5311085807713644, "grad_norm": 0.5741388321825609, "learning_rate": 2.605028386050284e-05, "loss": 0.6102, "step": 18191 }, { "epoch": 0.5311377769992117, "grad_norm": 0.5286731066687163, "learning_rate": 2.6048661800486617e-05, "loss": 0.6548, "step": 18192 }, { "epoch": 0.5311669732270591, "grad_norm": 0.5262952391614587, "learning_rate": 2.60470397404704e-05, "loss": 0.5579, "step": 18193 }, { "epoch": 0.5311961694549064, "grad_norm": 0.5043250351400019, "learning_rate": 2.6045417680454177e-05, "loss": 0.5838, "step": 18194 }, { "epoch": 0.5312253656827538, "grad_norm": 0.51534876307722, "learning_rate": 2.6043795620437955e-05, "loss": 0.5848, "step": 18195 }, { "epoch": 0.5312545619106012, "grad_norm": 0.5100799842530748, "learning_rate": 2.6042173560421734e-05, "loss": 0.5919, "step": 18196 }, { "epoch": 0.5312837581384485, "grad_norm": 0.5173742146896111, "learning_rate": 2.604055150040552e-05, "loss": 0.63, "step": 18197 }, { "epoch": 0.5313129543662959, "grad_norm": 0.5048169276043835, "learning_rate": 2.6038929440389297e-05, "loss": 0.5488, "step": 18198 }, { "epoch": 0.5313421505941432, "grad_norm": 0.4642194630204469, "learning_rate": 2.6037307380373075e-05, "loss": 0.5113, "step": 18199 }, { "epoch": 0.5313713468219906, "grad_norm": 0.5311841891056668, "learning_rate": 2.6035685320356857e-05, "loss": 0.6799, "step": 18200 }, { "epoch": 0.531400543049838, "grad_norm": 0.5025904197459071, "learning_rate": 2.6034063260340636e-05, "loss": 0.5952, "step": 18201 }, { "epoch": 0.5314297392776853, "grad_norm": 0.5049260528512305, "learning_rate": 2.6032441200324414e-05, "loss": 0.5826, "step": 18202 }, { "epoch": 0.5314589355055327, "grad_norm": 0.5328950771059755, "learning_rate": 2.6030819140308192e-05, "loss": 0.634, "step": 18203 }, { "epoch": 0.53148813173338, "grad_norm": 0.4974426023042129, "learning_rate": 2.6029197080291974e-05, "loss": 0.5672, "step": 18204 }, { "epoch": 0.5315173279612274, "grad_norm": 0.49902270164465307, "learning_rate": 2.6027575020275752e-05, "loss": 0.5919, "step": 18205 }, { "epoch": 0.5315465241890748, "grad_norm": 0.49794473698048297, "learning_rate": 2.602595296025953e-05, "loss": 0.6185, "step": 18206 }, { "epoch": 0.5315757204169221, "grad_norm": 0.5115182019978513, "learning_rate": 2.602433090024331e-05, "loss": 0.5894, "step": 18207 }, { "epoch": 0.5316049166447695, "grad_norm": 0.5036719827407202, "learning_rate": 2.6022708840227088e-05, "loss": 0.5892, "step": 18208 }, { "epoch": 0.5316341128726169, "grad_norm": 0.5750715936417776, "learning_rate": 2.602108678021087e-05, "loss": 0.6563, "step": 18209 }, { "epoch": 0.5316633091004642, "grad_norm": 0.6071980235594757, "learning_rate": 2.6019464720194648e-05, "loss": 0.7138, "step": 18210 }, { "epoch": 0.5316925053283116, "grad_norm": 0.5041114440341646, "learning_rate": 2.6017842660178426e-05, "loss": 0.6107, "step": 18211 }, { "epoch": 0.5317217015561589, "grad_norm": 0.5291740362097136, "learning_rate": 2.6016220600162204e-05, "loss": 0.6026, "step": 18212 }, { "epoch": 0.5317508977840063, "grad_norm": 0.5216177288694577, "learning_rate": 2.6014598540145986e-05, "loss": 0.5361, "step": 18213 }, { "epoch": 0.5317800940118537, "grad_norm": 0.5037323328981166, "learning_rate": 2.6012976480129765e-05, "loss": 0.5756, "step": 18214 }, { "epoch": 0.531809290239701, "grad_norm": 0.5533434579586933, "learning_rate": 2.6011354420113543e-05, "loss": 0.694, "step": 18215 }, { "epoch": 0.5318384864675484, "grad_norm": 0.5206612239829929, "learning_rate": 2.6009732360097328e-05, "loss": 0.575, "step": 18216 }, { "epoch": 0.5318676826953957, "grad_norm": 0.5434273606334564, "learning_rate": 2.6008110300081107e-05, "loss": 0.6515, "step": 18217 }, { "epoch": 0.5318968789232431, "grad_norm": 0.5647143613313922, "learning_rate": 2.6006488240064885e-05, "loss": 0.6718, "step": 18218 }, { "epoch": 0.5319260751510905, "grad_norm": 0.5296060548102531, "learning_rate": 2.6004866180048663e-05, "loss": 0.6307, "step": 18219 }, { "epoch": 0.5319552713789378, "grad_norm": 0.5173613706156965, "learning_rate": 2.6003244120032445e-05, "loss": 0.6021, "step": 18220 }, { "epoch": 0.5319844676067852, "grad_norm": 0.551494012831005, "learning_rate": 2.6001622060016223e-05, "loss": 0.6333, "step": 18221 }, { "epoch": 0.5320136638346326, "grad_norm": 0.5070696542345843, "learning_rate": 2.6000000000000002e-05, "loss": 0.5873, "step": 18222 }, { "epoch": 0.5320428600624799, "grad_norm": 0.4701848069811653, "learning_rate": 2.599837793998378e-05, "loss": 0.5221, "step": 18223 }, { "epoch": 0.5320720562903273, "grad_norm": 0.5501210620681308, "learning_rate": 2.5996755879967562e-05, "loss": 0.6577, "step": 18224 }, { "epoch": 0.5321012525181746, "grad_norm": 0.5361681515303769, "learning_rate": 2.599513381995134e-05, "loss": 0.6657, "step": 18225 }, { "epoch": 0.532130448746022, "grad_norm": 0.49725395042892584, "learning_rate": 2.599351175993512e-05, "loss": 0.5503, "step": 18226 }, { "epoch": 0.5321596449738694, "grad_norm": 0.5330180401767456, "learning_rate": 2.5991889699918897e-05, "loss": 0.6313, "step": 18227 }, { "epoch": 0.5321888412017167, "grad_norm": 0.5204155893918456, "learning_rate": 2.5990267639902675e-05, "loss": 0.5933, "step": 18228 }, { "epoch": 0.5322180374295641, "grad_norm": 0.5275698467381205, "learning_rate": 2.5988645579886457e-05, "loss": 0.5949, "step": 18229 }, { "epoch": 0.5322472336574114, "grad_norm": 0.5249537658062043, "learning_rate": 2.5987023519870236e-05, "loss": 0.6353, "step": 18230 }, { "epoch": 0.5322764298852588, "grad_norm": 0.5411866117658207, "learning_rate": 2.5985401459854014e-05, "loss": 0.6636, "step": 18231 }, { "epoch": 0.5323056261131062, "grad_norm": 0.5306993165131438, "learning_rate": 2.5983779399837792e-05, "loss": 0.6216, "step": 18232 }, { "epoch": 0.5323348223409535, "grad_norm": 0.5204122233332894, "learning_rate": 2.598215733982157e-05, "loss": 0.6115, "step": 18233 }, { "epoch": 0.5323640185688009, "grad_norm": 0.5559687192177303, "learning_rate": 2.5980535279805352e-05, "loss": 0.7033, "step": 18234 }, { "epoch": 0.5323932147966483, "grad_norm": 0.5177176393271616, "learning_rate": 2.5978913219789138e-05, "loss": 0.6314, "step": 18235 }, { "epoch": 0.5324224110244956, "grad_norm": 0.5176828982693784, "learning_rate": 2.5977291159772916e-05, "loss": 0.6295, "step": 18236 }, { "epoch": 0.532451607252343, "grad_norm": 0.5167751463157584, "learning_rate": 2.5975669099756694e-05, "loss": 0.5764, "step": 18237 }, { "epoch": 0.5324808034801903, "grad_norm": 0.4947666123130899, "learning_rate": 2.5974047039740473e-05, "loss": 0.5846, "step": 18238 }, { "epoch": 0.5325099997080377, "grad_norm": 0.5566004249049348, "learning_rate": 2.597242497972425e-05, "loss": 0.6365, "step": 18239 }, { "epoch": 0.5325391959358851, "grad_norm": 0.5737057165324186, "learning_rate": 2.5970802919708033e-05, "loss": 0.692, "step": 18240 }, { "epoch": 0.5325683921637324, "grad_norm": 0.5073363538096081, "learning_rate": 2.596918085969181e-05, "loss": 0.5532, "step": 18241 }, { "epoch": 0.5325975883915798, "grad_norm": 0.5223191134117628, "learning_rate": 2.596755879967559e-05, "loss": 0.6032, "step": 18242 }, { "epoch": 0.5326267846194271, "grad_norm": 0.5253966639251885, "learning_rate": 2.5965936739659368e-05, "loss": 0.6307, "step": 18243 }, { "epoch": 0.5326559808472745, "grad_norm": 0.5336232573099678, "learning_rate": 2.5964314679643146e-05, "loss": 0.6908, "step": 18244 }, { "epoch": 0.5326851770751219, "grad_norm": 0.5267772205941113, "learning_rate": 2.5962692619626928e-05, "loss": 0.6337, "step": 18245 }, { "epoch": 0.5327143733029692, "grad_norm": 0.5090316845973824, "learning_rate": 2.5961070559610706e-05, "loss": 0.6296, "step": 18246 }, { "epoch": 0.5327435695308166, "grad_norm": 0.5007708124292811, "learning_rate": 2.5959448499594485e-05, "loss": 0.5968, "step": 18247 }, { "epoch": 0.532772765758664, "grad_norm": 0.5618730204538245, "learning_rate": 2.5957826439578263e-05, "loss": 0.7073, "step": 18248 }, { "epoch": 0.5328019619865113, "grad_norm": 0.463070442495232, "learning_rate": 2.5956204379562045e-05, "loss": 0.5023, "step": 18249 }, { "epoch": 0.5328311582143587, "grad_norm": 0.5340035217670562, "learning_rate": 2.5954582319545823e-05, "loss": 0.6622, "step": 18250 }, { "epoch": 0.532860354442206, "grad_norm": 0.5514521845086836, "learning_rate": 2.5952960259529602e-05, "loss": 0.6421, "step": 18251 }, { "epoch": 0.5328895506700534, "grad_norm": 0.4829185194490584, "learning_rate": 2.595133819951338e-05, "loss": 0.5386, "step": 18252 }, { "epoch": 0.5329187468979008, "grad_norm": 0.5049071994313566, "learning_rate": 2.594971613949716e-05, "loss": 0.5944, "step": 18253 }, { "epoch": 0.5329479431257481, "grad_norm": 0.49321758976977015, "learning_rate": 2.5948094079480944e-05, "loss": 0.5373, "step": 18254 }, { "epoch": 0.5329771393535955, "grad_norm": 0.534015229095809, "learning_rate": 2.5946472019464725e-05, "loss": 0.6496, "step": 18255 }, { "epoch": 0.5330063355814428, "grad_norm": 0.5591888119720538, "learning_rate": 2.5944849959448504e-05, "loss": 0.6239, "step": 18256 }, { "epoch": 0.5330355318092902, "grad_norm": 0.5644130442848083, "learning_rate": 2.5943227899432282e-05, "loss": 0.6626, "step": 18257 }, { "epoch": 0.5330647280371376, "grad_norm": 0.4831783731950865, "learning_rate": 2.594160583941606e-05, "loss": 0.5368, "step": 18258 }, { "epoch": 0.5330939242649849, "grad_norm": 0.5583326205086248, "learning_rate": 2.593998377939984e-05, "loss": 0.6417, "step": 18259 }, { "epoch": 0.5331231204928323, "grad_norm": 0.532992766478951, "learning_rate": 2.593836171938362e-05, "loss": 0.6461, "step": 18260 }, { "epoch": 0.5331523167206796, "grad_norm": 0.5682848935284358, "learning_rate": 2.59367396593674e-05, "loss": 0.6915, "step": 18261 }, { "epoch": 0.533181512948527, "grad_norm": 0.5464311843432161, "learning_rate": 2.5935117599351177e-05, "loss": 0.666, "step": 18262 }, { "epoch": 0.5332107091763744, "grad_norm": 0.5728833617006843, "learning_rate": 2.5933495539334956e-05, "loss": 0.7433, "step": 18263 }, { "epoch": 0.5332399054042217, "grad_norm": 0.48242984488626833, "learning_rate": 2.5931873479318734e-05, "loss": 0.5635, "step": 18264 }, { "epoch": 0.5332691016320691, "grad_norm": 0.5151889795045764, "learning_rate": 2.5930251419302516e-05, "loss": 0.6199, "step": 18265 }, { "epoch": 0.5332982978599164, "grad_norm": 0.5422345324616857, "learning_rate": 2.5928629359286294e-05, "loss": 0.673, "step": 18266 }, { "epoch": 0.5333274940877638, "grad_norm": 0.5615771612655304, "learning_rate": 2.5927007299270073e-05, "loss": 0.644, "step": 18267 }, { "epoch": 0.5333566903156112, "grad_norm": 0.5480701538564549, "learning_rate": 2.592538523925385e-05, "loss": 0.676, "step": 18268 }, { "epoch": 0.5333858865434585, "grad_norm": 0.49595106074609724, "learning_rate": 2.5923763179237633e-05, "loss": 0.5828, "step": 18269 }, { "epoch": 0.5334150827713059, "grad_norm": 0.4834234963871123, "learning_rate": 2.592214111922141e-05, "loss": 0.5301, "step": 18270 }, { "epoch": 0.5334442789991533, "grad_norm": 0.5524149493819709, "learning_rate": 2.592051905920519e-05, "loss": 0.6075, "step": 18271 }, { "epoch": 0.5334734752270007, "grad_norm": 0.5048887482277392, "learning_rate": 2.5918896999188968e-05, "loss": 0.5513, "step": 18272 }, { "epoch": 0.5335026714548481, "grad_norm": 0.5356001707358562, "learning_rate": 2.5917274939172753e-05, "loss": 0.6514, "step": 18273 }, { "epoch": 0.5335318676826954, "grad_norm": 0.5441552179934446, "learning_rate": 2.591565287915653e-05, "loss": 0.6463, "step": 18274 }, { "epoch": 0.5335610639105428, "grad_norm": 0.5111190178824193, "learning_rate": 2.591403081914031e-05, "loss": 0.5962, "step": 18275 }, { "epoch": 0.5335902601383902, "grad_norm": 0.5190148565763838, "learning_rate": 2.591240875912409e-05, "loss": 0.6456, "step": 18276 }, { "epoch": 0.5336194563662375, "grad_norm": 0.4946866025974267, "learning_rate": 2.591078669910787e-05, "loss": 0.5711, "step": 18277 }, { "epoch": 0.5336486525940849, "grad_norm": 0.5225801922636845, "learning_rate": 2.590916463909165e-05, "loss": 0.6385, "step": 18278 }, { "epoch": 0.5336778488219323, "grad_norm": 0.5002292255751055, "learning_rate": 2.5907542579075427e-05, "loss": 0.5703, "step": 18279 }, { "epoch": 0.5337070450497796, "grad_norm": 0.5108124136224474, "learning_rate": 2.590592051905921e-05, "loss": 0.5968, "step": 18280 }, { "epoch": 0.533736241277627, "grad_norm": 0.5202176808500636, "learning_rate": 2.5904298459042987e-05, "loss": 0.6127, "step": 18281 }, { "epoch": 0.5337654375054743, "grad_norm": 0.5072303553227094, "learning_rate": 2.5902676399026765e-05, "loss": 0.6216, "step": 18282 }, { "epoch": 0.5337946337333217, "grad_norm": 0.49173567406976704, "learning_rate": 2.5901054339010544e-05, "loss": 0.5922, "step": 18283 }, { "epoch": 0.5338238299611691, "grad_norm": 0.5408522207890047, "learning_rate": 2.5899432278994322e-05, "loss": 0.716, "step": 18284 }, { "epoch": 0.5338530261890164, "grad_norm": 0.5694513107327155, "learning_rate": 2.5897810218978104e-05, "loss": 0.7123, "step": 18285 }, { "epoch": 0.5338822224168638, "grad_norm": 0.536940592478716, "learning_rate": 2.5896188158961882e-05, "loss": 0.6518, "step": 18286 }, { "epoch": 0.5339114186447111, "grad_norm": 0.5326579456789534, "learning_rate": 2.589456609894566e-05, "loss": 0.6278, "step": 18287 }, { "epoch": 0.5339406148725585, "grad_norm": 0.5507824362048305, "learning_rate": 2.589294403892944e-05, "loss": 0.6496, "step": 18288 }, { "epoch": 0.5339698111004059, "grad_norm": 0.6134794291109423, "learning_rate": 2.5891321978913217e-05, "loss": 0.5975, "step": 18289 }, { "epoch": 0.5339990073282532, "grad_norm": 0.5508065464021232, "learning_rate": 2.5889699918897e-05, "loss": 0.671, "step": 18290 }, { "epoch": 0.5340282035561006, "grad_norm": 0.5302503497594996, "learning_rate": 2.5888077858880777e-05, "loss": 0.6557, "step": 18291 }, { "epoch": 0.534057399783948, "grad_norm": 0.48696471806002123, "learning_rate": 2.5886455798864563e-05, "loss": 0.5328, "step": 18292 }, { "epoch": 0.5340865960117953, "grad_norm": 0.48685214519460174, "learning_rate": 2.588483373884834e-05, "loss": 0.5591, "step": 18293 }, { "epoch": 0.5341157922396427, "grad_norm": 0.5980933320232161, "learning_rate": 2.588321167883212e-05, "loss": 0.6932, "step": 18294 }, { "epoch": 0.53414498846749, "grad_norm": 0.5723093342478187, "learning_rate": 2.5881589618815898e-05, "loss": 0.6617, "step": 18295 }, { "epoch": 0.5341741846953374, "grad_norm": 0.531538061247972, "learning_rate": 2.587996755879968e-05, "loss": 0.5603, "step": 18296 }, { "epoch": 0.5342033809231848, "grad_norm": 0.489045716761407, "learning_rate": 2.5878345498783458e-05, "loss": 0.5458, "step": 18297 }, { "epoch": 0.5342325771510321, "grad_norm": 0.5024478388480876, "learning_rate": 2.5876723438767236e-05, "loss": 0.5658, "step": 18298 }, { "epoch": 0.5342617733788795, "grad_norm": 0.5544701773577321, "learning_rate": 2.5875101378751015e-05, "loss": 0.6305, "step": 18299 }, { "epoch": 0.5342909696067268, "grad_norm": 0.5360654072175128, "learning_rate": 2.5873479318734796e-05, "loss": 0.5815, "step": 18300 }, { "epoch": 0.5343201658345742, "grad_norm": 0.5349617537672686, "learning_rate": 2.5871857258718575e-05, "loss": 0.616, "step": 18301 }, { "epoch": 0.5343493620624216, "grad_norm": 0.5163920334420877, "learning_rate": 2.5870235198702353e-05, "loss": 0.5568, "step": 18302 }, { "epoch": 0.5343785582902689, "grad_norm": 0.6111365878153427, "learning_rate": 2.586861313868613e-05, "loss": 0.7417, "step": 18303 }, { "epoch": 0.5344077545181163, "grad_norm": 0.5127584340000991, "learning_rate": 2.586699107866991e-05, "loss": 0.5541, "step": 18304 }, { "epoch": 0.5344369507459636, "grad_norm": 0.5412718998321044, "learning_rate": 2.586536901865369e-05, "loss": 0.6386, "step": 18305 }, { "epoch": 0.534466146973811, "grad_norm": 0.5123859474455473, "learning_rate": 2.586374695863747e-05, "loss": 0.5707, "step": 18306 }, { "epoch": 0.5344953432016584, "grad_norm": 0.5068216097449658, "learning_rate": 2.5862124898621248e-05, "loss": 0.5848, "step": 18307 }, { "epoch": 0.5345245394295057, "grad_norm": 0.5219983268331353, "learning_rate": 2.5860502838605027e-05, "loss": 0.6298, "step": 18308 }, { "epoch": 0.5345537356573531, "grad_norm": 0.5263689657838807, "learning_rate": 2.5858880778588805e-05, "loss": 0.5723, "step": 18309 }, { "epoch": 0.5345829318852005, "grad_norm": 0.5156231656289598, "learning_rate": 2.585725871857259e-05, "loss": 0.6203, "step": 18310 }, { "epoch": 0.5346121281130478, "grad_norm": 0.5083880561643759, "learning_rate": 2.5855636658556372e-05, "loss": 0.5577, "step": 18311 }, { "epoch": 0.5346413243408952, "grad_norm": 0.5425407793646722, "learning_rate": 2.585401459854015e-05, "loss": 0.6418, "step": 18312 }, { "epoch": 0.5346705205687425, "grad_norm": 0.5417165103386729, "learning_rate": 2.585239253852393e-05, "loss": 0.6273, "step": 18313 }, { "epoch": 0.5346997167965899, "grad_norm": 0.5509222199900847, "learning_rate": 2.5850770478507707e-05, "loss": 0.6589, "step": 18314 }, { "epoch": 0.5347289130244373, "grad_norm": 0.5150224956083975, "learning_rate": 2.5849148418491485e-05, "loss": 0.617, "step": 18315 }, { "epoch": 0.5347581092522846, "grad_norm": 0.537550262117385, "learning_rate": 2.5847526358475267e-05, "loss": 0.6264, "step": 18316 }, { "epoch": 0.534787305480132, "grad_norm": 0.5407822769218504, "learning_rate": 2.5845904298459046e-05, "loss": 0.6095, "step": 18317 }, { "epoch": 0.5348165017079793, "grad_norm": 0.49223588819174174, "learning_rate": 2.5844282238442824e-05, "loss": 0.5618, "step": 18318 }, { "epoch": 0.5348456979358267, "grad_norm": 0.5485707421991193, "learning_rate": 2.5842660178426602e-05, "loss": 0.648, "step": 18319 }, { "epoch": 0.5348748941636741, "grad_norm": 0.5233743148343682, "learning_rate": 2.584103811841038e-05, "loss": 0.6131, "step": 18320 }, { "epoch": 0.5349040903915214, "grad_norm": 0.5566409562836137, "learning_rate": 2.5839416058394162e-05, "loss": 0.5658, "step": 18321 }, { "epoch": 0.5349332866193688, "grad_norm": 0.5360692920223821, "learning_rate": 2.583779399837794e-05, "loss": 0.687, "step": 18322 }, { "epoch": 0.5349624828472161, "grad_norm": 0.5253975906611748, "learning_rate": 2.583617193836172e-05, "loss": 0.5877, "step": 18323 }, { "epoch": 0.5349916790750635, "grad_norm": 0.5108610945511806, "learning_rate": 2.5834549878345498e-05, "loss": 0.5642, "step": 18324 }, { "epoch": 0.5350208753029109, "grad_norm": 0.5245014772962655, "learning_rate": 2.583292781832928e-05, "loss": 0.6159, "step": 18325 }, { "epoch": 0.5350500715307582, "grad_norm": 0.5496076982862611, "learning_rate": 2.5831305758313058e-05, "loss": 0.6352, "step": 18326 }, { "epoch": 0.5350792677586056, "grad_norm": 0.5247650035873482, "learning_rate": 2.5829683698296836e-05, "loss": 0.6123, "step": 18327 }, { "epoch": 0.535108463986453, "grad_norm": 0.48925494081898757, "learning_rate": 2.5828061638280614e-05, "loss": 0.5478, "step": 18328 }, { "epoch": 0.5351376602143003, "grad_norm": 0.5400235037149964, "learning_rate": 2.58264395782644e-05, "loss": 0.6561, "step": 18329 }, { "epoch": 0.5351668564421477, "grad_norm": 0.5527868050370043, "learning_rate": 2.5824817518248178e-05, "loss": 0.666, "step": 18330 }, { "epoch": 0.535196052669995, "grad_norm": 0.517806680462696, "learning_rate": 2.5823195458231956e-05, "loss": 0.6198, "step": 18331 }, { "epoch": 0.5352252488978424, "grad_norm": 0.5351657566983844, "learning_rate": 2.5821573398215738e-05, "loss": 0.6361, "step": 18332 }, { "epoch": 0.5352544451256898, "grad_norm": 0.49370620133725435, "learning_rate": 2.5819951338199516e-05, "loss": 0.5833, "step": 18333 }, { "epoch": 0.5352836413535371, "grad_norm": 0.512913036098858, "learning_rate": 2.5818329278183295e-05, "loss": 0.584, "step": 18334 }, { "epoch": 0.5353128375813845, "grad_norm": 0.4995090948558082, "learning_rate": 2.5816707218167073e-05, "loss": 0.5622, "step": 18335 }, { "epoch": 0.5353420338092318, "grad_norm": 0.5475564743125796, "learning_rate": 2.5815085158150855e-05, "loss": 0.6569, "step": 18336 }, { "epoch": 0.5353712300370792, "grad_norm": 0.46798179466169165, "learning_rate": 2.5813463098134633e-05, "loss": 0.506, "step": 18337 }, { "epoch": 0.5354004262649266, "grad_norm": 0.49682584697361987, "learning_rate": 2.5811841038118412e-05, "loss": 0.5967, "step": 18338 }, { "epoch": 0.5354296224927739, "grad_norm": 0.5284348162003659, "learning_rate": 2.581021897810219e-05, "loss": 0.6084, "step": 18339 }, { "epoch": 0.5354588187206213, "grad_norm": 0.5343817518551842, "learning_rate": 2.580859691808597e-05, "loss": 0.6275, "step": 18340 }, { "epoch": 0.5354880149484686, "grad_norm": 0.49945249543341597, "learning_rate": 2.580697485806975e-05, "loss": 0.5487, "step": 18341 }, { "epoch": 0.535517211176316, "grad_norm": 0.4712861794869512, "learning_rate": 2.580535279805353e-05, "loss": 0.5558, "step": 18342 }, { "epoch": 0.5355464074041634, "grad_norm": 0.5129110700869847, "learning_rate": 2.5803730738037307e-05, "loss": 0.5847, "step": 18343 }, { "epoch": 0.5355756036320107, "grad_norm": 0.5324411201837441, "learning_rate": 2.5802108678021085e-05, "loss": 0.628, "step": 18344 }, { "epoch": 0.5356047998598581, "grad_norm": 0.5173539488582395, "learning_rate": 2.5800486618004864e-05, "loss": 0.6151, "step": 18345 }, { "epoch": 0.5356339960877055, "grad_norm": 0.5249629898086442, "learning_rate": 2.5798864557988646e-05, "loss": 0.6292, "step": 18346 }, { "epoch": 0.5356631923155528, "grad_norm": 0.5294024795394013, "learning_rate": 2.5797242497972424e-05, "loss": 0.5993, "step": 18347 }, { "epoch": 0.5356923885434002, "grad_norm": 0.5539032737199656, "learning_rate": 2.579562043795621e-05, "loss": 0.6535, "step": 18348 }, { "epoch": 0.5357215847712475, "grad_norm": 0.4856212132391693, "learning_rate": 2.5793998377939987e-05, "loss": 0.5525, "step": 18349 }, { "epoch": 0.5357507809990949, "grad_norm": 0.5594570929560276, "learning_rate": 2.5792376317923766e-05, "loss": 0.6601, "step": 18350 }, { "epoch": 0.5357799772269423, "grad_norm": 0.5109945531414375, "learning_rate": 2.5790754257907544e-05, "loss": 0.6102, "step": 18351 }, { "epoch": 0.5358091734547896, "grad_norm": 0.4942883514923662, "learning_rate": 2.5789132197891326e-05, "loss": 0.5254, "step": 18352 }, { "epoch": 0.535838369682637, "grad_norm": 0.5203506629343583, "learning_rate": 2.5787510137875104e-05, "loss": 0.6018, "step": 18353 }, { "epoch": 0.5358675659104843, "grad_norm": 0.5194830092835339, "learning_rate": 2.5785888077858883e-05, "loss": 0.6509, "step": 18354 }, { "epoch": 0.5358967621383317, "grad_norm": 0.5348212012975344, "learning_rate": 2.578426601784266e-05, "loss": 0.636, "step": 18355 }, { "epoch": 0.5359259583661791, "grad_norm": 0.5737927060974839, "learning_rate": 2.5782643957826443e-05, "loss": 0.7083, "step": 18356 }, { "epoch": 0.5359551545940264, "grad_norm": 0.5096227011619991, "learning_rate": 2.578102189781022e-05, "loss": 0.5945, "step": 18357 }, { "epoch": 0.5359843508218738, "grad_norm": 0.5163404798295373, "learning_rate": 2.5779399837794e-05, "loss": 0.5673, "step": 18358 }, { "epoch": 0.5360135470497212, "grad_norm": 0.4987893915841297, "learning_rate": 2.5777777777777778e-05, "loss": 0.5749, "step": 18359 }, { "epoch": 0.5360427432775685, "grad_norm": 0.5506908307481154, "learning_rate": 2.5776155717761556e-05, "loss": 0.6608, "step": 18360 }, { "epoch": 0.5360719395054159, "grad_norm": 0.518615538053256, "learning_rate": 2.5774533657745338e-05, "loss": 0.6762, "step": 18361 }, { "epoch": 0.5361011357332632, "grad_norm": 0.5247677965884535, "learning_rate": 2.5772911597729116e-05, "loss": 0.5993, "step": 18362 }, { "epoch": 0.5361303319611106, "grad_norm": 0.5201819007146659, "learning_rate": 2.5771289537712895e-05, "loss": 0.6023, "step": 18363 }, { "epoch": 0.536159528188958, "grad_norm": 0.5124138572356647, "learning_rate": 2.5769667477696673e-05, "loss": 0.5886, "step": 18364 }, { "epoch": 0.5361887244168053, "grad_norm": 0.5957315633650663, "learning_rate": 2.576804541768045e-05, "loss": 0.6799, "step": 18365 }, { "epoch": 0.5362179206446527, "grad_norm": 0.5204266923152987, "learning_rate": 2.5766423357664233e-05, "loss": 0.6108, "step": 18366 }, { "epoch": 0.5362471168725, "grad_norm": 0.5339594764115476, "learning_rate": 2.576480129764802e-05, "loss": 0.6344, "step": 18367 }, { "epoch": 0.5362763131003474, "grad_norm": 0.5356980365504114, "learning_rate": 2.5763179237631797e-05, "loss": 0.6504, "step": 18368 }, { "epoch": 0.5363055093281948, "grad_norm": 0.5194332229642827, "learning_rate": 2.5761557177615575e-05, "loss": 0.6177, "step": 18369 }, { "epoch": 0.5363347055560421, "grad_norm": 0.5326410299313472, "learning_rate": 2.5759935117599354e-05, "loss": 0.5874, "step": 18370 }, { "epoch": 0.5363639017838895, "grad_norm": 0.5091451149954581, "learning_rate": 2.5758313057583132e-05, "loss": 0.5516, "step": 18371 }, { "epoch": 0.5363930980117368, "grad_norm": 0.5304121277948679, "learning_rate": 2.5756690997566914e-05, "loss": 0.6177, "step": 18372 }, { "epoch": 0.5364222942395842, "grad_norm": 0.5436099183068376, "learning_rate": 2.5755068937550692e-05, "loss": 0.6741, "step": 18373 }, { "epoch": 0.5364514904674316, "grad_norm": 0.5377326067327765, "learning_rate": 2.575344687753447e-05, "loss": 0.6346, "step": 18374 }, { "epoch": 0.5364806866952789, "grad_norm": 0.53421967017043, "learning_rate": 2.575182481751825e-05, "loss": 0.6097, "step": 18375 }, { "epoch": 0.5365098829231263, "grad_norm": 0.5129359435823764, "learning_rate": 2.5750202757502027e-05, "loss": 0.548, "step": 18376 }, { "epoch": 0.5365390791509737, "grad_norm": 0.5266271271749278, "learning_rate": 2.574858069748581e-05, "loss": 0.6279, "step": 18377 }, { "epoch": 0.536568275378821, "grad_norm": 0.5369961633299869, "learning_rate": 2.5746958637469587e-05, "loss": 0.5541, "step": 18378 }, { "epoch": 0.5365974716066684, "grad_norm": 0.5514115842011384, "learning_rate": 2.5745336577453366e-05, "loss": 0.6617, "step": 18379 }, { "epoch": 0.5366266678345157, "grad_norm": 0.5432647779776242, "learning_rate": 2.5743714517437144e-05, "loss": 0.6704, "step": 18380 }, { "epoch": 0.5366558640623631, "grad_norm": 0.4728443065553999, "learning_rate": 2.5742092457420926e-05, "loss": 0.4928, "step": 18381 }, { "epoch": 0.5366850602902105, "grad_norm": 0.5358145920830663, "learning_rate": 2.5740470397404704e-05, "loss": 0.6484, "step": 18382 }, { "epoch": 0.5367142565180578, "grad_norm": 0.5263923910997188, "learning_rate": 2.5738848337388483e-05, "loss": 0.6269, "step": 18383 }, { "epoch": 0.5367434527459052, "grad_norm": 0.5443765632981015, "learning_rate": 2.573722627737226e-05, "loss": 0.627, "step": 18384 }, { "epoch": 0.5367726489737525, "grad_norm": 0.5133472829510117, "learning_rate": 2.573560421735604e-05, "loss": 0.625, "step": 18385 }, { "epoch": 0.5368018452015999, "grad_norm": 0.49282079155468705, "learning_rate": 2.5733982157339825e-05, "loss": 0.5739, "step": 18386 }, { "epoch": 0.5368310414294473, "grad_norm": 0.540093120944399, "learning_rate": 2.5732360097323603e-05, "loss": 0.6457, "step": 18387 }, { "epoch": 0.5368602376572946, "grad_norm": 0.5544878561162937, "learning_rate": 2.5730738037307385e-05, "loss": 0.6378, "step": 18388 }, { "epoch": 0.536889433885142, "grad_norm": 0.5660180734940046, "learning_rate": 2.5729115977291163e-05, "loss": 0.6661, "step": 18389 }, { "epoch": 0.5369186301129893, "grad_norm": 0.5107989639747674, "learning_rate": 2.572749391727494e-05, "loss": 0.5826, "step": 18390 }, { "epoch": 0.5369478263408367, "grad_norm": 0.5006743082497396, "learning_rate": 2.572587185725872e-05, "loss": 0.5814, "step": 18391 }, { "epoch": 0.5369770225686842, "grad_norm": 0.5212275334945293, "learning_rate": 2.57242497972425e-05, "loss": 0.6013, "step": 18392 }, { "epoch": 0.5370062187965315, "grad_norm": 0.5039611382623435, "learning_rate": 2.572262773722628e-05, "loss": 0.5622, "step": 18393 }, { "epoch": 0.5370354150243789, "grad_norm": 0.5574568338202155, "learning_rate": 2.5721005677210058e-05, "loss": 0.6729, "step": 18394 }, { "epoch": 0.5370646112522263, "grad_norm": 0.5372094979368159, "learning_rate": 2.5719383617193837e-05, "loss": 0.6036, "step": 18395 }, { "epoch": 0.5370938074800736, "grad_norm": 0.543118485383637, "learning_rate": 2.5717761557177615e-05, "loss": 0.6466, "step": 18396 }, { "epoch": 0.537123003707921, "grad_norm": 0.5142334851303196, "learning_rate": 2.5716139497161397e-05, "loss": 0.5851, "step": 18397 }, { "epoch": 0.5371521999357683, "grad_norm": 0.5573705374196904, "learning_rate": 2.5714517437145175e-05, "loss": 0.679, "step": 18398 }, { "epoch": 0.5371813961636157, "grad_norm": 0.4955698172669711, "learning_rate": 2.5712895377128954e-05, "loss": 0.552, "step": 18399 }, { "epoch": 0.5372105923914631, "grad_norm": 0.4866549823202441, "learning_rate": 2.5711273317112732e-05, "loss": 0.5395, "step": 18400 }, { "epoch": 0.5372397886193104, "grad_norm": 0.5149853005418906, "learning_rate": 2.5709651257096514e-05, "loss": 0.6069, "step": 18401 }, { "epoch": 0.5372689848471578, "grad_norm": 0.5490135813330123, "learning_rate": 2.5708029197080292e-05, "loss": 0.6467, "step": 18402 }, { "epoch": 0.5372981810750052, "grad_norm": 0.531631823537024, "learning_rate": 2.570640713706407e-05, "loss": 0.6522, "step": 18403 }, { "epoch": 0.5373273773028525, "grad_norm": 0.5227786708172509, "learning_rate": 2.570478507704785e-05, "loss": 0.6607, "step": 18404 }, { "epoch": 0.5373565735306999, "grad_norm": 0.5194623436287017, "learning_rate": 2.5703163017031634e-05, "loss": 0.6068, "step": 18405 }, { "epoch": 0.5373857697585472, "grad_norm": 0.5304456524852241, "learning_rate": 2.5701540957015412e-05, "loss": 0.6459, "step": 18406 }, { "epoch": 0.5374149659863946, "grad_norm": 0.5900784284789602, "learning_rate": 2.569991889699919e-05, "loss": 0.6896, "step": 18407 }, { "epoch": 0.537444162214242, "grad_norm": 0.5267915342098093, "learning_rate": 2.5698296836982972e-05, "loss": 0.6256, "step": 18408 }, { "epoch": 0.5374733584420893, "grad_norm": 0.5268317358621112, "learning_rate": 2.569667477696675e-05, "loss": 0.6483, "step": 18409 }, { "epoch": 0.5375025546699367, "grad_norm": 0.510238206695765, "learning_rate": 2.569505271695053e-05, "loss": 0.5689, "step": 18410 }, { "epoch": 0.537531750897784, "grad_norm": 0.5070765227532026, "learning_rate": 2.5693430656934308e-05, "loss": 0.597, "step": 18411 }, { "epoch": 0.5375609471256314, "grad_norm": 0.5052920236628202, "learning_rate": 2.569180859691809e-05, "loss": 0.6079, "step": 18412 }, { "epoch": 0.5375901433534788, "grad_norm": 0.5243471246520075, "learning_rate": 2.5690186536901868e-05, "loss": 0.6326, "step": 18413 }, { "epoch": 0.5376193395813261, "grad_norm": 0.5124072050035284, "learning_rate": 2.5688564476885646e-05, "loss": 0.6228, "step": 18414 }, { "epoch": 0.5376485358091735, "grad_norm": 0.5067517424956892, "learning_rate": 2.5686942416869424e-05, "loss": 0.5867, "step": 18415 }, { "epoch": 0.5376777320370209, "grad_norm": 0.5010990360747856, "learning_rate": 2.5685320356853203e-05, "loss": 0.5802, "step": 18416 }, { "epoch": 0.5377069282648682, "grad_norm": 0.5105879686445551, "learning_rate": 2.5683698296836985e-05, "loss": 0.5809, "step": 18417 }, { "epoch": 0.5377361244927156, "grad_norm": 0.4804071754456272, "learning_rate": 2.5682076236820763e-05, "loss": 0.5836, "step": 18418 }, { "epoch": 0.5377653207205629, "grad_norm": 0.4688591621532858, "learning_rate": 2.568045417680454e-05, "loss": 0.4992, "step": 18419 }, { "epoch": 0.5377945169484103, "grad_norm": 0.5321941759317174, "learning_rate": 2.567883211678832e-05, "loss": 0.6049, "step": 18420 }, { "epoch": 0.5378237131762577, "grad_norm": 0.5180583901770014, "learning_rate": 2.5677210056772098e-05, "loss": 0.6147, "step": 18421 }, { "epoch": 0.537852909404105, "grad_norm": 0.5379571555576413, "learning_rate": 2.567558799675588e-05, "loss": 0.6487, "step": 18422 }, { "epoch": 0.5378821056319524, "grad_norm": 0.5711967651897855, "learning_rate": 2.5673965936739658e-05, "loss": 0.6968, "step": 18423 }, { "epoch": 0.5379113018597997, "grad_norm": 0.5355473887195468, "learning_rate": 2.5672343876723443e-05, "loss": 0.6173, "step": 18424 }, { "epoch": 0.5379404980876471, "grad_norm": 0.5541416343793885, "learning_rate": 2.5670721816707222e-05, "loss": 0.6577, "step": 18425 }, { "epoch": 0.5379696943154945, "grad_norm": 0.5725611199772858, "learning_rate": 2.5669099756691e-05, "loss": 0.6591, "step": 18426 }, { "epoch": 0.5379988905433418, "grad_norm": 0.5526636920246094, "learning_rate": 2.566747769667478e-05, "loss": 0.6844, "step": 18427 }, { "epoch": 0.5380280867711892, "grad_norm": 0.5186711273481384, "learning_rate": 2.566585563665856e-05, "loss": 0.5803, "step": 18428 }, { "epoch": 0.5380572829990365, "grad_norm": 0.5487642234644253, "learning_rate": 2.566423357664234e-05, "loss": 0.6492, "step": 18429 }, { "epoch": 0.5380864792268839, "grad_norm": 0.5002285406436697, "learning_rate": 2.5662611516626117e-05, "loss": 0.5919, "step": 18430 }, { "epoch": 0.5381156754547313, "grad_norm": 0.5199397500865288, "learning_rate": 2.5660989456609895e-05, "loss": 0.5572, "step": 18431 }, { "epoch": 0.5381448716825786, "grad_norm": 0.5176745147340208, "learning_rate": 2.5659367396593674e-05, "loss": 0.5625, "step": 18432 }, { "epoch": 0.538174067910426, "grad_norm": 0.5085403202510285, "learning_rate": 2.5657745336577456e-05, "loss": 0.5835, "step": 18433 }, { "epoch": 0.5382032641382734, "grad_norm": 0.571480952866549, "learning_rate": 2.5656123276561234e-05, "loss": 0.6374, "step": 18434 }, { "epoch": 0.5382324603661207, "grad_norm": 0.563825389176211, "learning_rate": 2.5654501216545012e-05, "loss": 0.6529, "step": 18435 }, { "epoch": 0.5382616565939681, "grad_norm": 0.5313024510134976, "learning_rate": 2.565287915652879e-05, "loss": 0.5969, "step": 18436 }, { "epoch": 0.5382908528218154, "grad_norm": 0.5466165253547126, "learning_rate": 2.5651257096512572e-05, "loss": 0.6737, "step": 18437 }, { "epoch": 0.5383200490496628, "grad_norm": 0.5366108638586072, "learning_rate": 2.564963503649635e-05, "loss": 0.6435, "step": 18438 }, { "epoch": 0.5383492452775102, "grad_norm": 0.5376550975834825, "learning_rate": 2.564801297648013e-05, "loss": 0.6065, "step": 18439 }, { "epoch": 0.5383784415053575, "grad_norm": 0.53086287928272, "learning_rate": 2.5646390916463908e-05, "loss": 0.6477, "step": 18440 }, { "epoch": 0.5384076377332049, "grad_norm": 0.5004536681397178, "learning_rate": 2.5644768856447686e-05, "loss": 0.5909, "step": 18441 }, { "epoch": 0.5384368339610522, "grad_norm": 0.4887241366619268, "learning_rate": 2.5643146796431468e-05, "loss": 0.5711, "step": 18442 }, { "epoch": 0.5384660301888996, "grad_norm": 0.5372221659325124, "learning_rate": 2.564152473641525e-05, "loss": 0.6206, "step": 18443 }, { "epoch": 0.538495226416747, "grad_norm": 0.5112220065410871, "learning_rate": 2.563990267639903e-05, "loss": 0.547, "step": 18444 }, { "epoch": 0.5385244226445943, "grad_norm": 0.5560916017989395, "learning_rate": 2.563828061638281e-05, "loss": 0.6246, "step": 18445 }, { "epoch": 0.5385536188724417, "grad_norm": 0.49456179867866584, "learning_rate": 2.5636658556366588e-05, "loss": 0.598, "step": 18446 }, { "epoch": 0.538582815100289, "grad_norm": 0.5462942330198343, "learning_rate": 2.5635036496350366e-05, "loss": 0.6399, "step": 18447 }, { "epoch": 0.5386120113281364, "grad_norm": 0.4596457557878536, "learning_rate": 2.5633414436334148e-05, "loss": 0.5056, "step": 18448 }, { "epoch": 0.5386412075559838, "grad_norm": 0.5094719903490565, "learning_rate": 2.5631792376317926e-05, "loss": 0.5666, "step": 18449 }, { "epoch": 0.5386704037838311, "grad_norm": 0.5522830639454204, "learning_rate": 2.5630170316301705e-05, "loss": 0.6263, "step": 18450 }, { "epoch": 0.5386996000116785, "grad_norm": 0.49993781352581146, "learning_rate": 2.5628548256285483e-05, "loss": 0.5884, "step": 18451 }, { "epoch": 0.5387287962395259, "grad_norm": 0.5599944954872399, "learning_rate": 2.562692619626926e-05, "loss": 0.7121, "step": 18452 }, { "epoch": 0.5387579924673732, "grad_norm": 0.4920363220162626, "learning_rate": 2.5625304136253043e-05, "loss": 0.5252, "step": 18453 }, { "epoch": 0.5387871886952206, "grad_norm": 0.5224937017905918, "learning_rate": 2.5623682076236822e-05, "loss": 0.586, "step": 18454 }, { "epoch": 0.5388163849230679, "grad_norm": 0.518267497581675, "learning_rate": 2.56220600162206e-05, "loss": 0.574, "step": 18455 }, { "epoch": 0.5388455811509153, "grad_norm": 0.5565274808263508, "learning_rate": 2.562043795620438e-05, "loss": 0.7136, "step": 18456 }, { "epoch": 0.5388747773787627, "grad_norm": 0.502038391863966, "learning_rate": 2.561881589618816e-05, "loss": 0.5635, "step": 18457 }, { "epoch": 0.53890397360661, "grad_norm": 0.4910034074228833, "learning_rate": 2.561719383617194e-05, "loss": 0.5436, "step": 18458 }, { "epoch": 0.5389331698344574, "grad_norm": 0.5173857953147948, "learning_rate": 2.5615571776155717e-05, "loss": 0.534, "step": 18459 }, { "epoch": 0.5389623660623047, "grad_norm": 0.5152960003526013, "learning_rate": 2.5613949716139495e-05, "loss": 0.5958, "step": 18460 }, { "epoch": 0.5389915622901521, "grad_norm": 0.49616917977514247, "learning_rate": 2.561232765612328e-05, "loss": 0.5501, "step": 18461 }, { "epoch": 0.5390207585179995, "grad_norm": 0.4990686472611659, "learning_rate": 2.561070559610706e-05, "loss": 0.553, "step": 18462 }, { "epoch": 0.5390499547458468, "grad_norm": 0.5403948791612886, "learning_rate": 2.5609083536090837e-05, "loss": 0.6423, "step": 18463 }, { "epoch": 0.5390791509736942, "grad_norm": 0.5178184115668009, "learning_rate": 2.560746147607462e-05, "loss": 0.5971, "step": 18464 }, { "epoch": 0.5391083472015415, "grad_norm": 0.5291897450176607, "learning_rate": 2.5605839416058397e-05, "loss": 0.6109, "step": 18465 }, { "epoch": 0.5391375434293889, "grad_norm": 0.5092164646377865, "learning_rate": 2.5604217356042176e-05, "loss": 0.5823, "step": 18466 }, { "epoch": 0.5391667396572363, "grad_norm": 0.533078105720554, "learning_rate": 2.5602595296025954e-05, "loss": 0.6772, "step": 18467 }, { "epoch": 0.5391959358850836, "grad_norm": 0.6277767940933741, "learning_rate": 2.5600973236009736e-05, "loss": 0.6313, "step": 18468 }, { "epoch": 0.539225132112931, "grad_norm": 0.5379936315791162, "learning_rate": 2.5599351175993514e-05, "loss": 0.5954, "step": 18469 }, { "epoch": 0.5392543283407784, "grad_norm": 0.5283862216333429, "learning_rate": 2.5597729115977293e-05, "loss": 0.6223, "step": 18470 }, { "epoch": 0.5392835245686257, "grad_norm": 0.5381258534412481, "learning_rate": 2.559610705596107e-05, "loss": 0.6567, "step": 18471 }, { "epoch": 0.5393127207964731, "grad_norm": 0.5588590488937558, "learning_rate": 2.559448499594485e-05, "loss": 0.6842, "step": 18472 }, { "epoch": 0.5393419170243204, "grad_norm": 0.5224488201356517, "learning_rate": 2.559286293592863e-05, "loss": 0.6182, "step": 18473 }, { "epoch": 0.5393711132521678, "grad_norm": 0.5189343792866787, "learning_rate": 2.559124087591241e-05, "loss": 0.6197, "step": 18474 }, { "epoch": 0.5394003094800152, "grad_norm": 0.5568435225493047, "learning_rate": 2.5589618815896188e-05, "loss": 0.6196, "step": 18475 }, { "epoch": 0.5394295057078625, "grad_norm": 0.5352979041079117, "learning_rate": 2.5587996755879966e-05, "loss": 0.6335, "step": 18476 }, { "epoch": 0.5394587019357099, "grad_norm": 0.524280963654928, "learning_rate": 2.5586374695863745e-05, "loss": 0.6041, "step": 18477 }, { "epoch": 0.5394878981635572, "grad_norm": 0.5838455413324796, "learning_rate": 2.5584752635847526e-05, "loss": 0.7287, "step": 18478 }, { "epoch": 0.5395170943914046, "grad_norm": 0.5413248506195661, "learning_rate": 2.5583130575831305e-05, "loss": 0.6531, "step": 18479 }, { "epoch": 0.539546290619252, "grad_norm": 0.5524554562970159, "learning_rate": 2.558150851581509e-05, "loss": 0.6759, "step": 18480 }, { "epoch": 0.5395754868470993, "grad_norm": 0.507021272115228, "learning_rate": 2.5579886455798868e-05, "loss": 0.5849, "step": 18481 }, { "epoch": 0.5396046830749467, "grad_norm": 0.5522565135399782, "learning_rate": 2.5578264395782647e-05, "loss": 0.6529, "step": 18482 }, { "epoch": 0.539633879302794, "grad_norm": 0.4866402760932236, "learning_rate": 2.5576642335766425e-05, "loss": 0.5574, "step": 18483 }, { "epoch": 0.5396630755306414, "grad_norm": 0.5312983969967593, "learning_rate": 2.5575020275750207e-05, "loss": 0.6315, "step": 18484 }, { "epoch": 0.5396922717584888, "grad_norm": 0.5063487866390907, "learning_rate": 2.5573398215733985e-05, "loss": 0.618, "step": 18485 }, { "epoch": 0.5397214679863361, "grad_norm": 0.5255474520998649, "learning_rate": 2.5571776155717764e-05, "loss": 0.6305, "step": 18486 }, { "epoch": 0.5397506642141835, "grad_norm": 0.4978132694812966, "learning_rate": 2.5570154095701542e-05, "loss": 0.579, "step": 18487 }, { "epoch": 0.5397798604420309, "grad_norm": 0.49130246420253815, "learning_rate": 2.556853203568532e-05, "loss": 0.5476, "step": 18488 }, { "epoch": 0.5398090566698782, "grad_norm": 0.49166949701868945, "learning_rate": 2.5566909975669102e-05, "loss": 0.532, "step": 18489 }, { "epoch": 0.5398382528977256, "grad_norm": 0.5120716716708994, "learning_rate": 2.556528791565288e-05, "loss": 0.6029, "step": 18490 }, { "epoch": 0.5398674491255729, "grad_norm": 0.5105888512923314, "learning_rate": 2.556366585563666e-05, "loss": 0.5861, "step": 18491 }, { "epoch": 0.5398966453534203, "grad_norm": 0.5215584729132176, "learning_rate": 2.5562043795620437e-05, "loss": 0.6048, "step": 18492 }, { "epoch": 0.5399258415812677, "grad_norm": 0.5245737079799186, "learning_rate": 2.556042173560422e-05, "loss": 0.64, "step": 18493 }, { "epoch": 0.539955037809115, "grad_norm": 0.4887117717358548, "learning_rate": 2.5558799675587997e-05, "loss": 0.551, "step": 18494 }, { "epoch": 0.5399842340369624, "grad_norm": 0.5099440867749528, "learning_rate": 2.5557177615571776e-05, "loss": 0.5759, "step": 18495 }, { "epoch": 0.5400134302648097, "grad_norm": 0.5067676885830428, "learning_rate": 2.5555555555555554e-05, "loss": 0.5831, "step": 18496 }, { "epoch": 0.5400426264926571, "grad_norm": 0.5258708981309725, "learning_rate": 2.5553933495539332e-05, "loss": 0.6206, "step": 18497 }, { "epoch": 0.5400718227205045, "grad_norm": 0.5027866753700614, "learning_rate": 2.5552311435523114e-05, "loss": 0.6005, "step": 18498 }, { "epoch": 0.5401010189483518, "grad_norm": 0.4835341689832763, "learning_rate": 2.55506893755069e-05, "loss": 0.5434, "step": 18499 }, { "epoch": 0.5401302151761992, "grad_norm": 0.4709307873817142, "learning_rate": 2.5549067315490678e-05, "loss": 0.4858, "step": 18500 }, { "epoch": 0.5401594114040466, "grad_norm": 0.5430195901908866, "learning_rate": 2.5547445255474456e-05, "loss": 0.6118, "step": 18501 }, { "epoch": 0.5401886076318939, "grad_norm": 0.5566360648184071, "learning_rate": 2.5545823195458234e-05, "loss": 0.6554, "step": 18502 }, { "epoch": 0.5402178038597413, "grad_norm": 0.49928913010528586, "learning_rate": 2.5544201135442013e-05, "loss": 0.5692, "step": 18503 }, { "epoch": 0.5402470000875886, "grad_norm": 0.5084423945735012, "learning_rate": 2.5542579075425795e-05, "loss": 0.5684, "step": 18504 }, { "epoch": 0.540276196315436, "grad_norm": 0.524802088933258, "learning_rate": 2.5540957015409573e-05, "loss": 0.6119, "step": 18505 }, { "epoch": 0.5403053925432834, "grad_norm": 0.5340943703907611, "learning_rate": 2.553933495539335e-05, "loss": 0.6559, "step": 18506 }, { "epoch": 0.5403345887711307, "grad_norm": 0.5085760664274049, "learning_rate": 2.553771289537713e-05, "loss": 0.5932, "step": 18507 }, { "epoch": 0.5403637849989781, "grad_norm": 0.5659767473238406, "learning_rate": 2.5536090835360908e-05, "loss": 0.6732, "step": 18508 }, { "epoch": 0.5403929812268254, "grad_norm": 0.5020489355727065, "learning_rate": 2.553446877534469e-05, "loss": 0.5508, "step": 18509 }, { "epoch": 0.5404221774546728, "grad_norm": 0.5037699149094683, "learning_rate": 2.5532846715328468e-05, "loss": 0.597, "step": 18510 }, { "epoch": 0.5404513736825202, "grad_norm": 0.48110749342975706, "learning_rate": 2.5531224655312247e-05, "loss": 0.5141, "step": 18511 }, { "epoch": 0.5404805699103675, "grad_norm": 0.5709448170787362, "learning_rate": 2.5529602595296025e-05, "loss": 0.6276, "step": 18512 }, { "epoch": 0.540509766138215, "grad_norm": 0.5108375679418677, "learning_rate": 2.5527980535279807e-05, "loss": 0.5758, "step": 18513 }, { "epoch": 0.5405389623660624, "grad_norm": 0.4839747860742113, "learning_rate": 2.5526358475263585e-05, "loss": 0.5273, "step": 18514 }, { "epoch": 0.5405681585939097, "grad_norm": 0.5242663215786213, "learning_rate": 2.5524736415247364e-05, "loss": 0.5302, "step": 18515 }, { "epoch": 0.5405973548217571, "grad_norm": 0.4956201000028396, "learning_rate": 2.5523114355231142e-05, "loss": 0.5465, "step": 18516 }, { "epoch": 0.5406265510496044, "grad_norm": 0.5249406749072473, "learning_rate": 2.552149229521492e-05, "loss": 0.6119, "step": 18517 }, { "epoch": 0.5406557472774518, "grad_norm": 0.6453956406834582, "learning_rate": 2.5519870235198705e-05, "loss": 0.6916, "step": 18518 }, { "epoch": 0.5406849435052992, "grad_norm": 0.5419492981831483, "learning_rate": 2.5518248175182484e-05, "loss": 0.6512, "step": 18519 }, { "epoch": 0.5407141397331465, "grad_norm": 0.5360862110766681, "learning_rate": 2.5516626115166266e-05, "loss": 0.644, "step": 18520 }, { "epoch": 0.5407433359609939, "grad_norm": 0.5130274488280162, "learning_rate": 2.5515004055150044e-05, "loss": 0.6109, "step": 18521 }, { "epoch": 0.5407725321888412, "grad_norm": 0.530713851359182, "learning_rate": 2.5513381995133822e-05, "loss": 0.5938, "step": 18522 }, { "epoch": 0.5408017284166886, "grad_norm": 0.5336054513569387, "learning_rate": 2.55117599351176e-05, "loss": 0.5877, "step": 18523 }, { "epoch": 0.540830924644536, "grad_norm": 0.5153931178228711, "learning_rate": 2.5510137875101382e-05, "loss": 0.5997, "step": 18524 }, { "epoch": 0.5408601208723833, "grad_norm": 0.5483341043025108, "learning_rate": 2.550851581508516e-05, "loss": 0.6764, "step": 18525 }, { "epoch": 0.5408893171002307, "grad_norm": 0.5313441138744037, "learning_rate": 2.550689375506894e-05, "loss": 0.5994, "step": 18526 }, { "epoch": 0.540918513328078, "grad_norm": 0.5594132019229442, "learning_rate": 2.5505271695052718e-05, "loss": 0.6947, "step": 18527 }, { "epoch": 0.5409477095559254, "grad_norm": 0.5410054589419688, "learning_rate": 2.5503649635036496e-05, "loss": 0.6362, "step": 18528 }, { "epoch": 0.5409769057837728, "grad_norm": 0.5575562541294077, "learning_rate": 2.5502027575020278e-05, "loss": 0.6738, "step": 18529 }, { "epoch": 0.5410061020116201, "grad_norm": 0.5325171046332341, "learning_rate": 2.5500405515004056e-05, "loss": 0.6283, "step": 18530 }, { "epoch": 0.5410352982394675, "grad_norm": 0.5037035794927838, "learning_rate": 2.5498783454987834e-05, "loss": 0.5569, "step": 18531 }, { "epoch": 0.5410644944673149, "grad_norm": 0.5449676165099993, "learning_rate": 2.5497161394971613e-05, "loss": 0.6371, "step": 18532 }, { "epoch": 0.5410936906951622, "grad_norm": 0.4671989154780592, "learning_rate": 2.549553933495539e-05, "loss": 0.4982, "step": 18533 }, { "epoch": 0.5411228869230096, "grad_norm": 0.5281397088230371, "learning_rate": 2.5493917274939173e-05, "loss": 0.6317, "step": 18534 }, { "epoch": 0.5411520831508569, "grad_norm": 0.468351420629076, "learning_rate": 2.549229521492295e-05, "loss": 0.5144, "step": 18535 }, { "epoch": 0.5411812793787043, "grad_norm": 0.5275128106006941, "learning_rate": 2.549067315490673e-05, "loss": 0.6044, "step": 18536 }, { "epoch": 0.5412104756065517, "grad_norm": 0.505539961043178, "learning_rate": 2.5489051094890515e-05, "loss": 0.6045, "step": 18537 }, { "epoch": 0.541239671834399, "grad_norm": 0.5535787557309388, "learning_rate": 2.5487429034874293e-05, "loss": 0.6733, "step": 18538 }, { "epoch": 0.5412688680622464, "grad_norm": 0.4751460625313124, "learning_rate": 2.548580697485807e-05, "loss": 0.5457, "step": 18539 }, { "epoch": 0.5412980642900938, "grad_norm": 0.5226620881373679, "learning_rate": 2.5484184914841853e-05, "loss": 0.6141, "step": 18540 }, { "epoch": 0.5413272605179411, "grad_norm": 0.5032337221390211, "learning_rate": 2.5482562854825632e-05, "loss": 0.5719, "step": 18541 }, { "epoch": 0.5413564567457885, "grad_norm": 0.5554661262727355, "learning_rate": 2.548094079480941e-05, "loss": 0.6256, "step": 18542 }, { "epoch": 0.5413856529736358, "grad_norm": 0.5067173393560284, "learning_rate": 2.547931873479319e-05, "loss": 0.5471, "step": 18543 }, { "epoch": 0.5414148492014832, "grad_norm": 0.5094203177275254, "learning_rate": 2.5477696674776967e-05, "loss": 0.5418, "step": 18544 }, { "epoch": 0.5414440454293306, "grad_norm": 0.5303826183459699, "learning_rate": 2.547607461476075e-05, "loss": 0.596, "step": 18545 }, { "epoch": 0.5414732416571779, "grad_norm": 0.5344473719927169, "learning_rate": 2.5474452554744527e-05, "loss": 0.6371, "step": 18546 }, { "epoch": 0.5415024378850253, "grad_norm": 0.5429435317921301, "learning_rate": 2.5472830494728305e-05, "loss": 0.6443, "step": 18547 }, { "epoch": 0.5415316341128726, "grad_norm": 0.501985020019625, "learning_rate": 2.5471208434712084e-05, "loss": 0.5206, "step": 18548 }, { "epoch": 0.54156083034072, "grad_norm": 0.46871656013817653, "learning_rate": 2.5469586374695865e-05, "loss": 0.4779, "step": 18549 }, { "epoch": 0.5415900265685674, "grad_norm": 0.5178258138910012, "learning_rate": 2.5467964314679644e-05, "loss": 0.5597, "step": 18550 }, { "epoch": 0.5416192227964147, "grad_norm": 0.5120598095680619, "learning_rate": 2.5466342254663422e-05, "loss": 0.63, "step": 18551 }, { "epoch": 0.5416484190242621, "grad_norm": 0.5340527722798352, "learning_rate": 2.54647201946472e-05, "loss": 0.6577, "step": 18552 }, { "epoch": 0.5416776152521094, "grad_norm": 0.49423599824357634, "learning_rate": 2.546309813463098e-05, "loss": 0.5521, "step": 18553 }, { "epoch": 0.5417068114799568, "grad_norm": 0.4983473256112293, "learning_rate": 2.546147607461476e-05, "loss": 0.5567, "step": 18554 }, { "epoch": 0.5417360077078042, "grad_norm": 0.4841155514920547, "learning_rate": 2.545985401459854e-05, "loss": 0.5366, "step": 18555 }, { "epoch": 0.5417652039356515, "grad_norm": 0.5292187282910689, "learning_rate": 2.5458231954582324e-05, "loss": 0.6685, "step": 18556 }, { "epoch": 0.5417944001634989, "grad_norm": 0.5690280391711693, "learning_rate": 2.5456609894566103e-05, "loss": 0.6779, "step": 18557 }, { "epoch": 0.5418235963913463, "grad_norm": 0.5160705643717115, "learning_rate": 2.545498783454988e-05, "loss": 0.5973, "step": 18558 }, { "epoch": 0.5418527926191936, "grad_norm": 0.5001759752648258, "learning_rate": 2.545336577453366e-05, "loss": 0.5744, "step": 18559 }, { "epoch": 0.541881988847041, "grad_norm": 0.496407424735569, "learning_rate": 2.545174371451744e-05, "loss": 0.5613, "step": 18560 }, { "epoch": 0.5419111850748883, "grad_norm": 0.5055921454177786, "learning_rate": 2.545012165450122e-05, "loss": 0.5626, "step": 18561 }, { "epoch": 0.5419403813027357, "grad_norm": 0.5670594978392834, "learning_rate": 2.5448499594484998e-05, "loss": 0.7094, "step": 18562 }, { "epoch": 0.5419695775305831, "grad_norm": 0.5209113038721072, "learning_rate": 2.5446877534468776e-05, "loss": 0.5724, "step": 18563 }, { "epoch": 0.5419987737584304, "grad_norm": 0.5390476425455923, "learning_rate": 2.5445255474452555e-05, "loss": 0.6579, "step": 18564 }, { "epoch": 0.5420279699862778, "grad_norm": 0.4967221245990754, "learning_rate": 2.5443633414436336e-05, "loss": 0.5606, "step": 18565 }, { "epoch": 0.5420571662141251, "grad_norm": 0.49762379376944693, "learning_rate": 2.5442011354420115e-05, "loss": 0.5686, "step": 18566 }, { "epoch": 0.5420863624419725, "grad_norm": 0.5563996030149145, "learning_rate": 2.5440389294403893e-05, "loss": 0.693, "step": 18567 }, { "epoch": 0.5421155586698199, "grad_norm": 0.5040741969373203, "learning_rate": 2.543876723438767e-05, "loss": 0.593, "step": 18568 }, { "epoch": 0.5421447548976672, "grad_norm": 0.5519898897993313, "learning_rate": 2.5437145174371453e-05, "loss": 0.6974, "step": 18569 }, { "epoch": 0.5421739511255146, "grad_norm": 0.5118669661551817, "learning_rate": 2.543552311435523e-05, "loss": 0.5844, "step": 18570 }, { "epoch": 0.542203147353362, "grad_norm": 0.5482542243356177, "learning_rate": 2.543390105433901e-05, "loss": 0.6272, "step": 18571 }, { "epoch": 0.5422323435812093, "grad_norm": 0.5130127902931046, "learning_rate": 2.543227899432279e-05, "loss": 0.5571, "step": 18572 }, { "epoch": 0.5422615398090567, "grad_norm": 0.5071008632124784, "learning_rate": 2.5430656934306567e-05, "loss": 0.591, "step": 18573 }, { "epoch": 0.542290736036904, "grad_norm": 0.5018228051840656, "learning_rate": 2.542903487429035e-05, "loss": 0.5742, "step": 18574 }, { "epoch": 0.5423199322647514, "grad_norm": 0.5207739913279028, "learning_rate": 2.542741281427413e-05, "loss": 0.5632, "step": 18575 }, { "epoch": 0.5423491284925988, "grad_norm": 0.5372011911574843, "learning_rate": 2.5425790754257912e-05, "loss": 0.6165, "step": 18576 }, { "epoch": 0.5423783247204461, "grad_norm": 0.5138336743262959, "learning_rate": 2.542416869424169e-05, "loss": 0.5985, "step": 18577 }, { "epoch": 0.5424075209482935, "grad_norm": 0.495891367677082, "learning_rate": 2.542254663422547e-05, "loss": 0.569, "step": 18578 }, { "epoch": 0.5424367171761408, "grad_norm": 0.5309090717773985, "learning_rate": 2.5420924574209247e-05, "loss": 0.6, "step": 18579 }, { "epoch": 0.5424659134039882, "grad_norm": 0.5365767735702403, "learning_rate": 2.541930251419303e-05, "loss": 0.6195, "step": 18580 }, { "epoch": 0.5424951096318356, "grad_norm": 0.5163259353510594, "learning_rate": 2.5417680454176807e-05, "loss": 0.5311, "step": 18581 }, { "epoch": 0.5425243058596829, "grad_norm": 0.5252545989305696, "learning_rate": 2.5416058394160586e-05, "loss": 0.6028, "step": 18582 }, { "epoch": 0.5425535020875303, "grad_norm": 0.5702438769416146, "learning_rate": 2.5414436334144364e-05, "loss": 0.7215, "step": 18583 }, { "epoch": 0.5425826983153776, "grad_norm": 0.49915137809669563, "learning_rate": 2.5412814274128142e-05, "loss": 0.5658, "step": 18584 }, { "epoch": 0.542611894543225, "grad_norm": 0.5495028321282995, "learning_rate": 2.5411192214111924e-05, "loss": 0.6466, "step": 18585 }, { "epoch": 0.5426410907710724, "grad_norm": 0.49208920135390327, "learning_rate": 2.5409570154095703e-05, "loss": 0.554, "step": 18586 }, { "epoch": 0.5426702869989197, "grad_norm": 0.48447362891602036, "learning_rate": 2.540794809407948e-05, "loss": 0.5405, "step": 18587 }, { "epoch": 0.5426994832267671, "grad_norm": 0.5055732684115626, "learning_rate": 2.540632603406326e-05, "loss": 0.5603, "step": 18588 }, { "epoch": 0.5427286794546144, "grad_norm": 0.5188035453806853, "learning_rate": 2.5404703974047038e-05, "loss": 0.6018, "step": 18589 }, { "epoch": 0.5427578756824618, "grad_norm": 0.4885380605696715, "learning_rate": 2.540308191403082e-05, "loss": 0.5288, "step": 18590 }, { "epoch": 0.5427870719103092, "grad_norm": 0.5437733570581269, "learning_rate": 2.5401459854014598e-05, "loss": 0.6596, "step": 18591 }, { "epoch": 0.5428162681381565, "grad_norm": 0.5118446760894337, "learning_rate": 2.5399837793998376e-05, "loss": 0.5964, "step": 18592 }, { "epoch": 0.5428454643660039, "grad_norm": 0.5592702163549259, "learning_rate": 2.5398215733982155e-05, "loss": 0.653, "step": 18593 }, { "epoch": 0.5428746605938513, "grad_norm": 0.5303298990914387, "learning_rate": 2.539659367396594e-05, "loss": 0.5605, "step": 18594 }, { "epoch": 0.5429038568216986, "grad_norm": 0.5330007473577909, "learning_rate": 2.5394971613949718e-05, "loss": 0.6191, "step": 18595 }, { "epoch": 0.542933053049546, "grad_norm": 0.5319741449374144, "learning_rate": 2.53933495539335e-05, "loss": 0.618, "step": 18596 }, { "epoch": 0.5429622492773933, "grad_norm": 0.4744321727285386, "learning_rate": 2.5391727493917278e-05, "loss": 0.5572, "step": 18597 }, { "epoch": 0.5429914455052407, "grad_norm": 0.5148532806570274, "learning_rate": 2.5390105433901057e-05, "loss": 0.5904, "step": 18598 }, { "epoch": 0.5430206417330881, "grad_norm": 0.5408048395878722, "learning_rate": 2.5388483373884835e-05, "loss": 0.6615, "step": 18599 }, { "epoch": 0.5430498379609354, "grad_norm": 0.5390539681722063, "learning_rate": 2.5386861313868617e-05, "loss": 0.5305, "step": 18600 }, { "epoch": 0.5430790341887828, "grad_norm": 0.5315320815686646, "learning_rate": 2.5385239253852395e-05, "loss": 0.5863, "step": 18601 }, { "epoch": 0.5431082304166301, "grad_norm": 0.5159497442811759, "learning_rate": 2.5383617193836174e-05, "loss": 0.5957, "step": 18602 }, { "epoch": 0.5431374266444775, "grad_norm": 0.5153558957211267, "learning_rate": 2.5381995133819952e-05, "loss": 0.5762, "step": 18603 }, { "epoch": 0.5431666228723249, "grad_norm": 0.49854910708827727, "learning_rate": 2.538037307380373e-05, "loss": 0.5629, "step": 18604 }, { "epoch": 0.5431958191001722, "grad_norm": 0.550555737646056, "learning_rate": 2.5378751013787512e-05, "loss": 0.6422, "step": 18605 }, { "epoch": 0.5432250153280196, "grad_norm": 0.5251787667516151, "learning_rate": 2.537712895377129e-05, "loss": 0.5956, "step": 18606 }, { "epoch": 0.543254211555867, "grad_norm": 0.5112016499885127, "learning_rate": 2.537550689375507e-05, "loss": 0.5949, "step": 18607 }, { "epoch": 0.5432834077837143, "grad_norm": 0.5215100196332701, "learning_rate": 2.5373884833738847e-05, "loss": 0.6259, "step": 18608 }, { "epoch": 0.5433126040115617, "grad_norm": 0.558147277785623, "learning_rate": 2.5372262773722626e-05, "loss": 0.676, "step": 18609 }, { "epoch": 0.543341800239409, "grad_norm": 0.5672734982763225, "learning_rate": 2.5370640713706407e-05, "loss": 0.6107, "step": 18610 }, { "epoch": 0.5433709964672564, "grad_norm": 0.5830866563798882, "learning_rate": 2.5369018653690186e-05, "loss": 0.7161, "step": 18611 }, { "epoch": 0.5434001926951038, "grad_norm": 0.5219901518103526, "learning_rate": 2.536739659367397e-05, "loss": 0.5551, "step": 18612 }, { "epoch": 0.5434293889229511, "grad_norm": 0.4915147451807432, "learning_rate": 2.536577453365775e-05, "loss": 0.5286, "step": 18613 }, { "epoch": 0.5434585851507985, "grad_norm": 0.5242949448565816, "learning_rate": 2.5364152473641528e-05, "loss": 0.6144, "step": 18614 }, { "epoch": 0.5434877813786458, "grad_norm": 0.49037124539577065, "learning_rate": 2.5362530413625306e-05, "loss": 0.5367, "step": 18615 }, { "epoch": 0.5435169776064932, "grad_norm": 0.505459138022364, "learning_rate": 2.5360908353609088e-05, "loss": 0.5556, "step": 18616 }, { "epoch": 0.5435461738343406, "grad_norm": 0.4934040121182453, "learning_rate": 2.5359286293592866e-05, "loss": 0.5585, "step": 18617 }, { "epoch": 0.5435753700621879, "grad_norm": 0.5089556261904243, "learning_rate": 2.5357664233576644e-05, "loss": 0.6149, "step": 18618 }, { "epoch": 0.5436045662900353, "grad_norm": 0.5536165990059692, "learning_rate": 2.5356042173560423e-05, "loss": 0.6554, "step": 18619 }, { "epoch": 0.5436337625178826, "grad_norm": 0.5018524356871651, "learning_rate": 2.53544201135442e-05, "loss": 0.5491, "step": 18620 }, { "epoch": 0.54366295874573, "grad_norm": 0.5489564257232725, "learning_rate": 2.5352798053527983e-05, "loss": 0.6818, "step": 18621 }, { "epoch": 0.5436921549735774, "grad_norm": 0.5204400295001386, "learning_rate": 2.535117599351176e-05, "loss": 0.5817, "step": 18622 }, { "epoch": 0.5437213512014247, "grad_norm": 0.5306856604528266, "learning_rate": 2.534955393349554e-05, "loss": 0.6376, "step": 18623 }, { "epoch": 0.5437505474292721, "grad_norm": 0.5146756204828133, "learning_rate": 2.5347931873479318e-05, "loss": 0.5799, "step": 18624 }, { "epoch": 0.5437797436571195, "grad_norm": 0.5540869154915429, "learning_rate": 2.53463098134631e-05, "loss": 0.6879, "step": 18625 }, { "epoch": 0.5438089398849668, "grad_norm": 0.52201149393125, "learning_rate": 2.5344687753446878e-05, "loss": 0.6016, "step": 18626 }, { "epoch": 0.5438381361128142, "grad_norm": 0.5092629244102062, "learning_rate": 2.5343065693430657e-05, "loss": 0.5755, "step": 18627 }, { "epoch": 0.5438673323406615, "grad_norm": 0.5070564195309656, "learning_rate": 2.5341443633414435e-05, "loss": 0.5637, "step": 18628 }, { "epoch": 0.5438965285685089, "grad_norm": 0.5275756537621832, "learning_rate": 2.5339821573398213e-05, "loss": 0.6448, "step": 18629 }, { "epoch": 0.5439257247963563, "grad_norm": 0.5002791726794285, "learning_rate": 2.5338199513381995e-05, "loss": 0.5807, "step": 18630 }, { "epoch": 0.5439549210242036, "grad_norm": 0.5085479511192778, "learning_rate": 2.5336577453365777e-05, "loss": 0.5912, "step": 18631 }, { "epoch": 0.543984117252051, "grad_norm": 0.5461821384087941, "learning_rate": 2.533495539334956e-05, "loss": 0.6907, "step": 18632 }, { "epoch": 0.5440133134798983, "grad_norm": 0.534495205454915, "learning_rate": 2.5333333333333337e-05, "loss": 0.6624, "step": 18633 }, { "epoch": 0.5440425097077458, "grad_norm": 0.5289015700009876, "learning_rate": 2.5331711273317115e-05, "loss": 0.5772, "step": 18634 }, { "epoch": 0.5440717059355932, "grad_norm": 0.5374690783456135, "learning_rate": 2.5330089213300894e-05, "loss": 0.6254, "step": 18635 }, { "epoch": 0.5441009021634405, "grad_norm": 0.5525318852169052, "learning_rate": 2.5328467153284675e-05, "loss": 0.7234, "step": 18636 }, { "epoch": 0.5441300983912879, "grad_norm": 0.48866096403882553, "learning_rate": 2.5326845093268454e-05, "loss": 0.5469, "step": 18637 }, { "epoch": 0.5441592946191353, "grad_norm": 0.5449455488969487, "learning_rate": 2.5325223033252232e-05, "loss": 0.6712, "step": 18638 }, { "epoch": 0.5441884908469826, "grad_norm": 0.5136465033134024, "learning_rate": 2.532360097323601e-05, "loss": 0.5631, "step": 18639 }, { "epoch": 0.54421768707483, "grad_norm": 0.5513555804036844, "learning_rate": 2.532197891321979e-05, "loss": 0.679, "step": 18640 }, { "epoch": 0.5442468833026773, "grad_norm": 0.5495195137685174, "learning_rate": 2.532035685320357e-05, "loss": 0.6731, "step": 18641 }, { "epoch": 0.5442760795305247, "grad_norm": 0.5284899386128066, "learning_rate": 2.531873479318735e-05, "loss": 0.629, "step": 18642 }, { "epoch": 0.5443052757583721, "grad_norm": 0.5088497878854649, "learning_rate": 2.5317112733171128e-05, "loss": 0.5972, "step": 18643 }, { "epoch": 0.5443344719862194, "grad_norm": 0.4814556572831589, "learning_rate": 2.5315490673154906e-05, "loss": 0.5496, "step": 18644 }, { "epoch": 0.5443636682140668, "grad_norm": 0.5423586162439815, "learning_rate": 2.5313868613138688e-05, "loss": 0.6222, "step": 18645 }, { "epoch": 0.5443928644419141, "grad_norm": 0.5102225399466326, "learning_rate": 2.5312246553122466e-05, "loss": 0.5729, "step": 18646 }, { "epoch": 0.5444220606697615, "grad_norm": 0.5544335268756618, "learning_rate": 2.5310624493106244e-05, "loss": 0.6807, "step": 18647 }, { "epoch": 0.5444512568976089, "grad_norm": 0.5276643468645034, "learning_rate": 2.5309002433090023e-05, "loss": 0.6807, "step": 18648 }, { "epoch": 0.5444804531254562, "grad_norm": 0.5384874808260216, "learning_rate": 2.53073803730738e-05, "loss": 0.5984, "step": 18649 }, { "epoch": 0.5445096493533036, "grad_norm": 0.5159608522736979, "learning_rate": 2.5305758313057586e-05, "loss": 0.5659, "step": 18650 }, { "epoch": 0.544538845581151, "grad_norm": 0.5068809452721962, "learning_rate": 2.5304136253041365e-05, "loss": 0.5694, "step": 18651 }, { "epoch": 0.5445680418089983, "grad_norm": 0.4889370895892243, "learning_rate": 2.5302514193025146e-05, "loss": 0.587, "step": 18652 }, { "epoch": 0.5445972380368457, "grad_norm": 0.5063167574589728, "learning_rate": 2.5300892133008925e-05, "loss": 0.5396, "step": 18653 }, { "epoch": 0.544626434264693, "grad_norm": 0.5085586379736264, "learning_rate": 2.5299270072992703e-05, "loss": 0.6006, "step": 18654 }, { "epoch": 0.5446556304925404, "grad_norm": 0.5077006800652722, "learning_rate": 2.529764801297648e-05, "loss": 0.5975, "step": 18655 }, { "epoch": 0.5446848267203878, "grad_norm": 0.49991664540426856, "learning_rate": 2.5296025952960263e-05, "loss": 0.5753, "step": 18656 }, { "epoch": 0.5447140229482351, "grad_norm": 0.5053340505444145, "learning_rate": 2.529440389294404e-05, "loss": 0.5616, "step": 18657 }, { "epoch": 0.5447432191760825, "grad_norm": 0.5356512805864414, "learning_rate": 2.529278183292782e-05, "loss": 0.5972, "step": 18658 }, { "epoch": 0.5447724154039298, "grad_norm": 0.5375698060639243, "learning_rate": 2.52911597729116e-05, "loss": 0.6436, "step": 18659 }, { "epoch": 0.5448016116317772, "grad_norm": 0.5165500629915807, "learning_rate": 2.5289537712895377e-05, "loss": 0.5848, "step": 18660 }, { "epoch": 0.5448308078596246, "grad_norm": 0.5447366510192508, "learning_rate": 2.528791565287916e-05, "loss": 0.6151, "step": 18661 }, { "epoch": 0.5448600040874719, "grad_norm": 0.4507751596789275, "learning_rate": 2.5286293592862937e-05, "loss": 0.4917, "step": 18662 }, { "epoch": 0.5448892003153193, "grad_norm": 0.48914457741855144, "learning_rate": 2.5284671532846715e-05, "loss": 0.5535, "step": 18663 }, { "epoch": 0.5449183965431666, "grad_norm": 0.5228238403022802, "learning_rate": 2.5283049472830494e-05, "loss": 0.635, "step": 18664 }, { "epoch": 0.544947592771014, "grad_norm": 0.5638204311435209, "learning_rate": 2.5281427412814272e-05, "loss": 0.6869, "step": 18665 }, { "epoch": 0.5449767889988614, "grad_norm": 0.5028973911731313, "learning_rate": 2.5279805352798054e-05, "loss": 0.5692, "step": 18666 }, { "epoch": 0.5450059852267087, "grad_norm": 0.5396786934802944, "learning_rate": 2.5278183292781832e-05, "loss": 0.642, "step": 18667 }, { "epoch": 0.5450351814545561, "grad_norm": 0.5395183357391172, "learning_rate": 2.527656123276561e-05, "loss": 0.6637, "step": 18668 }, { "epoch": 0.5450643776824035, "grad_norm": 0.5040958935934218, "learning_rate": 2.5274939172749396e-05, "loss": 0.583, "step": 18669 }, { "epoch": 0.5450935739102508, "grad_norm": 0.5401501300170336, "learning_rate": 2.5273317112733174e-05, "loss": 0.6704, "step": 18670 }, { "epoch": 0.5451227701380982, "grad_norm": 0.49779692248540264, "learning_rate": 2.5271695052716952e-05, "loss": 0.5817, "step": 18671 }, { "epoch": 0.5451519663659455, "grad_norm": 0.4924818937892919, "learning_rate": 2.5270072992700734e-05, "loss": 0.5292, "step": 18672 }, { "epoch": 0.5451811625937929, "grad_norm": 0.541656667882851, "learning_rate": 2.5268450932684513e-05, "loss": 0.6054, "step": 18673 }, { "epoch": 0.5452103588216403, "grad_norm": 0.4943741819821396, "learning_rate": 2.526682887266829e-05, "loss": 0.5548, "step": 18674 }, { "epoch": 0.5452395550494876, "grad_norm": 0.48863282823269016, "learning_rate": 2.526520681265207e-05, "loss": 0.518, "step": 18675 }, { "epoch": 0.545268751277335, "grad_norm": 0.5867611824570748, "learning_rate": 2.5263584752635848e-05, "loss": 0.6292, "step": 18676 }, { "epoch": 0.5452979475051823, "grad_norm": 0.52123403960886, "learning_rate": 2.526196269261963e-05, "loss": 0.6017, "step": 18677 }, { "epoch": 0.5453271437330297, "grad_norm": 0.48557744917230855, "learning_rate": 2.5260340632603408e-05, "loss": 0.5559, "step": 18678 }, { "epoch": 0.5453563399608771, "grad_norm": 0.6034983004160268, "learning_rate": 2.5258718572587186e-05, "loss": 0.6468, "step": 18679 }, { "epoch": 0.5453855361887244, "grad_norm": 0.5352955658967373, "learning_rate": 2.5257096512570965e-05, "loss": 0.6147, "step": 18680 }, { "epoch": 0.5454147324165718, "grad_norm": 0.5412424800206266, "learning_rate": 2.5255474452554746e-05, "loss": 0.7063, "step": 18681 }, { "epoch": 0.5454439286444192, "grad_norm": 0.5946077493050912, "learning_rate": 2.5253852392538525e-05, "loss": 0.6783, "step": 18682 }, { "epoch": 0.5454731248722665, "grad_norm": 0.5454220125549328, "learning_rate": 2.5252230332522303e-05, "loss": 0.658, "step": 18683 }, { "epoch": 0.5455023211001139, "grad_norm": 0.5089056840045975, "learning_rate": 2.525060827250608e-05, "loss": 0.6288, "step": 18684 }, { "epoch": 0.5455315173279612, "grad_norm": 0.5041479029736184, "learning_rate": 2.524898621248986e-05, "loss": 0.5747, "step": 18685 }, { "epoch": 0.5455607135558086, "grad_norm": 0.5205163550207859, "learning_rate": 2.524736415247364e-05, "loss": 0.617, "step": 18686 }, { "epoch": 0.545589909783656, "grad_norm": 0.5326684772814508, "learning_rate": 2.524574209245742e-05, "loss": 0.6689, "step": 18687 }, { "epoch": 0.5456191060115033, "grad_norm": 0.543412551090992, "learning_rate": 2.5244120032441205e-05, "loss": 0.6683, "step": 18688 }, { "epoch": 0.5456483022393507, "grad_norm": 0.5259144885849298, "learning_rate": 2.5242497972424984e-05, "loss": 0.6184, "step": 18689 }, { "epoch": 0.545677498467198, "grad_norm": 0.5717323806054204, "learning_rate": 2.5240875912408762e-05, "loss": 0.6199, "step": 18690 }, { "epoch": 0.5457066946950454, "grad_norm": 0.5311125969722099, "learning_rate": 2.523925385239254e-05, "loss": 0.6115, "step": 18691 }, { "epoch": 0.5457358909228928, "grad_norm": 0.5238514381295013, "learning_rate": 2.5237631792376322e-05, "loss": 0.6046, "step": 18692 }, { "epoch": 0.5457650871507401, "grad_norm": 0.5352424675040879, "learning_rate": 2.52360097323601e-05, "loss": 0.6562, "step": 18693 }, { "epoch": 0.5457942833785875, "grad_norm": 0.508861737112995, "learning_rate": 2.523438767234388e-05, "loss": 0.6072, "step": 18694 }, { "epoch": 0.5458234796064348, "grad_norm": 0.5102612747005901, "learning_rate": 2.5232765612327657e-05, "loss": 0.5891, "step": 18695 }, { "epoch": 0.5458526758342822, "grad_norm": 0.5171676490124786, "learning_rate": 2.5231143552311436e-05, "loss": 0.5758, "step": 18696 }, { "epoch": 0.5458818720621296, "grad_norm": 0.5299805085920233, "learning_rate": 2.5229521492295217e-05, "loss": 0.5954, "step": 18697 }, { "epoch": 0.5459110682899769, "grad_norm": 0.5134555071487539, "learning_rate": 2.5227899432278996e-05, "loss": 0.6362, "step": 18698 }, { "epoch": 0.5459402645178243, "grad_norm": 0.4955201539958716, "learning_rate": 2.5226277372262774e-05, "loss": 0.5852, "step": 18699 }, { "epoch": 0.5459694607456717, "grad_norm": 0.5120757314785181, "learning_rate": 2.5224655312246552e-05, "loss": 0.6076, "step": 18700 }, { "epoch": 0.545998656973519, "grad_norm": 0.5092088767055737, "learning_rate": 2.5223033252230334e-05, "loss": 0.6092, "step": 18701 }, { "epoch": 0.5460278532013664, "grad_norm": 0.5644166256767692, "learning_rate": 2.5221411192214113e-05, "loss": 0.7044, "step": 18702 }, { "epoch": 0.5460570494292137, "grad_norm": 0.5356711902079867, "learning_rate": 2.521978913219789e-05, "loss": 0.634, "step": 18703 }, { "epoch": 0.5460862456570611, "grad_norm": 0.5206131034023859, "learning_rate": 2.521816707218167e-05, "loss": 0.5255, "step": 18704 }, { "epoch": 0.5461154418849085, "grad_norm": 0.5087845030246378, "learning_rate": 2.5216545012165448e-05, "loss": 0.568, "step": 18705 }, { "epoch": 0.5461446381127558, "grad_norm": 0.5112331192566378, "learning_rate": 2.521492295214923e-05, "loss": 0.6122, "step": 18706 }, { "epoch": 0.5461738343406032, "grad_norm": 0.5412441088432901, "learning_rate": 2.521330089213301e-05, "loss": 0.6419, "step": 18707 }, { "epoch": 0.5462030305684505, "grad_norm": 0.5046588909188059, "learning_rate": 2.5211678832116793e-05, "loss": 0.5613, "step": 18708 }, { "epoch": 0.5462322267962979, "grad_norm": 0.5222688922531384, "learning_rate": 2.521005677210057e-05, "loss": 0.6072, "step": 18709 }, { "epoch": 0.5462614230241453, "grad_norm": 0.5338566758907004, "learning_rate": 2.520843471208435e-05, "loss": 0.5767, "step": 18710 }, { "epoch": 0.5462906192519926, "grad_norm": 0.5268737876660086, "learning_rate": 2.5206812652068128e-05, "loss": 0.6318, "step": 18711 }, { "epoch": 0.54631981547984, "grad_norm": 0.5697684627409174, "learning_rate": 2.520519059205191e-05, "loss": 0.6838, "step": 18712 }, { "epoch": 0.5463490117076873, "grad_norm": 0.594695554036917, "learning_rate": 2.5203568532035688e-05, "loss": 0.7749, "step": 18713 }, { "epoch": 0.5463782079355347, "grad_norm": 0.5419250703068201, "learning_rate": 2.5201946472019467e-05, "loss": 0.6095, "step": 18714 }, { "epoch": 0.5464074041633821, "grad_norm": 0.4853123509505192, "learning_rate": 2.5200324412003245e-05, "loss": 0.5356, "step": 18715 }, { "epoch": 0.5464366003912294, "grad_norm": 0.5074678868961573, "learning_rate": 2.5198702351987023e-05, "loss": 0.6054, "step": 18716 }, { "epoch": 0.5464657966190768, "grad_norm": 0.5008067339486033, "learning_rate": 2.5197080291970805e-05, "loss": 0.5555, "step": 18717 }, { "epoch": 0.5464949928469242, "grad_norm": 0.5174270262590747, "learning_rate": 2.5195458231954583e-05, "loss": 0.6031, "step": 18718 }, { "epoch": 0.5465241890747715, "grad_norm": 0.502728190395911, "learning_rate": 2.5193836171938362e-05, "loss": 0.5947, "step": 18719 }, { "epoch": 0.5465533853026189, "grad_norm": 0.5816942285396024, "learning_rate": 2.519221411192214e-05, "loss": 0.7011, "step": 18720 }, { "epoch": 0.5465825815304662, "grad_norm": 0.5501846191188444, "learning_rate": 2.519059205190592e-05, "loss": 0.6954, "step": 18721 }, { "epoch": 0.5466117777583136, "grad_norm": 0.5497204685442532, "learning_rate": 2.51889699918897e-05, "loss": 0.6894, "step": 18722 }, { "epoch": 0.546640973986161, "grad_norm": 0.5096684130254924, "learning_rate": 2.518734793187348e-05, "loss": 0.5773, "step": 18723 }, { "epoch": 0.5466701702140083, "grad_norm": 0.5064496506407143, "learning_rate": 2.5185725871857257e-05, "loss": 0.5774, "step": 18724 }, { "epoch": 0.5466993664418557, "grad_norm": 0.5337121763901382, "learning_rate": 2.5184103811841035e-05, "loss": 0.6431, "step": 18725 }, { "epoch": 0.546728562669703, "grad_norm": 0.5243982771982169, "learning_rate": 2.518248175182482e-05, "loss": 0.604, "step": 18726 }, { "epoch": 0.5467577588975504, "grad_norm": 0.5228006490000672, "learning_rate": 2.51808596918086e-05, "loss": 0.6795, "step": 18727 }, { "epoch": 0.5467869551253978, "grad_norm": 0.6697399264474654, "learning_rate": 2.517923763179238e-05, "loss": 0.6911, "step": 18728 }, { "epoch": 0.5468161513532451, "grad_norm": 0.5173643913459692, "learning_rate": 2.517761557177616e-05, "loss": 0.5862, "step": 18729 }, { "epoch": 0.5468453475810925, "grad_norm": 0.523124591122273, "learning_rate": 2.5175993511759938e-05, "loss": 0.6452, "step": 18730 }, { "epoch": 0.5468745438089399, "grad_norm": 0.488477721138818, "learning_rate": 2.5174371451743716e-05, "loss": 0.5517, "step": 18731 }, { "epoch": 0.5469037400367872, "grad_norm": 0.49223196815823367, "learning_rate": 2.5172749391727494e-05, "loss": 0.5654, "step": 18732 }, { "epoch": 0.5469329362646346, "grad_norm": 0.49484023610026484, "learning_rate": 2.5171127331711276e-05, "loss": 0.6128, "step": 18733 }, { "epoch": 0.5469621324924819, "grad_norm": 0.5579851835101343, "learning_rate": 2.5169505271695054e-05, "loss": 0.6756, "step": 18734 }, { "epoch": 0.5469913287203293, "grad_norm": 0.5525254273382076, "learning_rate": 2.5167883211678833e-05, "loss": 0.6268, "step": 18735 }, { "epoch": 0.5470205249481767, "grad_norm": 0.522889698391751, "learning_rate": 2.516626115166261e-05, "loss": 0.6378, "step": 18736 }, { "epoch": 0.547049721176024, "grad_norm": 0.5179703668229262, "learning_rate": 2.5164639091646393e-05, "loss": 0.6111, "step": 18737 }, { "epoch": 0.5470789174038714, "grad_norm": 0.5917997076331387, "learning_rate": 2.516301703163017e-05, "loss": 0.6447, "step": 18738 }, { "epoch": 0.5471081136317187, "grad_norm": 0.5400626628996736, "learning_rate": 2.516139497161395e-05, "loss": 0.6192, "step": 18739 }, { "epoch": 0.5471373098595661, "grad_norm": 0.5047243095967249, "learning_rate": 2.5159772911597728e-05, "loss": 0.5946, "step": 18740 }, { "epoch": 0.5471665060874135, "grad_norm": 0.5270378636580122, "learning_rate": 2.5158150851581506e-05, "loss": 0.6407, "step": 18741 }, { "epoch": 0.5471957023152608, "grad_norm": 0.5353219173051863, "learning_rate": 2.5156528791565288e-05, "loss": 0.6335, "step": 18742 }, { "epoch": 0.5472248985431082, "grad_norm": 0.5866294118727766, "learning_rate": 2.5154906731549067e-05, "loss": 0.5975, "step": 18743 }, { "epoch": 0.5472540947709555, "grad_norm": 0.5409540989137398, "learning_rate": 2.5153284671532845e-05, "loss": 0.6632, "step": 18744 }, { "epoch": 0.5472832909988029, "grad_norm": 0.6767839411566778, "learning_rate": 2.515166261151663e-05, "loss": 0.646, "step": 18745 }, { "epoch": 0.5473124872266503, "grad_norm": 0.5466149997655068, "learning_rate": 2.515004055150041e-05, "loss": 0.6308, "step": 18746 }, { "epoch": 0.5473416834544976, "grad_norm": 0.5066482774479009, "learning_rate": 2.5148418491484187e-05, "loss": 0.5527, "step": 18747 }, { "epoch": 0.547370879682345, "grad_norm": 0.5277446986768937, "learning_rate": 2.514679643146797e-05, "loss": 0.5927, "step": 18748 }, { "epoch": 0.5474000759101924, "grad_norm": 0.506818615875276, "learning_rate": 2.5145174371451747e-05, "loss": 0.6058, "step": 18749 }, { "epoch": 0.5474292721380397, "grad_norm": 0.5542817436458112, "learning_rate": 2.5143552311435525e-05, "loss": 0.6315, "step": 18750 }, { "epoch": 0.5474584683658871, "grad_norm": 0.5260240335436357, "learning_rate": 2.5141930251419304e-05, "loss": 0.603, "step": 18751 }, { "epoch": 0.5474876645937344, "grad_norm": 0.5430401674150809, "learning_rate": 2.5140308191403082e-05, "loss": 0.6658, "step": 18752 }, { "epoch": 0.5475168608215818, "grad_norm": 0.5594356214153791, "learning_rate": 2.5138686131386864e-05, "loss": 0.7186, "step": 18753 }, { "epoch": 0.5475460570494293, "grad_norm": 0.5688423320299832, "learning_rate": 2.5137064071370642e-05, "loss": 0.6846, "step": 18754 }, { "epoch": 0.5475752532772766, "grad_norm": 0.48363038337667497, "learning_rate": 2.513544201135442e-05, "loss": 0.5519, "step": 18755 }, { "epoch": 0.547604449505124, "grad_norm": 0.499428949981198, "learning_rate": 2.51338199513382e-05, "loss": 0.546, "step": 18756 }, { "epoch": 0.5476336457329714, "grad_norm": 0.5315008492006702, "learning_rate": 2.513219789132198e-05, "loss": 0.6178, "step": 18757 }, { "epoch": 0.5476628419608187, "grad_norm": 0.5065678406707746, "learning_rate": 2.513057583130576e-05, "loss": 0.5847, "step": 18758 }, { "epoch": 0.5476920381886661, "grad_norm": 0.5644822634282566, "learning_rate": 2.5128953771289537e-05, "loss": 0.6922, "step": 18759 }, { "epoch": 0.5477212344165134, "grad_norm": 0.5199625378959335, "learning_rate": 2.5127331711273316e-05, "loss": 0.625, "step": 18760 }, { "epoch": 0.5477504306443608, "grad_norm": 0.525735348174344, "learning_rate": 2.5125709651257094e-05, "loss": 0.6091, "step": 18761 }, { "epoch": 0.5477796268722082, "grad_norm": 0.5652809196892282, "learning_rate": 2.5124087591240876e-05, "loss": 0.7055, "step": 18762 }, { "epoch": 0.5478088231000555, "grad_norm": 0.4911025673725353, "learning_rate": 2.5122465531224658e-05, "loss": 0.5333, "step": 18763 }, { "epoch": 0.5478380193279029, "grad_norm": 0.5074606342127794, "learning_rate": 2.512084347120844e-05, "loss": 0.5643, "step": 18764 }, { "epoch": 0.5478672155557502, "grad_norm": 0.5282123880063775, "learning_rate": 2.5119221411192218e-05, "loss": 0.6261, "step": 18765 }, { "epoch": 0.5478964117835976, "grad_norm": 0.533529079543543, "learning_rate": 2.5117599351175996e-05, "loss": 0.6235, "step": 18766 }, { "epoch": 0.547925608011445, "grad_norm": 0.49481424979014677, "learning_rate": 2.5115977291159775e-05, "loss": 0.5483, "step": 18767 }, { "epoch": 0.5479548042392923, "grad_norm": 0.5384753331064123, "learning_rate": 2.5114355231143556e-05, "loss": 0.6191, "step": 18768 }, { "epoch": 0.5479840004671397, "grad_norm": 0.5444424275819341, "learning_rate": 2.5112733171127335e-05, "loss": 0.6828, "step": 18769 }, { "epoch": 0.548013196694987, "grad_norm": 0.47562385706048643, "learning_rate": 2.5111111111111113e-05, "loss": 0.5513, "step": 18770 }, { "epoch": 0.5480423929228344, "grad_norm": 0.4877657672492754, "learning_rate": 2.510948905109489e-05, "loss": 0.5229, "step": 18771 }, { "epoch": 0.5480715891506818, "grad_norm": 0.5636339110538249, "learning_rate": 2.510786699107867e-05, "loss": 0.6943, "step": 18772 }, { "epoch": 0.5481007853785291, "grad_norm": 0.5280824805932862, "learning_rate": 2.510624493106245e-05, "loss": 0.6547, "step": 18773 }, { "epoch": 0.5481299816063765, "grad_norm": 0.5188169431976524, "learning_rate": 2.510462287104623e-05, "loss": 0.605, "step": 18774 }, { "epoch": 0.5481591778342239, "grad_norm": 0.5646437908652001, "learning_rate": 2.510300081103001e-05, "loss": 0.6458, "step": 18775 }, { "epoch": 0.5481883740620712, "grad_norm": 0.5483079139593404, "learning_rate": 2.5101378751013787e-05, "loss": 0.675, "step": 18776 }, { "epoch": 0.5482175702899186, "grad_norm": 0.5039606052299911, "learning_rate": 2.5099756690997565e-05, "loss": 0.5786, "step": 18777 }, { "epoch": 0.5482467665177659, "grad_norm": 0.5127770537226218, "learning_rate": 2.5098134630981347e-05, "loss": 0.6009, "step": 18778 }, { "epoch": 0.5482759627456133, "grad_norm": 0.5426517658406691, "learning_rate": 2.5096512570965125e-05, "loss": 0.6912, "step": 18779 }, { "epoch": 0.5483051589734607, "grad_norm": 0.5747503103245809, "learning_rate": 2.5094890510948904e-05, "loss": 0.6277, "step": 18780 }, { "epoch": 0.548334355201308, "grad_norm": 0.48968937029581655, "learning_rate": 2.5093268450932682e-05, "loss": 0.5451, "step": 18781 }, { "epoch": 0.5483635514291554, "grad_norm": 0.529154505217855, "learning_rate": 2.5091646390916467e-05, "loss": 0.6778, "step": 18782 }, { "epoch": 0.5483927476570027, "grad_norm": 0.5119244826024661, "learning_rate": 2.5090024330900246e-05, "loss": 0.6134, "step": 18783 }, { "epoch": 0.5484219438848501, "grad_norm": 0.5652832800736335, "learning_rate": 2.5088402270884027e-05, "loss": 0.7344, "step": 18784 }, { "epoch": 0.5484511401126975, "grad_norm": 0.5249724025284541, "learning_rate": 2.5086780210867806e-05, "loss": 0.6236, "step": 18785 }, { "epoch": 0.5484803363405448, "grad_norm": 0.5294162310028997, "learning_rate": 2.5085158150851584e-05, "loss": 0.6087, "step": 18786 }, { "epoch": 0.5485095325683922, "grad_norm": 0.5181665862043464, "learning_rate": 2.5083536090835362e-05, "loss": 0.5976, "step": 18787 }, { "epoch": 0.5485387287962395, "grad_norm": 0.508334744639987, "learning_rate": 2.508191403081914e-05, "loss": 0.6116, "step": 18788 }, { "epoch": 0.5485679250240869, "grad_norm": 0.5411568386775293, "learning_rate": 2.5080291970802923e-05, "loss": 0.6053, "step": 18789 }, { "epoch": 0.5485971212519343, "grad_norm": 0.5482016559201447, "learning_rate": 2.50786699107867e-05, "loss": 0.6417, "step": 18790 }, { "epoch": 0.5486263174797816, "grad_norm": 0.5438144023495869, "learning_rate": 2.507704785077048e-05, "loss": 0.68, "step": 18791 }, { "epoch": 0.548655513707629, "grad_norm": 0.5117276575499097, "learning_rate": 2.5075425790754258e-05, "loss": 0.6097, "step": 18792 }, { "epoch": 0.5486847099354764, "grad_norm": 0.4704178517073164, "learning_rate": 2.507380373073804e-05, "loss": 0.5103, "step": 18793 }, { "epoch": 0.5487139061633237, "grad_norm": 0.5082531009205553, "learning_rate": 2.5072181670721818e-05, "loss": 0.5938, "step": 18794 }, { "epoch": 0.5487431023911711, "grad_norm": 0.5269935186706483, "learning_rate": 2.5070559610705596e-05, "loss": 0.6395, "step": 18795 }, { "epoch": 0.5487722986190184, "grad_norm": 0.4984772153429784, "learning_rate": 2.5068937550689375e-05, "loss": 0.5466, "step": 18796 }, { "epoch": 0.5488014948468658, "grad_norm": 0.5293191297052767, "learning_rate": 2.5067315490673153e-05, "loss": 0.6174, "step": 18797 }, { "epoch": 0.5488306910747132, "grad_norm": 0.5347192172309279, "learning_rate": 2.5065693430656935e-05, "loss": 0.6275, "step": 18798 }, { "epoch": 0.5488598873025605, "grad_norm": 0.48363173411350235, "learning_rate": 2.5064071370640713e-05, "loss": 0.5629, "step": 18799 }, { "epoch": 0.5488890835304079, "grad_norm": 0.5362657845512339, "learning_rate": 2.506244931062449e-05, "loss": 0.6266, "step": 18800 }, { "epoch": 0.5489182797582552, "grad_norm": 0.5644078659521702, "learning_rate": 2.5060827250608277e-05, "loss": 0.6811, "step": 18801 }, { "epoch": 0.5489474759861026, "grad_norm": 0.49662229193993346, "learning_rate": 2.5059205190592055e-05, "loss": 0.543, "step": 18802 }, { "epoch": 0.54897667221395, "grad_norm": 0.5098286778666835, "learning_rate": 2.5057583130575833e-05, "loss": 0.5226, "step": 18803 }, { "epoch": 0.5490058684417973, "grad_norm": 0.5099117436746958, "learning_rate": 2.5055961070559615e-05, "loss": 0.5793, "step": 18804 }, { "epoch": 0.5490350646696447, "grad_norm": 0.5589898413120369, "learning_rate": 2.5054339010543393e-05, "loss": 0.6683, "step": 18805 }, { "epoch": 0.549064260897492, "grad_norm": 0.5360869582822008, "learning_rate": 2.5052716950527172e-05, "loss": 0.5868, "step": 18806 }, { "epoch": 0.5490934571253394, "grad_norm": 0.5179285663110976, "learning_rate": 2.505109489051095e-05, "loss": 0.5829, "step": 18807 }, { "epoch": 0.5491226533531868, "grad_norm": 0.5011320336967728, "learning_rate": 2.504947283049473e-05, "loss": 0.6148, "step": 18808 }, { "epoch": 0.5491518495810341, "grad_norm": 0.5380589296890812, "learning_rate": 2.504785077047851e-05, "loss": 0.6302, "step": 18809 }, { "epoch": 0.5491810458088815, "grad_norm": 0.5418794223813779, "learning_rate": 2.504622871046229e-05, "loss": 0.6231, "step": 18810 }, { "epoch": 0.5492102420367289, "grad_norm": 0.5728363439749504, "learning_rate": 2.5044606650446067e-05, "loss": 0.6861, "step": 18811 }, { "epoch": 0.5492394382645762, "grad_norm": 0.5184085478552835, "learning_rate": 2.5042984590429845e-05, "loss": 0.583, "step": 18812 }, { "epoch": 0.5492686344924236, "grad_norm": 0.5913810522554351, "learning_rate": 2.5041362530413627e-05, "loss": 0.6472, "step": 18813 }, { "epoch": 0.5492978307202709, "grad_norm": 0.5388229837895163, "learning_rate": 2.5039740470397406e-05, "loss": 0.6528, "step": 18814 }, { "epoch": 0.5493270269481183, "grad_norm": 0.5880611109496687, "learning_rate": 2.5038118410381184e-05, "loss": 0.5735, "step": 18815 }, { "epoch": 0.5493562231759657, "grad_norm": 0.5610746268519552, "learning_rate": 2.5036496350364962e-05, "loss": 0.6119, "step": 18816 }, { "epoch": 0.549385419403813, "grad_norm": 0.4963184550275758, "learning_rate": 2.503487429034874e-05, "loss": 0.5294, "step": 18817 }, { "epoch": 0.5494146156316604, "grad_norm": 0.5716213622953363, "learning_rate": 2.5033252230332523e-05, "loss": 0.6865, "step": 18818 }, { "epoch": 0.5494438118595077, "grad_norm": 0.5186469939776646, "learning_rate": 2.50316301703163e-05, "loss": 0.6418, "step": 18819 }, { "epoch": 0.5494730080873551, "grad_norm": 0.5334885763826253, "learning_rate": 2.5030008110300086e-05, "loss": 0.675, "step": 18820 }, { "epoch": 0.5495022043152025, "grad_norm": 0.5225117610091495, "learning_rate": 2.5028386050283864e-05, "loss": 0.6179, "step": 18821 }, { "epoch": 0.5495314005430498, "grad_norm": 0.5125155582116756, "learning_rate": 2.5026763990267643e-05, "loss": 0.5899, "step": 18822 }, { "epoch": 0.5495605967708972, "grad_norm": 0.526959170109276, "learning_rate": 2.502514193025142e-05, "loss": 0.602, "step": 18823 }, { "epoch": 0.5495897929987446, "grad_norm": 0.5437333142982123, "learning_rate": 2.5023519870235203e-05, "loss": 0.6334, "step": 18824 }, { "epoch": 0.5496189892265919, "grad_norm": 0.5453453453502751, "learning_rate": 2.502189781021898e-05, "loss": 0.6239, "step": 18825 }, { "epoch": 0.5496481854544393, "grad_norm": 0.5254836975007562, "learning_rate": 2.502027575020276e-05, "loss": 0.625, "step": 18826 }, { "epoch": 0.5496773816822866, "grad_norm": 0.5252032986035505, "learning_rate": 2.5018653690186538e-05, "loss": 0.6679, "step": 18827 }, { "epoch": 0.549706577910134, "grad_norm": 0.49369828890688083, "learning_rate": 2.5017031630170316e-05, "loss": 0.5822, "step": 18828 }, { "epoch": 0.5497357741379814, "grad_norm": 0.5373587045195934, "learning_rate": 2.5015409570154098e-05, "loss": 0.664, "step": 18829 }, { "epoch": 0.5497649703658287, "grad_norm": 0.5207146902032206, "learning_rate": 2.5013787510137877e-05, "loss": 0.5932, "step": 18830 }, { "epoch": 0.5497941665936761, "grad_norm": 0.4981800257900891, "learning_rate": 2.5012165450121655e-05, "loss": 0.5567, "step": 18831 }, { "epoch": 0.5498233628215234, "grad_norm": 0.492007616534058, "learning_rate": 2.5010543390105433e-05, "loss": 0.5712, "step": 18832 }, { "epoch": 0.5498525590493708, "grad_norm": 0.543815214988419, "learning_rate": 2.500892133008921e-05, "loss": 0.6533, "step": 18833 }, { "epoch": 0.5498817552772182, "grad_norm": 0.524960583029492, "learning_rate": 2.5007299270072993e-05, "loss": 0.5474, "step": 18834 }, { "epoch": 0.5499109515050655, "grad_norm": 0.5269947665607255, "learning_rate": 2.5005677210056772e-05, "loss": 0.6181, "step": 18835 }, { "epoch": 0.5499401477329129, "grad_norm": 0.5194799776285366, "learning_rate": 2.500405515004055e-05, "loss": 0.5634, "step": 18836 }, { "epoch": 0.5499693439607602, "grad_norm": 0.5600934323060042, "learning_rate": 2.500243309002433e-05, "loss": 0.6289, "step": 18837 }, { "epoch": 0.5499985401886076, "grad_norm": 0.5222106416590507, "learning_rate": 2.500081103000811e-05, "loss": 0.6404, "step": 18838 }, { "epoch": 0.550027736416455, "grad_norm": 0.5233253868387031, "learning_rate": 2.4999188969991892e-05, "loss": 0.6, "step": 18839 }, { "epoch": 0.5500569326443023, "grad_norm": 0.5284046494560501, "learning_rate": 2.499756690997567e-05, "loss": 0.5942, "step": 18840 }, { "epoch": 0.5500861288721497, "grad_norm": 0.5128359397829251, "learning_rate": 2.499594484995945e-05, "loss": 0.6176, "step": 18841 }, { "epoch": 0.550115325099997, "grad_norm": 0.5374215618630893, "learning_rate": 2.4994322789943227e-05, "loss": 0.64, "step": 18842 }, { "epoch": 0.5501445213278444, "grad_norm": 0.5499070789092703, "learning_rate": 2.4992700729927006e-05, "loss": 0.6505, "step": 18843 }, { "epoch": 0.5501737175556918, "grad_norm": 0.5441486771595173, "learning_rate": 2.499107866991079e-05, "loss": 0.6454, "step": 18844 }, { "epoch": 0.5502029137835391, "grad_norm": 0.529476929542022, "learning_rate": 2.498945660989457e-05, "loss": 0.5801, "step": 18845 }, { "epoch": 0.5502321100113865, "grad_norm": 0.5178578539707325, "learning_rate": 2.4987834549878347e-05, "loss": 0.6005, "step": 18846 }, { "epoch": 0.5502613062392339, "grad_norm": 0.5195519852645623, "learning_rate": 2.4986212489862126e-05, "loss": 0.5983, "step": 18847 }, { "epoch": 0.5502905024670812, "grad_norm": 0.5175200075682221, "learning_rate": 2.4984590429845904e-05, "loss": 0.6104, "step": 18848 }, { "epoch": 0.5503196986949286, "grad_norm": 0.5292327575858315, "learning_rate": 2.4982968369829686e-05, "loss": 0.6148, "step": 18849 }, { "epoch": 0.5503488949227759, "grad_norm": 0.511170932377042, "learning_rate": 2.4981346309813464e-05, "loss": 0.58, "step": 18850 }, { "epoch": 0.5503780911506233, "grad_norm": 0.543296886679857, "learning_rate": 2.4979724249797243e-05, "loss": 0.6531, "step": 18851 }, { "epoch": 0.5504072873784707, "grad_norm": 0.6330793908086884, "learning_rate": 2.497810218978102e-05, "loss": 0.6304, "step": 18852 }, { "epoch": 0.550436483606318, "grad_norm": 0.5453734808455244, "learning_rate": 2.4976480129764803e-05, "loss": 0.6753, "step": 18853 }, { "epoch": 0.5504656798341654, "grad_norm": 0.5355590500738705, "learning_rate": 2.497485806974858e-05, "loss": 0.6179, "step": 18854 }, { "epoch": 0.5504948760620128, "grad_norm": 0.5042343079554061, "learning_rate": 2.4973236009732363e-05, "loss": 0.5961, "step": 18855 }, { "epoch": 0.5505240722898601, "grad_norm": 0.49325016579463105, "learning_rate": 2.497161394971614e-05, "loss": 0.5877, "step": 18856 }, { "epoch": 0.5505532685177075, "grad_norm": 0.5518301562996561, "learning_rate": 2.496999188969992e-05, "loss": 0.664, "step": 18857 }, { "epoch": 0.5505824647455548, "grad_norm": 0.5430642742089613, "learning_rate": 2.4968369829683698e-05, "loss": 0.6455, "step": 18858 }, { "epoch": 0.5506116609734022, "grad_norm": 0.5454895553887441, "learning_rate": 2.496674776966748e-05, "loss": 0.6029, "step": 18859 }, { "epoch": 0.5506408572012496, "grad_norm": 0.5044526276114494, "learning_rate": 2.4965125709651258e-05, "loss": 0.5971, "step": 18860 }, { "epoch": 0.5506700534290969, "grad_norm": 0.47448141762470886, "learning_rate": 2.4963503649635037e-05, "loss": 0.4902, "step": 18861 }, { "epoch": 0.5506992496569443, "grad_norm": 0.5629487568865947, "learning_rate": 2.4961881589618815e-05, "loss": 0.6733, "step": 18862 }, { "epoch": 0.5507284458847916, "grad_norm": 0.5826986046478767, "learning_rate": 2.4960259529602597e-05, "loss": 0.735, "step": 18863 }, { "epoch": 0.550757642112639, "grad_norm": 0.5617271777296423, "learning_rate": 2.4958637469586375e-05, "loss": 0.6955, "step": 18864 }, { "epoch": 0.5507868383404864, "grad_norm": 0.5562155268509434, "learning_rate": 2.4957015409570157e-05, "loss": 0.6664, "step": 18865 }, { "epoch": 0.5508160345683337, "grad_norm": 0.538211399522691, "learning_rate": 2.4955393349553935e-05, "loss": 0.6232, "step": 18866 }, { "epoch": 0.5508452307961811, "grad_norm": 0.5349874094920303, "learning_rate": 2.4953771289537714e-05, "loss": 0.6367, "step": 18867 }, { "epoch": 0.5508744270240284, "grad_norm": 0.5671351718288413, "learning_rate": 2.4952149229521492e-05, "loss": 0.6478, "step": 18868 }, { "epoch": 0.5509036232518758, "grad_norm": 0.5195585988394655, "learning_rate": 2.4950527169505274e-05, "loss": 0.635, "step": 18869 }, { "epoch": 0.5509328194797232, "grad_norm": 0.5012123919176996, "learning_rate": 2.4948905109489052e-05, "loss": 0.5613, "step": 18870 }, { "epoch": 0.5509620157075705, "grad_norm": 0.5096042912670739, "learning_rate": 2.494728304947283e-05, "loss": 0.5321, "step": 18871 }, { "epoch": 0.5509912119354179, "grad_norm": 0.5050311334010685, "learning_rate": 2.4945660989456612e-05, "loss": 0.5337, "step": 18872 }, { "epoch": 0.5510204081632653, "grad_norm": 0.5603270723204321, "learning_rate": 2.494403892944039e-05, "loss": 0.6081, "step": 18873 }, { "epoch": 0.5510496043911126, "grad_norm": 0.5466177829932058, "learning_rate": 2.494241686942417e-05, "loss": 0.5788, "step": 18874 }, { "epoch": 0.5510788006189601, "grad_norm": 0.48557795577208884, "learning_rate": 2.494079480940795e-05, "loss": 0.5401, "step": 18875 }, { "epoch": 0.5511079968468074, "grad_norm": 0.5087134477999861, "learning_rate": 2.493917274939173e-05, "loss": 0.5833, "step": 18876 }, { "epoch": 0.5511371930746548, "grad_norm": 0.5074116236938143, "learning_rate": 2.4937550689375508e-05, "loss": 0.6081, "step": 18877 }, { "epoch": 0.5511663893025022, "grad_norm": 0.6553686400843595, "learning_rate": 2.4935928629359286e-05, "loss": 0.7218, "step": 18878 }, { "epoch": 0.5511955855303495, "grad_norm": 0.5618601854919075, "learning_rate": 2.4934306569343068e-05, "loss": 0.6925, "step": 18879 }, { "epoch": 0.5512247817581969, "grad_norm": 0.520294912597335, "learning_rate": 2.4932684509326846e-05, "loss": 0.5979, "step": 18880 }, { "epoch": 0.5512539779860443, "grad_norm": 0.5252920881620095, "learning_rate": 2.4931062449310628e-05, "loss": 0.646, "step": 18881 }, { "epoch": 0.5512831742138916, "grad_norm": 0.529389615600659, "learning_rate": 2.4929440389294406e-05, "loss": 0.6281, "step": 18882 }, { "epoch": 0.551312370441739, "grad_norm": 0.5758067876861677, "learning_rate": 2.4927818329278185e-05, "loss": 0.7109, "step": 18883 }, { "epoch": 0.5513415666695863, "grad_norm": 0.592426219135878, "learning_rate": 2.4926196269261963e-05, "loss": 0.7097, "step": 18884 }, { "epoch": 0.5513707628974337, "grad_norm": 0.5234994781258903, "learning_rate": 2.4924574209245745e-05, "loss": 0.619, "step": 18885 }, { "epoch": 0.5513999591252811, "grad_norm": 0.538582915746504, "learning_rate": 2.4922952149229523e-05, "loss": 0.6581, "step": 18886 }, { "epoch": 0.5514291553531284, "grad_norm": 0.4984640795947139, "learning_rate": 2.49213300892133e-05, "loss": 0.6031, "step": 18887 }, { "epoch": 0.5514583515809758, "grad_norm": 0.49345979048021704, "learning_rate": 2.491970802919708e-05, "loss": 0.5727, "step": 18888 }, { "epoch": 0.5514875478088231, "grad_norm": 0.48729822257903366, "learning_rate": 2.491808596918086e-05, "loss": 0.5384, "step": 18889 }, { "epoch": 0.5515167440366705, "grad_norm": 0.4624361513699023, "learning_rate": 2.491646390916464e-05, "loss": 0.5167, "step": 18890 }, { "epoch": 0.5515459402645179, "grad_norm": 0.5559263499641797, "learning_rate": 2.4914841849148422e-05, "loss": 0.6505, "step": 18891 }, { "epoch": 0.5515751364923652, "grad_norm": 0.5314272701896176, "learning_rate": 2.49132197891322e-05, "loss": 0.5765, "step": 18892 }, { "epoch": 0.5516043327202126, "grad_norm": 0.5298068360792078, "learning_rate": 2.491159772911598e-05, "loss": 0.6355, "step": 18893 }, { "epoch": 0.55163352894806, "grad_norm": 0.48932096164734284, "learning_rate": 2.4909975669099757e-05, "loss": 0.5392, "step": 18894 }, { "epoch": 0.5516627251759073, "grad_norm": 0.5271905971586799, "learning_rate": 2.490835360908354e-05, "loss": 0.5709, "step": 18895 }, { "epoch": 0.5516919214037547, "grad_norm": 0.5534219598969495, "learning_rate": 2.4906731549067317e-05, "loss": 0.6657, "step": 18896 }, { "epoch": 0.551721117631602, "grad_norm": 0.5218797417745819, "learning_rate": 2.4905109489051095e-05, "loss": 0.5981, "step": 18897 }, { "epoch": 0.5517503138594494, "grad_norm": 0.5086409174035201, "learning_rate": 2.4903487429034874e-05, "loss": 0.6157, "step": 18898 }, { "epoch": 0.5517795100872968, "grad_norm": 0.5579080370261531, "learning_rate": 2.4901865369018652e-05, "loss": 0.6583, "step": 18899 }, { "epoch": 0.5518087063151441, "grad_norm": 0.4909835847891245, "learning_rate": 2.4900243309002437e-05, "loss": 0.5722, "step": 18900 }, { "epoch": 0.5518379025429915, "grad_norm": 0.5345434066995433, "learning_rate": 2.4898621248986216e-05, "loss": 0.6528, "step": 18901 }, { "epoch": 0.5518670987708388, "grad_norm": 0.49264872140714067, "learning_rate": 2.4896999188969994e-05, "loss": 0.5381, "step": 18902 }, { "epoch": 0.5518962949986862, "grad_norm": 0.5074734431687339, "learning_rate": 2.4895377128953772e-05, "loss": 0.6074, "step": 18903 }, { "epoch": 0.5519254912265336, "grad_norm": 0.5615768886643056, "learning_rate": 2.489375506893755e-05, "loss": 0.6764, "step": 18904 }, { "epoch": 0.5519546874543809, "grad_norm": 0.519628947039872, "learning_rate": 2.4892133008921333e-05, "loss": 0.6035, "step": 18905 }, { "epoch": 0.5519838836822283, "grad_norm": 0.5476360217763466, "learning_rate": 2.489051094890511e-05, "loss": 0.6482, "step": 18906 }, { "epoch": 0.5520130799100756, "grad_norm": 0.5217122990030479, "learning_rate": 2.488888888888889e-05, "loss": 0.6222, "step": 18907 }, { "epoch": 0.552042276137923, "grad_norm": 0.5244708942243079, "learning_rate": 2.4887266828872668e-05, "loss": 0.5832, "step": 18908 }, { "epoch": 0.5520714723657704, "grad_norm": 0.5356843457767192, "learning_rate": 2.4885644768856446e-05, "loss": 0.5582, "step": 18909 }, { "epoch": 0.5521006685936177, "grad_norm": 0.6290355041162784, "learning_rate": 2.4884022708840228e-05, "loss": 0.5814, "step": 18910 }, { "epoch": 0.5521298648214651, "grad_norm": 0.49772355907309757, "learning_rate": 2.488240064882401e-05, "loss": 0.575, "step": 18911 }, { "epoch": 0.5521590610493124, "grad_norm": 0.5173052424600735, "learning_rate": 2.4880778588807788e-05, "loss": 0.5378, "step": 18912 }, { "epoch": 0.5521882572771598, "grad_norm": 0.47124284276337025, "learning_rate": 2.4879156528791566e-05, "loss": 0.5154, "step": 18913 }, { "epoch": 0.5522174535050072, "grad_norm": 0.5377187704154435, "learning_rate": 2.4877534468775345e-05, "loss": 0.6012, "step": 18914 }, { "epoch": 0.5522466497328545, "grad_norm": 0.5448667841482735, "learning_rate": 2.4875912408759126e-05, "loss": 0.5737, "step": 18915 }, { "epoch": 0.5522758459607019, "grad_norm": 0.5507655787604322, "learning_rate": 2.4874290348742905e-05, "loss": 0.631, "step": 18916 }, { "epoch": 0.5523050421885493, "grad_norm": 0.5109643293629647, "learning_rate": 2.4872668288726683e-05, "loss": 0.5743, "step": 18917 }, { "epoch": 0.5523342384163966, "grad_norm": 0.5824096136685599, "learning_rate": 2.487104622871046e-05, "loss": 0.701, "step": 18918 }, { "epoch": 0.552363434644244, "grad_norm": 0.511245538630933, "learning_rate": 2.4869424168694243e-05, "loss": 0.5772, "step": 18919 }, { "epoch": 0.5523926308720913, "grad_norm": 0.5574068070912488, "learning_rate": 2.486780210867802e-05, "loss": 0.6038, "step": 18920 }, { "epoch": 0.5524218270999387, "grad_norm": 0.5149310115557765, "learning_rate": 2.4866180048661803e-05, "loss": 0.595, "step": 18921 }, { "epoch": 0.5524510233277861, "grad_norm": 0.5029720974816894, "learning_rate": 2.4864557988645582e-05, "loss": 0.6221, "step": 18922 }, { "epoch": 0.5524802195556334, "grad_norm": 0.5706763958994273, "learning_rate": 2.486293592862936e-05, "loss": 0.6616, "step": 18923 }, { "epoch": 0.5525094157834808, "grad_norm": 0.49785985023163115, "learning_rate": 2.486131386861314e-05, "loss": 0.5606, "step": 18924 }, { "epoch": 0.5525386120113281, "grad_norm": 0.5081929561570093, "learning_rate": 2.485969180859692e-05, "loss": 0.5966, "step": 18925 }, { "epoch": 0.5525678082391755, "grad_norm": 0.5126567466120904, "learning_rate": 2.48580697485807e-05, "loss": 0.5849, "step": 18926 }, { "epoch": 0.5525970044670229, "grad_norm": 0.5427142766291528, "learning_rate": 2.4856447688564477e-05, "loss": 0.6499, "step": 18927 }, { "epoch": 0.5526262006948702, "grad_norm": 0.5241689242544219, "learning_rate": 2.4854825628548255e-05, "loss": 0.5645, "step": 18928 }, { "epoch": 0.5526553969227176, "grad_norm": 0.5168235443223448, "learning_rate": 2.4853203568532037e-05, "loss": 0.5915, "step": 18929 }, { "epoch": 0.552684593150565, "grad_norm": 0.5492989101077494, "learning_rate": 2.4851581508515816e-05, "loss": 0.6779, "step": 18930 }, { "epoch": 0.5527137893784123, "grad_norm": 0.4704257858862616, "learning_rate": 2.4849959448499597e-05, "loss": 0.5144, "step": 18931 }, { "epoch": 0.5527429856062597, "grad_norm": 0.5605784456076445, "learning_rate": 2.4848337388483376e-05, "loss": 0.6844, "step": 18932 }, { "epoch": 0.552772181834107, "grad_norm": 0.5296262524302067, "learning_rate": 2.4846715328467154e-05, "loss": 0.6285, "step": 18933 }, { "epoch": 0.5528013780619544, "grad_norm": 0.5269349809330113, "learning_rate": 2.4845093268450932e-05, "loss": 0.584, "step": 18934 }, { "epoch": 0.5528305742898018, "grad_norm": 0.5684606340888142, "learning_rate": 2.4843471208434714e-05, "loss": 0.6565, "step": 18935 }, { "epoch": 0.5528597705176491, "grad_norm": 0.5071202463276133, "learning_rate": 2.4841849148418493e-05, "loss": 0.5459, "step": 18936 }, { "epoch": 0.5528889667454965, "grad_norm": 0.515752139388953, "learning_rate": 2.484022708840227e-05, "loss": 0.6233, "step": 18937 }, { "epoch": 0.5529181629733438, "grad_norm": 0.5022775030581693, "learning_rate": 2.4838605028386053e-05, "loss": 0.5717, "step": 18938 }, { "epoch": 0.5529473592011912, "grad_norm": 0.5285096370784693, "learning_rate": 2.483698296836983e-05, "loss": 0.6358, "step": 18939 }, { "epoch": 0.5529765554290386, "grad_norm": 0.49546102319847934, "learning_rate": 2.483536090835361e-05, "loss": 0.5314, "step": 18940 }, { "epoch": 0.5530057516568859, "grad_norm": 0.4953341720371291, "learning_rate": 2.483373884833739e-05, "loss": 0.5283, "step": 18941 }, { "epoch": 0.5530349478847333, "grad_norm": 0.5555169921689109, "learning_rate": 2.483211678832117e-05, "loss": 0.6881, "step": 18942 }, { "epoch": 0.5530641441125806, "grad_norm": 0.5002402312943439, "learning_rate": 2.4830494728304948e-05, "loss": 0.5735, "step": 18943 }, { "epoch": 0.553093340340428, "grad_norm": 0.49038783163940924, "learning_rate": 2.4828872668288726e-05, "loss": 0.5474, "step": 18944 }, { "epoch": 0.5531225365682754, "grad_norm": 0.5324873470221693, "learning_rate": 2.4827250608272508e-05, "loss": 0.6308, "step": 18945 }, { "epoch": 0.5531517327961227, "grad_norm": 0.5458421237483299, "learning_rate": 2.4825628548256287e-05, "loss": 0.6145, "step": 18946 }, { "epoch": 0.5531809290239701, "grad_norm": 0.4932214925471821, "learning_rate": 2.4824006488240065e-05, "loss": 0.5505, "step": 18947 }, { "epoch": 0.5532101252518175, "grad_norm": 0.5446005810854743, "learning_rate": 2.4822384428223847e-05, "loss": 0.6199, "step": 18948 }, { "epoch": 0.5532393214796648, "grad_norm": 0.5445195859104793, "learning_rate": 2.4820762368207625e-05, "loss": 0.6895, "step": 18949 }, { "epoch": 0.5532685177075122, "grad_norm": 0.5228753406104756, "learning_rate": 2.4819140308191403e-05, "loss": 0.6252, "step": 18950 }, { "epoch": 0.5532977139353595, "grad_norm": 0.5089478021876537, "learning_rate": 2.4817518248175185e-05, "loss": 0.5117, "step": 18951 }, { "epoch": 0.5533269101632069, "grad_norm": 0.5407592470734551, "learning_rate": 2.4815896188158964e-05, "loss": 0.6356, "step": 18952 }, { "epoch": 0.5533561063910543, "grad_norm": 0.5681818790770116, "learning_rate": 2.4814274128142742e-05, "loss": 0.7074, "step": 18953 }, { "epoch": 0.5533853026189016, "grad_norm": 0.4992521141169303, "learning_rate": 2.481265206812652e-05, "loss": 0.572, "step": 18954 }, { "epoch": 0.553414498846749, "grad_norm": 0.5199566102305456, "learning_rate": 2.48110300081103e-05, "loss": 0.6146, "step": 18955 }, { "epoch": 0.5534436950745963, "grad_norm": 0.5244442030429584, "learning_rate": 2.480940794809408e-05, "loss": 0.5532, "step": 18956 }, { "epoch": 0.5534728913024437, "grad_norm": 0.5554508999199677, "learning_rate": 2.4807785888077862e-05, "loss": 0.6745, "step": 18957 }, { "epoch": 0.5535020875302911, "grad_norm": 0.5416154472296957, "learning_rate": 2.480616382806164e-05, "loss": 0.5974, "step": 18958 }, { "epoch": 0.5535312837581384, "grad_norm": 0.5091411853256712, "learning_rate": 2.480454176804542e-05, "loss": 0.5969, "step": 18959 }, { "epoch": 0.5535604799859858, "grad_norm": 0.5523235444214987, "learning_rate": 2.4802919708029197e-05, "loss": 0.6366, "step": 18960 }, { "epoch": 0.5535896762138331, "grad_norm": 0.5533930830863322, "learning_rate": 2.480129764801298e-05, "loss": 0.6791, "step": 18961 }, { "epoch": 0.5536188724416805, "grad_norm": 0.5617507064099639, "learning_rate": 2.4799675587996757e-05, "loss": 0.645, "step": 18962 }, { "epoch": 0.5536480686695279, "grad_norm": 0.5126551020869853, "learning_rate": 2.4798053527980536e-05, "loss": 0.5759, "step": 18963 }, { "epoch": 0.5536772648973752, "grad_norm": 0.47765910185440874, "learning_rate": 2.4796431467964314e-05, "loss": 0.5516, "step": 18964 }, { "epoch": 0.5537064611252226, "grad_norm": 0.5185923475241949, "learning_rate": 2.4794809407948093e-05, "loss": 0.6049, "step": 18965 }, { "epoch": 0.55373565735307, "grad_norm": 0.5197035705643647, "learning_rate": 2.4793187347931878e-05, "loss": 0.556, "step": 18966 }, { "epoch": 0.5537648535809173, "grad_norm": 0.5503872242279055, "learning_rate": 2.4791565287915656e-05, "loss": 0.6442, "step": 18967 }, { "epoch": 0.5537940498087647, "grad_norm": 0.5488970545026606, "learning_rate": 2.4789943227899434e-05, "loss": 0.6575, "step": 18968 }, { "epoch": 0.553823246036612, "grad_norm": 0.5097130169685987, "learning_rate": 2.4788321167883213e-05, "loss": 0.5849, "step": 18969 }, { "epoch": 0.5538524422644594, "grad_norm": 0.5085916439551267, "learning_rate": 2.478669910786699e-05, "loss": 0.5485, "step": 18970 }, { "epoch": 0.5538816384923068, "grad_norm": 0.5010069581448782, "learning_rate": 2.4785077047850773e-05, "loss": 0.5792, "step": 18971 }, { "epoch": 0.5539108347201541, "grad_norm": 0.5203246452852359, "learning_rate": 2.478345498783455e-05, "loss": 0.6244, "step": 18972 }, { "epoch": 0.5539400309480015, "grad_norm": 0.5001033634567575, "learning_rate": 2.478183292781833e-05, "loss": 0.5418, "step": 18973 }, { "epoch": 0.5539692271758488, "grad_norm": 0.4698747742822327, "learning_rate": 2.4780210867802108e-05, "loss": 0.5134, "step": 18974 }, { "epoch": 0.5539984234036962, "grad_norm": 0.5450988309335185, "learning_rate": 2.4778588807785886e-05, "loss": 0.711, "step": 18975 }, { "epoch": 0.5540276196315436, "grad_norm": 0.49981539513379597, "learning_rate": 2.4776966747769668e-05, "loss": 0.5902, "step": 18976 }, { "epoch": 0.5540568158593909, "grad_norm": 0.5421815848569732, "learning_rate": 2.477534468775345e-05, "loss": 0.6412, "step": 18977 }, { "epoch": 0.5540860120872383, "grad_norm": 0.5189380171621213, "learning_rate": 2.477372262773723e-05, "loss": 0.5841, "step": 18978 }, { "epoch": 0.5541152083150856, "grad_norm": 0.7693538905091127, "learning_rate": 2.4772100567721007e-05, "loss": 0.7248, "step": 18979 }, { "epoch": 0.554144404542933, "grad_norm": 0.5513436070652422, "learning_rate": 2.4770478507704785e-05, "loss": 0.6245, "step": 18980 }, { "epoch": 0.5541736007707804, "grad_norm": 0.5382057396681069, "learning_rate": 2.4768856447688567e-05, "loss": 0.6205, "step": 18981 }, { "epoch": 0.5542027969986277, "grad_norm": 0.5225100255899946, "learning_rate": 2.4767234387672345e-05, "loss": 0.6212, "step": 18982 }, { "epoch": 0.5542319932264751, "grad_norm": 0.5341208539138502, "learning_rate": 2.4765612327656124e-05, "loss": 0.6115, "step": 18983 }, { "epoch": 0.5542611894543225, "grad_norm": 0.5422881155546688, "learning_rate": 2.4763990267639902e-05, "loss": 0.6429, "step": 18984 }, { "epoch": 0.5542903856821698, "grad_norm": 0.5322720404821691, "learning_rate": 2.4762368207623684e-05, "loss": 0.5953, "step": 18985 }, { "epoch": 0.5543195819100172, "grad_norm": 0.5714112812258025, "learning_rate": 2.4760746147607462e-05, "loss": 0.7187, "step": 18986 }, { "epoch": 0.5543487781378645, "grad_norm": 0.4962539392989973, "learning_rate": 2.4759124087591244e-05, "loss": 0.5517, "step": 18987 }, { "epoch": 0.5543779743657119, "grad_norm": 0.5210704590551124, "learning_rate": 2.4757502027575022e-05, "loss": 0.6269, "step": 18988 }, { "epoch": 0.5544071705935593, "grad_norm": 0.5588791406097638, "learning_rate": 2.47558799675588e-05, "loss": 0.6527, "step": 18989 }, { "epoch": 0.5544363668214066, "grad_norm": 0.5410154140437192, "learning_rate": 2.475425790754258e-05, "loss": 0.6154, "step": 18990 }, { "epoch": 0.554465563049254, "grad_norm": 0.5074708066141348, "learning_rate": 2.475263584752636e-05, "loss": 0.5323, "step": 18991 }, { "epoch": 0.5544947592771013, "grad_norm": 0.4930202320759254, "learning_rate": 2.475101378751014e-05, "loss": 0.5783, "step": 18992 }, { "epoch": 0.5545239555049487, "grad_norm": 0.524894653447395, "learning_rate": 2.4749391727493918e-05, "loss": 0.6125, "step": 18993 }, { "epoch": 0.5545531517327961, "grad_norm": 0.5234276321638474, "learning_rate": 2.4747769667477696e-05, "loss": 0.5915, "step": 18994 }, { "epoch": 0.5545823479606435, "grad_norm": 0.5231197877140373, "learning_rate": 2.4746147607461478e-05, "loss": 0.6456, "step": 18995 }, { "epoch": 0.5546115441884909, "grad_norm": 0.5211752226910366, "learning_rate": 2.4744525547445256e-05, "loss": 0.5874, "step": 18996 }, { "epoch": 0.5546407404163383, "grad_norm": 0.5152392136857267, "learning_rate": 2.4742903487429038e-05, "loss": 0.5929, "step": 18997 }, { "epoch": 0.5546699366441856, "grad_norm": 0.48890637188657804, "learning_rate": 2.4741281427412816e-05, "loss": 0.5426, "step": 18998 }, { "epoch": 0.554699132872033, "grad_norm": 0.5439271522217806, "learning_rate": 2.4739659367396595e-05, "loss": 0.6271, "step": 18999 }, { "epoch": 0.5547283290998803, "grad_norm": 0.5113289747004505, "learning_rate": 2.4738037307380373e-05, "loss": 0.5967, "step": 19000 }, { "epoch": 0.5547575253277277, "grad_norm": 0.5374482869856336, "learning_rate": 2.4736415247364155e-05, "loss": 0.6369, "step": 19001 }, { "epoch": 0.5547867215555751, "grad_norm": 0.5116550159789939, "learning_rate": 2.4734793187347933e-05, "loss": 0.6216, "step": 19002 }, { "epoch": 0.5548159177834224, "grad_norm": 0.5363401193419113, "learning_rate": 2.473317112733171e-05, "loss": 0.6094, "step": 19003 }, { "epoch": 0.5548451140112698, "grad_norm": 0.5144390357178823, "learning_rate": 2.4731549067315493e-05, "loss": 0.5693, "step": 19004 }, { "epoch": 0.5548743102391172, "grad_norm": 0.5272109501557584, "learning_rate": 2.472992700729927e-05, "loss": 0.6389, "step": 19005 }, { "epoch": 0.5549035064669645, "grad_norm": 0.57246921263912, "learning_rate": 2.472830494728305e-05, "loss": 0.676, "step": 19006 }, { "epoch": 0.5549327026948119, "grad_norm": 0.4860646395939177, "learning_rate": 2.472668288726683e-05, "loss": 0.5505, "step": 19007 }, { "epoch": 0.5549618989226592, "grad_norm": 0.5273814345055782, "learning_rate": 2.472506082725061e-05, "loss": 0.626, "step": 19008 }, { "epoch": 0.5549910951505066, "grad_norm": 0.51708797136893, "learning_rate": 2.472343876723439e-05, "loss": 0.6211, "step": 19009 }, { "epoch": 0.555020291378354, "grad_norm": 0.5547592045110906, "learning_rate": 2.4721816707218167e-05, "loss": 0.6259, "step": 19010 }, { "epoch": 0.5550494876062013, "grad_norm": 0.5264515131489409, "learning_rate": 2.472019464720195e-05, "loss": 0.6134, "step": 19011 }, { "epoch": 0.5550786838340487, "grad_norm": 0.4844619210966197, "learning_rate": 2.4718572587185727e-05, "loss": 0.5517, "step": 19012 }, { "epoch": 0.555107880061896, "grad_norm": 0.5378128572066327, "learning_rate": 2.4716950527169505e-05, "loss": 0.69, "step": 19013 }, { "epoch": 0.5551370762897434, "grad_norm": 0.5166741863244058, "learning_rate": 2.4715328467153287e-05, "loss": 0.6404, "step": 19014 }, { "epoch": 0.5551662725175908, "grad_norm": 0.5441835968717182, "learning_rate": 2.4713706407137065e-05, "loss": 0.6603, "step": 19015 }, { "epoch": 0.5551954687454381, "grad_norm": 0.6060771791375956, "learning_rate": 2.4712084347120844e-05, "loss": 0.7326, "step": 19016 }, { "epoch": 0.5552246649732855, "grad_norm": 0.5244518159741408, "learning_rate": 2.4710462287104626e-05, "loss": 0.6055, "step": 19017 }, { "epoch": 0.5552538612011328, "grad_norm": 0.5724804101106664, "learning_rate": 2.4708840227088404e-05, "loss": 0.6763, "step": 19018 }, { "epoch": 0.5552830574289802, "grad_norm": 0.5348066350508568, "learning_rate": 2.4707218167072182e-05, "loss": 0.6411, "step": 19019 }, { "epoch": 0.5553122536568276, "grad_norm": 0.5194517631023846, "learning_rate": 2.470559610705596e-05, "loss": 0.6318, "step": 19020 }, { "epoch": 0.5553414498846749, "grad_norm": 0.5335245653969309, "learning_rate": 2.470397404703974e-05, "loss": 0.6417, "step": 19021 }, { "epoch": 0.5553706461125223, "grad_norm": 0.5583409974752497, "learning_rate": 2.470235198702352e-05, "loss": 0.6968, "step": 19022 }, { "epoch": 0.5553998423403697, "grad_norm": 0.6111392912690878, "learning_rate": 2.4700729927007303e-05, "loss": 0.6589, "step": 19023 }, { "epoch": 0.555429038568217, "grad_norm": 0.5051841529611322, "learning_rate": 2.469910786699108e-05, "loss": 0.5924, "step": 19024 }, { "epoch": 0.5554582347960644, "grad_norm": 0.48559820989896174, "learning_rate": 2.469748580697486e-05, "loss": 0.5435, "step": 19025 }, { "epoch": 0.5554874310239117, "grad_norm": 0.5091056718095714, "learning_rate": 2.4695863746958638e-05, "loss": 0.5911, "step": 19026 }, { "epoch": 0.5555166272517591, "grad_norm": 0.5803538185793672, "learning_rate": 2.469424168694242e-05, "loss": 0.7238, "step": 19027 }, { "epoch": 0.5555458234796065, "grad_norm": 0.5239161227835354, "learning_rate": 2.4692619626926198e-05, "loss": 0.5884, "step": 19028 }, { "epoch": 0.5555750197074538, "grad_norm": 0.5402348786604454, "learning_rate": 2.4690997566909976e-05, "loss": 0.6291, "step": 19029 }, { "epoch": 0.5556042159353012, "grad_norm": 0.5195413495109172, "learning_rate": 2.4689375506893755e-05, "loss": 0.6358, "step": 19030 }, { "epoch": 0.5556334121631485, "grad_norm": 0.5805879037718351, "learning_rate": 2.4687753446877533e-05, "loss": 0.7191, "step": 19031 }, { "epoch": 0.5556626083909959, "grad_norm": 0.5144049846433811, "learning_rate": 2.4686131386861315e-05, "loss": 0.59, "step": 19032 }, { "epoch": 0.5556918046188433, "grad_norm": 0.5482353785676725, "learning_rate": 2.4684509326845097e-05, "loss": 0.6865, "step": 19033 }, { "epoch": 0.5557210008466906, "grad_norm": 0.522054841080433, "learning_rate": 2.4682887266828875e-05, "loss": 0.6289, "step": 19034 }, { "epoch": 0.555750197074538, "grad_norm": 0.47428782068938535, "learning_rate": 2.4681265206812653e-05, "loss": 0.518, "step": 19035 }, { "epoch": 0.5557793933023853, "grad_norm": 0.5310792329149278, "learning_rate": 2.467964314679643e-05, "loss": 0.6036, "step": 19036 }, { "epoch": 0.5558085895302327, "grad_norm": 0.5009364433748292, "learning_rate": 2.4678021086780213e-05, "loss": 0.5714, "step": 19037 }, { "epoch": 0.5558377857580801, "grad_norm": 0.5607789612009564, "learning_rate": 2.4676399026763992e-05, "loss": 0.6532, "step": 19038 }, { "epoch": 0.5558669819859274, "grad_norm": 0.5652199266310998, "learning_rate": 2.467477696674777e-05, "loss": 0.665, "step": 19039 }, { "epoch": 0.5558961782137748, "grad_norm": 0.524415675051881, "learning_rate": 2.467315490673155e-05, "loss": 0.6234, "step": 19040 }, { "epoch": 0.5559253744416222, "grad_norm": 0.4705512882200724, "learning_rate": 2.4671532846715327e-05, "loss": 0.4908, "step": 19041 }, { "epoch": 0.5559545706694695, "grad_norm": 0.5462632849388976, "learning_rate": 2.466991078669911e-05, "loss": 0.6712, "step": 19042 }, { "epoch": 0.5559837668973169, "grad_norm": 0.49795357156406195, "learning_rate": 2.466828872668289e-05, "loss": 0.5824, "step": 19043 }, { "epoch": 0.5560129631251642, "grad_norm": 0.5486079146950545, "learning_rate": 2.466666666666667e-05, "loss": 0.6269, "step": 19044 }, { "epoch": 0.5560421593530116, "grad_norm": 0.5170892207399419, "learning_rate": 2.4665044606650447e-05, "loss": 0.6055, "step": 19045 }, { "epoch": 0.556071355580859, "grad_norm": 0.526918979379576, "learning_rate": 2.4663422546634226e-05, "loss": 0.5932, "step": 19046 }, { "epoch": 0.5561005518087063, "grad_norm": 0.5390307092054932, "learning_rate": 2.4661800486618007e-05, "loss": 0.6251, "step": 19047 }, { "epoch": 0.5561297480365537, "grad_norm": 0.5265006554054764, "learning_rate": 2.4660178426601786e-05, "loss": 0.6016, "step": 19048 }, { "epoch": 0.556158944264401, "grad_norm": 0.5308958837609815, "learning_rate": 2.4658556366585564e-05, "loss": 0.6038, "step": 19049 }, { "epoch": 0.5561881404922484, "grad_norm": 0.50956253680952, "learning_rate": 2.4656934306569342e-05, "loss": 0.6086, "step": 19050 }, { "epoch": 0.5562173367200958, "grad_norm": 0.5038486307205172, "learning_rate": 2.4655312246553124e-05, "loss": 0.58, "step": 19051 }, { "epoch": 0.5562465329479431, "grad_norm": 0.5327639731543216, "learning_rate": 2.4653690186536903e-05, "loss": 0.6324, "step": 19052 }, { "epoch": 0.5562757291757905, "grad_norm": 0.5218248861649392, "learning_rate": 2.4652068126520684e-05, "loss": 0.6116, "step": 19053 }, { "epoch": 0.5563049254036379, "grad_norm": 0.5900366618508944, "learning_rate": 2.4650446066504463e-05, "loss": 0.6986, "step": 19054 }, { "epoch": 0.5563341216314852, "grad_norm": 0.5299039808980329, "learning_rate": 2.464882400648824e-05, "loss": 0.6484, "step": 19055 }, { "epoch": 0.5563633178593326, "grad_norm": 0.5503606243226123, "learning_rate": 2.464720194647202e-05, "loss": 0.6649, "step": 19056 }, { "epoch": 0.5563925140871799, "grad_norm": 0.6021031294348628, "learning_rate": 2.46455798864558e-05, "loss": 0.7455, "step": 19057 }, { "epoch": 0.5564217103150273, "grad_norm": 0.5143519517443641, "learning_rate": 2.464395782643958e-05, "loss": 0.5919, "step": 19058 }, { "epoch": 0.5564509065428747, "grad_norm": 0.49536246271161216, "learning_rate": 2.4642335766423358e-05, "loss": 0.5508, "step": 19059 }, { "epoch": 0.556480102770722, "grad_norm": 0.5273600250122777, "learning_rate": 2.4640713706407136e-05, "loss": 0.6026, "step": 19060 }, { "epoch": 0.5565092989985694, "grad_norm": 0.5098821431424082, "learning_rate": 2.4639091646390918e-05, "loss": 0.5605, "step": 19061 }, { "epoch": 0.5565384952264167, "grad_norm": 0.5257299094556926, "learning_rate": 2.4637469586374696e-05, "loss": 0.631, "step": 19062 }, { "epoch": 0.5565676914542641, "grad_norm": 0.5205183581800349, "learning_rate": 2.4635847526358478e-05, "loss": 0.5876, "step": 19063 }, { "epoch": 0.5565968876821115, "grad_norm": 0.5167890952684984, "learning_rate": 2.4634225466342257e-05, "loss": 0.6274, "step": 19064 }, { "epoch": 0.5566260839099588, "grad_norm": 0.552265641848136, "learning_rate": 2.4632603406326035e-05, "loss": 0.6389, "step": 19065 }, { "epoch": 0.5566552801378062, "grad_norm": 0.4996049354099209, "learning_rate": 2.4630981346309813e-05, "loss": 0.5309, "step": 19066 }, { "epoch": 0.5566844763656535, "grad_norm": 0.5421396155857461, "learning_rate": 2.4629359286293595e-05, "loss": 0.652, "step": 19067 }, { "epoch": 0.5567136725935009, "grad_norm": 0.5033720245562883, "learning_rate": 2.4627737226277373e-05, "loss": 0.5829, "step": 19068 }, { "epoch": 0.5567428688213483, "grad_norm": 0.5189603105326225, "learning_rate": 2.4626115166261152e-05, "loss": 0.5825, "step": 19069 }, { "epoch": 0.5567720650491956, "grad_norm": 0.48566048072083456, "learning_rate": 2.4624493106244934e-05, "loss": 0.5243, "step": 19070 }, { "epoch": 0.556801261277043, "grad_norm": 0.5402048211245629, "learning_rate": 2.4622871046228712e-05, "loss": 0.6449, "step": 19071 }, { "epoch": 0.5568304575048904, "grad_norm": 0.5297849870721364, "learning_rate": 2.462124898621249e-05, "loss": 0.6502, "step": 19072 }, { "epoch": 0.5568596537327377, "grad_norm": 0.5117842843566681, "learning_rate": 2.4619626926196272e-05, "loss": 0.6341, "step": 19073 }, { "epoch": 0.5568888499605851, "grad_norm": 0.5238393343507118, "learning_rate": 2.461800486618005e-05, "loss": 0.5755, "step": 19074 }, { "epoch": 0.5569180461884324, "grad_norm": 0.523076718717282, "learning_rate": 2.461638280616383e-05, "loss": 0.6409, "step": 19075 }, { "epoch": 0.5569472424162798, "grad_norm": 0.5453767369192244, "learning_rate": 2.4614760746147607e-05, "loss": 0.6851, "step": 19076 }, { "epoch": 0.5569764386441272, "grad_norm": 0.5244953703803151, "learning_rate": 2.4613138686131386e-05, "loss": 0.5879, "step": 19077 }, { "epoch": 0.5570056348719745, "grad_norm": 0.5494496001683556, "learning_rate": 2.4611516626115167e-05, "loss": 0.6868, "step": 19078 }, { "epoch": 0.5570348310998219, "grad_norm": 0.5589894759224319, "learning_rate": 2.4609894566098946e-05, "loss": 0.643, "step": 19079 }, { "epoch": 0.5570640273276692, "grad_norm": 0.5433268489356743, "learning_rate": 2.4608272506082728e-05, "loss": 0.6387, "step": 19080 }, { "epoch": 0.5570932235555166, "grad_norm": 0.533432657001406, "learning_rate": 2.4606650446066506e-05, "loss": 0.602, "step": 19081 }, { "epoch": 0.557122419783364, "grad_norm": 0.5182432794323772, "learning_rate": 2.4605028386050284e-05, "loss": 0.5556, "step": 19082 }, { "epoch": 0.5571516160112113, "grad_norm": 0.5256780277239932, "learning_rate": 2.4603406326034066e-05, "loss": 0.5921, "step": 19083 }, { "epoch": 0.5571808122390587, "grad_norm": 0.5018898363887712, "learning_rate": 2.4601784266017844e-05, "loss": 0.5893, "step": 19084 }, { "epoch": 0.557210008466906, "grad_norm": 0.5005142935722809, "learning_rate": 2.4600162206001623e-05, "loss": 0.5673, "step": 19085 }, { "epoch": 0.5572392046947534, "grad_norm": 0.5400828379750616, "learning_rate": 2.45985401459854e-05, "loss": 0.6466, "step": 19086 }, { "epoch": 0.5572684009226008, "grad_norm": 0.5132125972267839, "learning_rate": 2.459691808596918e-05, "loss": 0.615, "step": 19087 }, { "epoch": 0.5572975971504481, "grad_norm": 0.5326695736863399, "learning_rate": 2.459529602595296e-05, "loss": 0.6499, "step": 19088 }, { "epoch": 0.5573267933782955, "grad_norm": 0.5179288482322718, "learning_rate": 2.4593673965936743e-05, "loss": 0.6259, "step": 19089 }, { "epoch": 0.5573559896061429, "grad_norm": 0.5310516053564892, "learning_rate": 2.459205190592052e-05, "loss": 0.5933, "step": 19090 }, { "epoch": 0.5573851858339902, "grad_norm": 0.5514208199122224, "learning_rate": 2.45904298459043e-05, "loss": 0.6649, "step": 19091 }, { "epoch": 0.5574143820618376, "grad_norm": 0.4914831150422286, "learning_rate": 2.4588807785888078e-05, "loss": 0.5292, "step": 19092 }, { "epoch": 0.5574435782896849, "grad_norm": 0.5029232793873384, "learning_rate": 2.458718572587186e-05, "loss": 0.5802, "step": 19093 }, { "epoch": 0.5574727745175323, "grad_norm": 0.5442635340362496, "learning_rate": 2.458556366585564e-05, "loss": 0.6128, "step": 19094 }, { "epoch": 0.5575019707453797, "grad_norm": 0.4933822492486097, "learning_rate": 2.4583941605839417e-05, "loss": 0.5427, "step": 19095 }, { "epoch": 0.557531166973227, "grad_norm": 0.5326173224257026, "learning_rate": 2.4582319545823195e-05, "loss": 0.6385, "step": 19096 }, { "epoch": 0.5575603632010744, "grad_norm": 0.5290971154421512, "learning_rate": 2.4580697485806973e-05, "loss": 0.6221, "step": 19097 }, { "epoch": 0.5575895594289217, "grad_norm": 0.5379227066731725, "learning_rate": 2.4579075425790755e-05, "loss": 0.6187, "step": 19098 }, { "epoch": 0.5576187556567691, "grad_norm": 0.5310119742608899, "learning_rate": 2.4577453365774537e-05, "loss": 0.6401, "step": 19099 }, { "epoch": 0.5576479518846165, "grad_norm": 0.5139461842308068, "learning_rate": 2.4575831305758315e-05, "loss": 0.6217, "step": 19100 }, { "epoch": 0.5576771481124638, "grad_norm": 0.5491471176298114, "learning_rate": 2.4574209245742094e-05, "loss": 0.6537, "step": 19101 }, { "epoch": 0.5577063443403112, "grad_norm": 0.46299235433990804, "learning_rate": 2.4572587185725872e-05, "loss": 0.4851, "step": 19102 }, { "epoch": 0.5577355405681585, "grad_norm": 0.5270793824894432, "learning_rate": 2.4570965125709654e-05, "loss": 0.5931, "step": 19103 }, { "epoch": 0.5577647367960059, "grad_norm": 0.5000376018086802, "learning_rate": 2.4569343065693432e-05, "loss": 0.6067, "step": 19104 }, { "epoch": 0.5577939330238533, "grad_norm": 0.5288708929828706, "learning_rate": 2.456772100567721e-05, "loss": 0.6169, "step": 19105 }, { "epoch": 0.5578231292517006, "grad_norm": 0.5325433034920253, "learning_rate": 2.456609894566099e-05, "loss": 0.6681, "step": 19106 }, { "epoch": 0.557852325479548, "grad_norm": 0.4730671622221032, "learning_rate": 2.4564476885644767e-05, "loss": 0.5221, "step": 19107 }, { "epoch": 0.5578815217073954, "grad_norm": 0.5050096470880465, "learning_rate": 2.456285482562855e-05, "loss": 0.5776, "step": 19108 }, { "epoch": 0.5579107179352427, "grad_norm": 0.5243346093112797, "learning_rate": 2.456123276561233e-05, "loss": 0.632, "step": 19109 }, { "epoch": 0.5579399141630901, "grad_norm": 0.5038850641272575, "learning_rate": 2.455961070559611e-05, "loss": 0.5678, "step": 19110 }, { "epoch": 0.5579691103909374, "grad_norm": 0.5021923094988956, "learning_rate": 2.4557988645579888e-05, "loss": 0.5944, "step": 19111 }, { "epoch": 0.5579983066187848, "grad_norm": 0.5388917785919944, "learning_rate": 2.4556366585563666e-05, "loss": 0.5931, "step": 19112 }, { "epoch": 0.5580275028466322, "grad_norm": 0.4792335935902413, "learning_rate": 2.4554744525547448e-05, "loss": 0.5128, "step": 19113 }, { "epoch": 0.5580566990744795, "grad_norm": 0.5156566258546982, "learning_rate": 2.4553122465531226e-05, "loss": 0.6049, "step": 19114 }, { "epoch": 0.5580858953023269, "grad_norm": 0.48993461958807816, "learning_rate": 2.4551500405515004e-05, "loss": 0.6026, "step": 19115 }, { "epoch": 0.5581150915301744, "grad_norm": 0.5090481502079199, "learning_rate": 2.4549878345498783e-05, "loss": 0.5688, "step": 19116 }, { "epoch": 0.5581442877580217, "grad_norm": 0.531429043548131, "learning_rate": 2.4548256285482565e-05, "loss": 0.6666, "step": 19117 }, { "epoch": 0.5581734839858691, "grad_norm": 0.5184143782278433, "learning_rate": 2.4546634225466343e-05, "loss": 0.5787, "step": 19118 }, { "epoch": 0.5582026802137164, "grad_norm": 0.5550114254078403, "learning_rate": 2.4545012165450125e-05, "loss": 0.6558, "step": 19119 }, { "epoch": 0.5582318764415638, "grad_norm": 0.5464263856328165, "learning_rate": 2.4543390105433903e-05, "loss": 0.6502, "step": 19120 }, { "epoch": 0.5582610726694112, "grad_norm": 0.5470388315244823, "learning_rate": 2.454176804541768e-05, "loss": 0.6401, "step": 19121 }, { "epoch": 0.5582902688972585, "grad_norm": 0.5228862036061961, "learning_rate": 2.454014598540146e-05, "loss": 0.59, "step": 19122 }, { "epoch": 0.5583194651251059, "grad_norm": 0.5211559323794807, "learning_rate": 2.453852392538524e-05, "loss": 0.6091, "step": 19123 }, { "epoch": 0.5583486613529532, "grad_norm": 0.5194582296881467, "learning_rate": 2.453690186536902e-05, "loss": 0.6053, "step": 19124 }, { "epoch": 0.5583778575808006, "grad_norm": 0.6028093271588286, "learning_rate": 2.45352798053528e-05, "loss": 0.732, "step": 19125 }, { "epoch": 0.558407053808648, "grad_norm": 0.5215620064163478, "learning_rate": 2.4533657745336577e-05, "loss": 0.5905, "step": 19126 }, { "epoch": 0.5584362500364953, "grad_norm": 0.5279453795481998, "learning_rate": 2.453203568532036e-05, "loss": 0.6297, "step": 19127 }, { "epoch": 0.5584654462643427, "grad_norm": 0.5019189808021213, "learning_rate": 2.4530413625304137e-05, "loss": 0.5704, "step": 19128 }, { "epoch": 0.55849464249219, "grad_norm": 0.5408529159145247, "learning_rate": 2.452879156528792e-05, "loss": 0.6368, "step": 19129 }, { "epoch": 0.5585238387200374, "grad_norm": 0.47974496385768084, "learning_rate": 2.4527169505271697e-05, "loss": 0.5278, "step": 19130 }, { "epoch": 0.5585530349478848, "grad_norm": 0.5157780269800676, "learning_rate": 2.4525547445255475e-05, "loss": 0.6128, "step": 19131 }, { "epoch": 0.5585822311757321, "grad_norm": 0.5614142011427109, "learning_rate": 2.4523925385239254e-05, "loss": 0.7057, "step": 19132 }, { "epoch": 0.5586114274035795, "grad_norm": 0.5295535664954907, "learning_rate": 2.4522303325223036e-05, "loss": 0.5729, "step": 19133 }, { "epoch": 0.5586406236314269, "grad_norm": 0.4927324139316931, "learning_rate": 2.4520681265206814e-05, "loss": 0.534, "step": 19134 }, { "epoch": 0.5586698198592742, "grad_norm": 0.5065468276324803, "learning_rate": 2.4519059205190592e-05, "loss": 0.6139, "step": 19135 }, { "epoch": 0.5586990160871216, "grad_norm": 0.5102533746417469, "learning_rate": 2.4517437145174374e-05, "loss": 0.5736, "step": 19136 }, { "epoch": 0.5587282123149689, "grad_norm": 0.5250225896481782, "learning_rate": 2.4515815085158152e-05, "loss": 0.6203, "step": 19137 }, { "epoch": 0.5587574085428163, "grad_norm": 0.5152051963406482, "learning_rate": 2.451419302514193e-05, "loss": 0.6102, "step": 19138 }, { "epoch": 0.5587866047706637, "grad_norm": 0.5074111965021958, "learning_rate": 2.4512570965125713e-05, "loss": 0.5773, "step": 19139 }, { "epoch": 0.558815800998511, "grad_norm": 0.5305421706702017, "learning_rate": 2.451094890510949e-05, "loss": 0.6259, "step": 19140 }, { "epoch": 0.5588449972263584, "grad_norm": 0.5368983505172321, "learning_rate": 2.450932684509327e-05, "loss": 0.5935, "step": 19141 }, { "epoch": 0.5588741934542057, "grad_norm": 0.549759930873216, "learning_rate": 2.4507704785077048e-05, "loss": 0.6576, "step": 19142 }, { "epoch": 0.5589033896820531, "grad_norm": 0.4976997279223551, "learning_rate": 2.4506082725060826e-05, "loss": 0.556, "step": 19143 }, { "epoch": 0.5589325859099005, "grad_norm": 0.6061395860212553, "learning_rate": 2.4504460665044608e-05, "loss": 0.5406, "step": 19144 }, { "epoch": 0.5589617821377478, "grad_norm": 0.5465572483338083, "learning_rate": 2.4502838605028386e-05, "loss": 0.6404, "step": 19145 }, { "epoch": 0.5589909783655952, "grad_norm": 0.5412162491638698, "learning_rate": 2.4501216545012168e-05, "loss": 0.6404, "step": 19146 }, { "epoch": 0.5590201745934426, "grad_norm": 0.4981949203941144, "learning_rate": 2.4499594484995946e-05, "loss": 0.5734, "step": 19147 }, { "epoch": 0.5590493708212899, "grad_norm": 0.5884748096289723, "learning_rate": 2.4497972424979725e-05, "loss": 0.6179, "step": 19148 }, { "epoch": 0.5590785670491373, "grad_norm": 0.5718640819141062, "learning_rate": 2.4496350364963506e-05, "loss": 0.6743, "step": 19149 }, { "epoch": 0.5591077632769846, "grad_norm": 0.5406785854779953, "learning_rate": 2.4494728304947285e-05, "loss": 0.6439, "step": 19150 }, { "epoch": 0.559136959504832, "grad_norm": 0.5067004585904913, "learning_rate": 2.4493106244931063e-05, "loss": 0.6025, "step": 19151 }, { "epoch": 0.5591661557326794, "grad_norm": 0.5450116153236285, "learning_rate": 2.449148418491484e-05, "loss": 0.6503, "step": 19152 }, { "epoch": 0.5591953519605267, "grad_norm": 0.5235466322342517, "learning_rate": 2.448986212489862e-05, "loss": 0.6053, "step": 19153 }, { "epoch": 0.5592245481883741, "grad_norm": 0.583099000031038, "learning_rate": 2.4488240064882402e-05, "loss": 0.6697, "step": 19154 }, { "epoch": 0.5592537444162214, "grad_norm": 0.555260894414383, "learning_rate": 2.4486618004866183e-05, "loss": 0.6752, "step": 19155 }, { "epoch": 0.5592829406440688, "grad_norm": 0.5319746972826039, "learning_rate": 2.4484995944849962e-05, "loss": 0.6082, "step": 19156 }, { "epoch": 0.5593121368719162, "grad_norm": 0.5493538348045814, "learning_rate": 2.448337388483374e-05, "loss": 0.6568, "step": 19157 }, { "epoch": 0.5593413330997635, "grad_norm": 0.5010215655151811, "learning_rate": 2.448175182481752e-05, "loss": 0.601, "step": 19158 }, { "epoch": 0.5593705293276109, "grad_norm": 0.48056299824084814, "learning_rate": 2.44801297648013e-05, "loss": 0.546, "step": 19159 }, { "epoch": 0.5593997255554582, "grad_norm": 0.5245643700359776, "learning_rate": 2.447850770478508e-05, "loss": 0.6188, "step": 19160 }, { "epoch": 0.5594289217833056, "grad_norm": 0.5398759354938415, "learning_rate": 2.4476885644768857e-05, "loss": 0.5971, "step": 19161 }, { "epoch": 0.559458118011153, "grad_norm": 0.525569004851436, "learning_rate": 2.4475263584752636e-05, "loss": 0.6201, "step": 19162 }, { "epoch": 0.5594873142390003, "grad_norm": 0.5582359204077385, "learning_rate": 2.4473641524736414e-05, "loss": 0.6032, "step": 19163 }, { "epoch": 0.5595165104668477, "grad_norm": 0.5401923301462002, "learning_rate": 2.4472019464720196e-05, "loss": 0.658, "step": 19164 }, { "epoch": 0.559545706694695, "grad_norm": 0.5028211597620043, "learning_rate": 2.4470397404703977e-05, "loss": 0.5984, "step": 19165 }, { "epoch": 0.5595749029225424, "grad_norm": 0.542418238860525, "learning_rate": 2.4468775344687756e-05, "loss": 0.6937, "step": 19166 }, { "epoch": 0.5596040991503898, "grad_norm": 0.5030217656813977, "learning_rate": 2.4467153284671534e-05, "loss": 0.5937, "step": 19167 }, { "epoch": 0.5596332953782371, "grad_norm": 0.5338344525509924, "learning_rate": 2.4465531224655313e-05, "loss": 0.6571, "step": 19168 }, { "epoch": 0.5596624916060845, "grad_norm": 0.5062835066568413, "learning_rate": 2.4463909164639094e-05, "loss": 0.5909, "step": 19169 }, { "epoch": 0.5596916878339319, "grad_norm": 0.5292088596270945, "learning_rate": 2.4462287104622873e-05, "loss": 0.6536, "step": 19170 }, { "epoch": 0.5597208840617792, "grad_norm": 0.49971034118141716, "learning_rate": 2.446066504460665e-05, "loss": 0.5778, "step": 19171 }, { "epoch": 0.5597500802896266, "grad_norm": 0.5099606985327342, "learning_rate": 2.445904298459043e-05, "loss": 0.5497, "step": 19172 }, { "epoch": 0.559779276517474, "grad_norm": 0.508976894057931, "learning_rate": 2.4457420924574208e-05, "loss": 0.6136, "step": 19173 }, { "epoch": 0.5598084727453213, "grad_norm": 0.5039149797106304, "learning_rate": 2.445579886455799e-05, "loss": 0.5608, "step": 19174 }, { "epoch": 0.5598376689731687, "grad_norm": 0.5105451262250786, "learning_rate": 2.445417680454177e-05, "loss": 0.5948, "step": 19175 }, { "epoch": 0.559866865201016, "grad_norm": 0.591470836763401, "learning_rate": 2.445255474452555e-05, "loss": 0.6348, "step": 19176 }, { "epoch": 0.5598960614288634, "grad_norm": 0.5503759404336237, "learning_rate": 2.4450932684509328e-05, "loss": 0.6887, "step": 19177 }, { "epoch": 0.5599252576567108, "grad_norm": 0.5415984491077346, "learning_rate": 2.4449310624493106e-05, "loss": 0.6819, "step": 19178 }, { "epoch": 0.5599544538845581, "grad_norm": 0.49657585634082424, "learning_rate": 2.4447688564476888e-05, "loss": 0.5281, "step": 19179 }, { "epoch": 0.5599836501124055, "grad_norm": 0.7125384448786666, "learning_rate": 2.4446066504460667e-05, "loss": 0.6616, "step": 19180 }, { "epoch": 0.5600128463402528, "grad_norm": 0.5454582977857729, "learning_rate": 2.4444444444444445e-05, "loss": 0.646, "step": 19181 }, { "epoch": 0.5600420425681002, "grad_norm": 0.5143885153771136, "learning_rate": 2.4442822384428223e-05, "loss": 0.5745, "step": 19182 }, { "epoch": 0.5600712387959476, "grad_norm": 0.501171279960747, "learning_rate": 2.4441200324412005e-05, "loss": 0.549, "step": 19183 }, { "epoch": 0.5601004350237949, "grad_norm": 0.5104957918875304, "learning_rate": 2.4439578264395783e-05, "loss": 0.6051, "step": 19184 }, { "epoch": 0.5601296312516423, "grad_norm": 0.521235224300317, "learning_rate": 2.4437956204379565e-05, "loss": 0.6088, "step": 19185 }, { "epoch": 0.5601588274794896, "grad_norm": 0.5206896008128024, "learning_rate": 2.4436334144363344e-05, "loss": 0.5945, "step": 19186 }, { "epoch": 0.560188023707337, "grad_norm": 0.5179927214487893, "learning_rate": 2.4434712084347122e-05, "loss": 0.5957, "step": 19187 }, { "epoch": 0.5602172199351844, "grad_norm": 0.49074468687644746, "learning_rate": 2.44330900243309e-05, "loss": 0.5759, "step": 19188 }, { "epoch": 0.5602464161630317, "grad_norm": 0.5645725177411544, "learning_rate": 2.4431467964314682e-05, "loss": 0.7001, "step": 19189 }, { "epoch": 0.5602756123908791, "grad_norm": 0.5781262486172077, "learning_rate": 2.442984590429846e-05, "loss": 0.596, "step": 19190 }, { "epoch": 0.5603048086187264, "grad_norm": 0.5181004995637312, "learning_rate": 2.442822384428224e-05, "loss": 0.6344, "step": 19191 }, { "epoch": 0.5603340048465738, "grad_norm": 0.5005952557115664, "learning_rate": 2.4426601784266017e-05, "loss": 0.5737, "step": 19192 }, { "epoch": 0.5603632010744212, "grad_norm": 0.5520752034512382, "learning_rate": 2.44249797242498e-05, "loss": 0.671, "step": 19193 }, { "epoch": 0.5603923973022685, "grad_norm": 0.5085703300573854, "learning_rate": 2.4423357664233577e-05, "loss": 0.5723, "step": 19194 }, { "epoch": 0.5604215935301159, "grad_norm": 0.5217098947295077, "learning_rate": 2.442173560421736e-05, "loss": 0.6543, "step": 19195 }, { "epoch": 0.5604507897579633, "grad_norm": 0.5033305859585957, "learning_rate": 2.4420113544201137e-05, "loss": 0.5585, "step": 19196 }, { "epoch": 0.5604799859858106, "grad_norm": 0.5224904763762047, "learning_rate": 2.4418491484184916e-05, "loss": 0.6464, "step": 19197 }, { "epoch": 0.560509182213658, "grad_norm": 0.5152970443541279, "learning_rate": 2.4416869424168694e-05, "loss": 0.5937, "step": 19198 }, { "epoch": 0.5605383784415053, "grad_norm": 0.5706399819159771, "learning_rate": 2.4415247364152473e-05, "loss": 0.6396, "step": 19199 }, { "epoch": 0.5605675746693527, "grad_norm": 0.5074248953896381, "learning_rate": 2.4413625304136254e-05, "loss": 0.6209, "step": 19200 }, { "epoch": 0.5605967708972001, "grad_norm": 0.5626564838697184, "learning_rate": 2.4412003244120033e-05, "loss": 0.6677, "step": 19201 }, { "epoch": 0.5606259671250474, "grad_norm": 0.5009950229513875, "learning_rate": 2.4410381184103815e-05, "loss": 0.5563, "step": 19202 }, { "epoch": 0.5606551633528948, "grad_norm": 0.5181171538760406, "learning_rate": 2.4408759124087593e-05, "loss": 0.5766, "step": 19203 }, { "epoch": 0.5606843595807421, "grad_norm": 0.5131334648048828, "learning_rate": 2.440713706407137e-05, "loss": 0.615, "step": 19204 }, { "epoch": 0.5607135558085895, "grad_norm": 0.5170625337566732, "learning_rate": 2.4405515004055153e-05, "loss": 0.6213, "step": 19205 }, { "epoch": 0.5607427520364369, "grad_norm": 0.5992054950992243, "learning_rate": 2.440389294403893e-05, "loss": 0.727, "step": 19206 }, { "epoch": 0.5607719482642842, "grad_norm": 0.5493724922860448, "learning_rate": 2.440227088402271e-05, "loss": 0.6723, "step": 19207 }, { "epoch": 0.5608011444921316, "grad_norm": 0.5688717013116985, "learning_rate": 2.4400648824006488e-05, "loss": 0.6507, "step": 19208 }, { "epoch": 0.560830340719979, "grad_norm": 0.4466306085927293, "learning_rate": 2.4399026763990267e-05, "loss": 0.4869, "step": 19209 }, { "epoch": 0.5608595369478263, "grad_norm": 0.5302358001638048, "learning_rate": 2.4397404703974048e-05, "loss": 0.6265, "step": 19210 }, { "epoch": 0.5608887331756737, "grad_norm": 0.5295963207561456, "learning_rate": 2.4395782643957827e-05, "loss": 0.6255, "step": 19211 }, { "epoch": 0.560917929403521, "grad_norm": 0.5304363635375241, "learning_rate": 2.439416058394161e-05, "loss": 0.6177, "step": 19212 }, { "epoch": 0.5609471256313684, "grad_norm": 0.5697859346666276, "learning_rate": 2.4392538523925387e-05, "loss": 0.7094, "step": 19213 }, { "epoch": 0.5609763218592158, "grad_norm": 0.5211675716983973, "learning_rate": 2.4390916463909165e-05, "loss": 0.5963, "step": 19214 }, { "epoch": 0.5610055180870631, "grad_norm": 0.5453603415741131, "learning_rate": 2.4389294403892947e-05, "loss": 0.641, "step": 19215 }, { "epoch": 0.5610347143149105, "grad_norm": 0.5182759758797182, "learning_rate": 2.4387672343876725e-05, "loss": 0.5735, "step": 19216 }, { "epoch": 0.5610639105427578, "grad_norm": 0.5308815979515149, "learning_rate": 2.4386050283860504e-05, "loss": 0.6299, "step": 19217 }, { "epoch": 0.5610931067706052, "grad_norm": 0.49634995023852546, "learning_rate": 2.4384428223844282e-05, "loss": 0.5336, "step": 19218 }, { "epoch": 0.5611223029984526, "grad_norm": 0.5313296893098796, "learning_rate": 2.438280616382806e-05, "loss": 0.6018, "step": 19219 }, { "epoch": 0.5611514992262999, "grad_norm": 0.5492253161661454, "learning_rate": 2.4381184103811842e-05, "loss": 0.6482, "step": 19220 }, { "epoch": 0.5611806954541473, "grad_norm": 0.5337549507801435, "learning_rate": 2.4379562043795624e-05, "loss": 0.6094, "step": 19221 }, { "epoch": 0.5612098916819946, "grad_norm": 0.519411504801634, "learning_rate": 2.4377939983779402e-05, "loss": 0.6335, "step": 19222 }, { "epoch": 0.561239087909842, "grad_norm": 0.48216269888040564, "learning_rate": 2.437631792376318e-05, "loss": 0.5149, "step": 19223 }, { "epoch": 0.5612682841376894, "grad_norm": 0.5363203465309694, "learning_rate": 2.437469586374696e-05, "loss": 0.593, "step": 19224 }, { "epoch": 0.5612974803655367, "grad_norm": 0.5312708157014979, "learning_rate": 2.437307380373074e-05, "loss": 0.6666, "step": 19225 }, { "epoch": 0.5613266765933841, "grad_norm": 0.5153694904651697, "learning_rate": 2.437145174371452e-05, "loss": 0.5635, "step": 19226 }, { "epoch": 0.5613558728212314, "grad_norm": 0.526455115745957, "learning_rate": 2.4369829683698298e-05, "loss": 0.6019, "step": 19227 }, { "epoch": 0.5613850690490788, "grad_norm": 0.4918454570512286, "learning_rate": 2.4368207623682076e-05, "loss": 0.5414, "step": 19228 }, { "epoch": 0.5614142652769262, "grad_norm": 0.5388422257998293, "learning_rate": 2.4366585563665854e-05, "loss": 0.6307, "step": 19229 }, { "epoch": 0.5614434615047735, "grad_norm": 0.5453987524527095, "learning_rate": 2.4364963503649636e-05, "loss": 0.665, "step": 19230 }, { "epoch": 0.5614726577326209, "grad_norm": 0.5084466662562039, "learning_rate": 2.4363341443633418e-05, "loss": 0.5645, "step": 19231 }, { "epoch": 0.5615018539604683, "grad_norm": 0.5361572791907172, "learning_rate": 2.4361719383617196e-05, "loss": 0.6589, "step": 19232 }, { "epoch": 0.5615310501883156, "grad_norm": 0.49758914648025926, "learning_rate": 2.4360097323600975e-05, "loss": 0.5866, "step": 19233 }, { "epoch": 0.561560246416163, "grad_norm": 0.5685532751378654, "learning_rate": 2.4358475263584753e-05, "loss": 0.7527, "step": 19234 }, { "epoch": 0.5615894426440103, "grad_norm": 0.5924830211918625, "learning_rate": 2.4356853203568535e-05, "loss": 0.6994, "step": 19235 }, { "epoch": 0.5616186388718577, "grad_norm": 0.6144013996282884, "learning_rate": 2.4355231143552313e-05, "loss": 0.718, "step": 19236 }, { "epoch": 0.5616478350997052, "grad_norm": 0.5298058124691842, "learning_rate": 2.435360908353609e-05, "loss": 0.6625, "step": 19237 }, { "epoch": 0.5616770313275525, "grad_norm": 0.5164895057668747, "learning_rate": 2.435198702351987e-05, "loss": 0.613, "step": 19238 }, { "epoch": 0.5617062275553999, "grad_norm": 0.5094305296171867, "learning_rate": 2.4350364963503648e-05, "loss": 0.5617, "step": 19239 }, { "epoch": 0.5617354237832473, "grad_norm": 0.5164702550662924, "learning_rate": 2.434874290348743e-05, "loss": 0.5819, "step": 19240 }, { "epoch": 0.5617646200110946, "grad_norm": 0.5119014692649346, "learning_rate": 2.4347120843471212e-05, "loss": 0.5743, "step": 19241 }, { "epoch": 0.561793816238942, "grad_norm": 0.5160956370666238, "learning_rate": 2.434549878345499e-05, "loss": 0.5908, "step": 19242 }, { "epoch": 0.5618230124667893, "grad_norm": 0.5127034825700383, "learning_rate": 2.434387672343877e-05, "loss": 0.6411, "step": 19243 }, { "epoch": 0.5618522086946367, "grad_norm": 0.49808708265428886, "learning_rate": 2.4342254663422547e-05, "loss": 0.5772, "step": 19244 }, { "epoch": 0.5618814049224841, "grad_norm": 0.5982184697137289, "learning_rate": 2.434063260340633e-05, "loss": 0.6839, "step": 19245 }, { "epoch": 0.5619106011503314, "grad_norm": 0.4657716240739443, "learning_rate": 2.4339010543390107e-05, "loss": 0.5463, "step": 19246 }, { "epoch": 0.5619397973781788, "grad_norm": 0.49757663295655447, "learning_rate": 2.4337388483373885e-05, "loss": 0.5629, "step": 19247 }, { "epoch": 0.5619689936060261, "grad_norm": 0.5164356399095749, "learning_rate": 2.4335766423357664e-05, "loss": 0.5757, "step": 19248 }, { "epoch": 0.5619981898338735, "grad_norm": 0.546299921027006, "learning_rate": 2.4334144363341442e-05, "loss": 0.5898, "step": 19249 }, { "epoch": 0.5620273860617209, "grad_norm": 0.49765137153026273, "learning_rate": 2.4332522303325224e-05, "loss": 0.5629, "step": 19250 }, { "epoch": 0.5620565822895682, "grad_norm": 0.5588134984133039, "learning_rate": 2.4330900243309006e-05, "loss": 0.6637, "step": 19251 }, { "epoch": 0.5620857785174156, "grad_norm": 0.5828398844355895, "learning_rate": 2.4329278183292784e-05, "loss": 0.6852, "step": 19252 }, { "epoch": 0.562114974745263, "grad_norm": 0.5044138710165271, "learning_rate": 2.4327656123276562e-05, "loss": 0.5485, "step": 19253 }, { "epoch": 0.5621441709731103, "grad_norm": 0.47218130439262973, "learning_rate": 2.432603406326034e-05, "loss": 0.5096, "step": 19254 }, { "epoch": 0.5621733672009577, "grad_norm": 0.5038659592830758, "learning_rate": 2.432441200324412e-05, "loss": 0.5698, "step": 19255 }, { "epoch": 0.562202563428805, "grad_norm": 0.5712563770993847, "learning_rate": 2.43227899432279e-05, "loss": 0.5877, "step": 19256 }, { "epoch": 0.5622317596566524, "grad_norm": 0.5131457590077333, "learning_rate": 2.432116788321168e-05, "loss": 0.6027, "step": 19257 }, { "epoch": 0.5622609558844998, "grad_norm": 0.49266453853078546, "learning_rate": 2.4319545823195458e-05, "loss": 0.5517, "step": 19258 }, { "epoch": 0.5622901521123471, "grad_norm": 0.5090353205919248, "learning_rate": 2.431792376317924e-05, "loss": 0.5916, "step": 19259 }, { "epoch": 0.5623193483401945, "grad_norm": 0.5552626459761884, "learning_rate": 2.4316301703163018e-05, "loss": 0.6356, "step": 19260 }, { "epoch": 0.5623485445680418, "grad_norm": 0.5169542167139436, "learning_rate": 2.43146796431468e-05, "loss": 0.6341, "step": 19261 }, { "epoch": 0.5623777407958892, "grad_norm": 0.5113314130388042, "learning_rate": 2.4313057583130578e-05, "loss": 0.6025, "step": 19262 }, { "epoch": 0.5624069370237366, "grad_norm": 0.5031901171719314, "learning_rate": 2.4311435523114356e-05, "loss": 0.5928, "step": 19263 }, { "epoch": 0.5624361332515839, "grad_norm": 0.520104391263395, "learning_rate": 2.4309813463098135e-05, "loss": 0.5926, "step": 19264 }, { "epoch": 0.5624653294794313, "grad_norm": 0.5125086641749137, "learning_rate": 2.4308191403081913e-05, "loss": 0.6071, "step": 19265 }, { "epoch": 0.5624945257072786, "grad_norm": 0.5180380004777791, "learning_rate": 2.4306569343065695e-05, "loss": 0.5619, "step": 19266 }, { "epoch": 0.562523721935126, "grad_norm": 0.5213856293821806, "learning_rate": 2.4304947283049473e-05, "loss": 0.5798, "step": 19267 }, { "epoch": 0.5625529181629734, "grad_norm": 0.5299023968962282, "learning_rate": 2.4303325223033255e-05, "loss": 0.6188, "step": 19268 }, { "epoch": 0.5625821143908207, "grad_norm": 0.5188718138310977, "learning_rate": 2.4301703163017033e-05, "loss": 0.5916, "step": 19269 }, { "epoch": 0.5626113106186681, "grad_norm": 0.515712581102556, "learning_rate": 2.4300081103000812e-05, "loss": 0.6144, "step": 19270 }, { "epoch": 0.5626405068465155, "grad_norm": 0.5884374257177976, "learning_rate": 2.4298459042984593e-05, "loss": 0.7318, "step": 19271 }, { "epoch": 0.5626697030743628, "grad_norm": 0.5387624054705236, "learning_rate": 2.4296836982968372e-05, "loss": 0.6324, "step": 19272 }, { "epoch": 0.5626988993022102, "grad_norm": 0.49852248077229416, "learning_rate": 2.429521492295215e-05, "loss": 0.596, "step": 19273 }, { "epoch": 0.5627280955300575, "grad_norm": 0.5258458248182646, "learning_rate": 2.429359286293593e-05, "loss": 0.63, "step": 19274 }, { "epoch": 0.5627572917579049, "grad_norm": 0.4821060815339231, "learning_rate": 2.4291970802919707e-05, "loss": 0.5127, "step": 19275 }, { "epoch": 0.5627864879857523, "grad_norm": 0.4851514299020067, "learning_rate": 2.429034874290349e-05, "loss": 0.5646, "step": 19276 }, { "epoch": 0.5628156842135996, "grad_norm": 0.5513885431415666, "learning_rate": 2.4288726682887267e-05, "loss": 0.6833, "step": 19277 }, { "epoch": 0.562844880441447, "grad_norm": 0.5323332386073459, "learning_rate": 2.428710462287105e-05, "loss": 0.6168, "step": 19278 }, { "epoch": 0.5628740766692943, "grad_norm": 0.5329794928320464, "learning_rate": 2.4285482562854827e-05, "loss": 0.6154, "step": 19279 }, { "epoch": 0.5629032728971417, "grad_norm": 0.5156830982506742, "learning_rate": 2.4283860502838606e-05, "loss": 0.579, "step": 19280 }, { "epoch": 0.5629324691249891, "grad_norm": 0.49594287651704444, "learning_rate": 2.4282238442822387e-05, "loss": 0.5868, "step": 19281 }, { "epoch": 0.5629616653528364, "grad_norm": 0.5269410181398633, "learning_rate": 2.4280616382806166e-05, "loss": 0.652, "step": 19282 }, { "epoch": 0.5629908615806838, "grad_norm": 0.4808260040643061, "learning_rate": 2.4278994322789944e-05, "loss": 0.5315, "step": 19283 }, { "epoch": 0.5630200578085311, "grad_norm": 0.5196569331779722, "learning_rate": 2.4277372262773722e-05, "loss": 0.609, "step": 19284 }, { "epoch": 0.5630492540363785, "grad_norm": 0.5737827161538503, "learning_rate": 2.42757502027575e-05, "loss": 0.6577, "step": 19285 }, { "epoch": 0.5630784502642259, "grad_norm": 0.494156274145071, "learning_rate": 2.4274128142741283e-05, "loss": 0.5717, "step": 19286 }, { "epoch": 0.5631076464920732, "grad_norm": 0.5441940740565269, "learning_rate": 2.4272506082725064e-05, "loss": 0.6553, "step": 19287 }, { "epoch": 0.5631368427199206, "grad_norm": 0.523685001719017, "learning_rate": 2.4270884022708843e-05, "loss": 0.6048, "step": 19288 }, { "epoch": 0.563166038947768, "grad_norm": 0.5100129702960361, "learning_rate": 2.426926196269262e-05, "loss": 0.6284, "step": 19289 }, { "epoch": 0.5631952351756153, "grad_norm": 0.5161353366857759, "learning_rate": 2.42676399026764e-05, "loss": 0.5799, "step": 19290 }, { "epoch": 0.5632244314034627, "grad_norm": 0.5302734993933957, "learning_rate": 2.426601784266018e-05, "loss": 0.6337, "step": 19291 }, { "epoch": 0.56325362763131, "grad_norm": 0.5288646468761711, "learning_rate": 2.426439578264396e-05, "loss": 0.6097, "step": 19292 }, { "epoch": 0.5632828238591574, "grad_norm": 0.497387841931871, "learning_rate": 2.4262773722627738e-05, "loss": 0.5591, "step": 19293 }, { "epoch": 0.5633120200870048, "grad_norm": 0.48725303902843975, "learning_rate": 2.4261151662611516e-05, "loss": 0.585, "step": 19294 }, { "epoch": 0.5633412163148521, "grad_norm": 0.4960337287971348, "learning_rate": 2.4259529602595295e-05, "loss": 0.5645, "step": 19295 }, { "epoch": 0.5633704125426995, "grad_norm": 0.5217490006296263, "learning_rate": 2.4257907542579077e-05, "loss": 0.5711, "step": 19296 }, { "epoch": 0.5633996087705468, "grad_norm": 0.5047741419957789, "learning_rate": 2.4256285482562858e-05, "loss": 0.5539, "step": 19297 }, { "epoch": 0.5634288049983942, "grad_norm": 0.5378844577459027, "learning_rate": 2.4254663422546637e-05, "loss": 0.6559, "step": 19298 }, { "epoch": 0.5634580012262416, "grad_norm": 0.5264858662319303, "learning_rate": 2.4253041362530415e-05, "loss": 0.6289, "step": 19299 }, { "epoch": 0.5634871974540889, "grad_norm": 0.5424258716135556, "learning_rate": 2.4251419302514193e-05, "loss": 0.6287, "step": 19300 }, { "epoch": 0.5635163936819363, "grad_norm": 0.4907435273557972, "learning_rate": 2.4249797242497975e-05, "loss": 0.5328, "step": 19301 }, { "epoch": 0.5635455899097837, "grad_norm": 0.5255585276968667, "learning_rate": 2.4248175182481754e-05, "loss": 0.6308, "step": 19302 }, { "epoch": 0.563574786137631, "grad_norm": 0.4747238601226969, "learning_rate": 2.4246553122465532e-05, "loss": 0.5254, "step": 19303 }, { "epoch": 0.5636039823654784, "grad_norm": 0.517346951726167, "learning_rate": 2.424493106244931e-05, "loss": 0.6002, "step": 19304 }, { "epoch": 0.5636331785933257, "grad_norm": 0.5222259785556393, "learning_rate": 2.424330900243309e-05, "loss": 0.5804, "step": 19305 }, { "epoch": 0.5636623748211731, "grad_norm": 0.48665125785646424, "learning_rate": 2.424168694241687e-05, "loss": 0.5257, "step": 19306 }, { "epoch": 0.5636915710490205, "grad_norm": 0.520369525427397, "learning_rate": 2.4240064882400652e-05, "loss": 0.6255, "step": 19307 }, { "epoch": 0.5637207672768678, "grad_norm": 0.5353045039393918, "learning_rate": 2.423844282238443e-05, "loss": 0.6013, "step": 19308 }, { "epoch": 0.5637499635047152, "grad_norm": 0.5582148379580918, "learning_rate": 2.423682076236821e-05, "loss": 0.698, "step": 19309 }, { "epoch": 0.5637791597325625, "grad_norm": 0.4804712321957556, "learning_rate": 2.4235198702351987e-05, "loss": 0.5427, "step": 19310 }, { "epoch": 0.5638083559604099, "grad_norm": 0.4975763232186083, "learning_rate": 2.423357664233577e-05, "loss": 0.5377, "step": 19311 }, { "epoch": 0.5638375521882573, "grad_norm": 0.4941247810927403, "learning_rate": 2.4231954582319547e-05, "loss": 0.5961, "step": 19312 }, { "epoch": 0.5638667484161046, "grad_norm": 0.5752173601749461, "learning_rate": 2.4230332522303326e-05, "loss": 0.7124, "step": 19313 }, { "epoch": 0.563895944643952, "grad_norm": 0.5466965939230284, "learning_rate": 2.4228710462287104e-05, "loss": 0.6577, "step": 19314 }, { "epoch": 0.5639251408717993, "grad_norm": 0.5080410217103084, "learning_rate": 2.4227088402270883e-05, "loss": 0.5928, "step": 19315 }, { "epoch": 0.5639543370996467, "grad_norm": 0.5145578070945727, "learning_rate": 2.4225466342254664e-05, "loss": 0.605, "step": 19316 }, { "epoch": 0.5639835333274941, "grad_norm": 0.5769481202797283, "learning_rate": 2.4223844282238446e-05, "loss": 0.711, "step": 19317 }, { "epoch": 0.5640127295553414, "grad_norm": 0.4880405285459969, "learning_rate": 2.4222222222222224e-05, "loss": 0.5808, "step": 19318 }, { "epoch": 0.5640419257831888, "grad_norm": 0.5124306475725513, "learning_rate": 2.4220600162206003e-05, "loss": 0.6147, "step": 19319 }, { "epoch": 0.5640711220110362, "grad_norm": 0.468416104966781, "learning_rate": 2.421897810218978e-05, "loss": 0.5203, "step": 19320 }, { "epoch": 0.5641003182388835, "grad_norm": 0.5006031478411536, "learning_rate": 2.421735604217356e-05, "loss": 0.6245, "step": 19321 }, { "epoch": 0.5641295144667309, "grad_norm": 0.5209951764991898, "learning_rate": 2.421573398215734e-05, "loss": 0.579, "step": 19322 }, { "epoch": 0.5641587106945782, "grad_norm": 0.5320766200074849, "learning_rate": 2.421411192214112e-05, "loss": 0.6291, "step": 19323 }, { "epoch": 0.5641879069224256, "grad_norm": 0.5451445756799718, "learning_rate": 2.4212489862124898e-05, "loss": 0.654, "step": 19324 }, { "epoch": 0.564217103150273, "grad_norm": 0.5038221178640612, "learning_rate": 2.421086780210868e-05, "loss": 0.5742, "step": 19325 }, { "epoch": 0.5642462993781203, "grad_norm": 0.4958003369181082, "learning_rate": 2.4209245742092458e-05, "loss": 0.5364, "step": 19326 }, { "epoch": 0.5642754956059677, "grad_norm": 0.5602578661928116, "learning_rate": 2.420762368207624e-05, "loss": 0.6552, "step": 19327 }, { "epoch": 0.564304691833815, "grad_norm": 0.5210833199093466, "learning_rate": 2.420600162206002e-05, "loss": 0.6231, "step": 19328 }, { "epoch": 0.5643338880616624, "grad_norm": 0.5859558794652389, "learning_rate": 2.4204379562043797e-05, "loss": 0.7095, "step": 19329 }, { "epoch": 0.5643630842895098, "grad_norm": 0.5082909692145777, "learning_rate": 2.4202757502027575e-05, "loss": 0.6043, "step": 19330 }, { "epoch": 0.5643922805173571, "grad_norm": 0.5714961157299744, "learning_rate": 2.4201135442011353e-05, "loss": 0.6812, "step": 19331 }, { "epoch": 0.5644214767452045, "grad_norm": 0.5045622385852586, "learning_rate": 2.4199513381995135e-05, "loss": 0.5596, "step": 19332 }, { "epoch": 0.5644506729730518, "grad_norm": 0.5142589560425009, "learning_rate": 2.4197891321978914e-05, "loss": 0.5781, "step": 19333 }, { "epoch": 0.5644798692008992, "grad_norm": 0.5247995720160191, "learning_rate": 2.4196269261962692e-05, "loss": 0.5748, "step": 19334 }, { "epoch": 0.5645090654287466, "grad_norm": 0.5133619843501471, "learning_rate": 2.4194647201946474e-05, "loss": 0.6015, "step": 19335 }, { "epoch": 0.5645382616565939, "grad_norm": 0.5051594572025181, "learning_rate": 2.4193025141930252e-05, "loss": 0.6003, "step": 19336 }, { "epoch": 0.5645674578844413, "grad_norm": 0.5441675406010422, "learning_rate": 2.4191403081914034e-05, "loss": 0.6881, "step": 19337 }, { "epoch": 0.5645966541122887, "grad_norm": 0.5166549523368293, "learning_rate": 2.4189781021897812e-05, "loss": 0.5811, "step": 19338 }, { "epoch": 0.564625850340136, "grad_norm": 0.4798457656271805, "learning_rate": 2.418815896188159e-05, "loss": 0.536, "step": 19339 }, { "epoch": 0.5646550465679834, "grad_norm": 0.48032944623321067, "learning_rate": 2.418653690186537e-05, "loss": 0.5083, "step": 19340 }, { "epoch": 0.5646842427958307, "grad_norm": 0.5023025831328606, "learning_rate": 2.4184914841849147e-05, "loss": 0.6228, "step": 19341 }, { "epoch": 0.5647134390236781, "grad_norm": 0.5585528768743083, "learning_rate": 2.418329278183293e-05, "loss": 0.6553, "step": 19342 }, { "epoch": 0.5647426352515255, "grad_norm": 0.5628846878830451, "learning_rate": 2.4181670721816708e-05, "loss": 0.6372, "step": 19343 }, { "epoch": 0.5647718314793728, "grad_norm": 0.5342878830687201, "learning_rate": 2.418004866180049e-05, "loss": 0.6761, "step": 19344 }, { "epoch": 0.5648010277072202, "grad_norm": 0.5447599462026214, "learning_rate": 2.4178426601784268e-05, "loss": 0.6438, "step": 19345 }, { "epoch": 0.5648302239350675, "grad_norm": 0.5138334445710686, "learning_rate": 2.4176804541768046e-05, "loss": 0.5855, "step": 19346 }, { "epoch": 0.5648594201629149, "grad_norm": 0.5470954031589362, "learning_rate": 2.4175182481751828e-05, "loss": 0.6655, "step": 19347 }, { "epoch": 0.5648886163907623, "grad_norm": 0.5337154694795435, "learning_rate": 2.4173560421735606e-05, "loss": 0.62, "step": 19348 }, { "epoch": 0.5649178126186096, "grad_norm": 0.5368483661080101, "learning_rate": 2.4171938361719385e-05, "loss": 0.6396, "step": 19349 }, { "epoch": 0.564947008846457, "grad_norm": 0.48562818785229217, "learning_rate": 2.4170316301703163e-05, "loss": 0.563, "step": 19350 }, { "epoch": 0.5649762050743043, "grad_norm": 0.5033061098215934, "learning_rate": 2.416869424168694e-05, "loss": 0.6042, "step": 19351 }, { "epoch": 0.5650054013021517, "grad_norm": 0.5253892375979012, "learning_rate": 2.4167072181670723e-05, "loss": 0.5338, "step": 19352 }, { "epoch": 0.5650345975299991, "grad_norm": 0.5560376135005585, "learning_rate": 2.4165450121654505e-05, "loss": 0.6709, "step": 19353 }, { "epoch": 0.5650637937578464, "grad_norm": 0.5053791832936013, "learning_rate": 2.4163828061638283e-05, "loss": 0.5874, "step": 19354 }, { "epoch": 0.5650929899856938, "grad_norm": 0.555784794189264, "learning_rate": 2.416220600162206e-05, "loss": 0.6171, "step": 19355 }, { "epoch": 0.5651221862135412, "grad_norm": 0.5273789919483551, "learning_rate": 2.416058394160584e-05, "loss": 0.6218, "step": 19356 }, { "epoch": 0.5651513824413886, "grad_norm": 0.5272563519805933, "learning_rate": 2.4158961881589622e-05, "loss": 0.6121, "step": 19357 }, { "epoch": 0.565180578669236, "grad_norm": 0.5568997658730378, "learning_rate": 2.41573398215734e-05, "loss": 0.65, "step": 19358 }, { "epoch": 0.5652097748970834, "grad_norm": 0.4961919514425228, "learning_rate": 2.415571776155718e-05, "loss": 0.5433, "step": 19359 }, { "epoch": 0.5652389711249307, "grad_norm": 0.5601080048511929, "learning_rate": 2.4154095701540957e-05, "loss": 0.6077, "step": 19360 }, { "epoch": 0.5652681673527781, "grad_norm": 0.5076868020635851, "learning_rate": 2.4152473641524735e-05, "loss": 0.5892, "step": 19361 }, { "epoch": 0.5652973635806254, "grad_norm": 0.5294286984967164, "learning_rate": 2.4150851581508517e-05, "loss": 0.6853, "step": 19362 }, { "epoch": 0.5653265598084728, "grad_norm": 0.5551722211600331, "learning_rate": 2.41492295214923e-05, "loss": 0.6463, "step": 19363 }, { "epoch": 0.5653557560363202, "grad_norm": 0.4989388553899135, "learning_rate": 2.4147607461476077e-05, "loss": 0.5672, "step": 19364 }, { "epoch": 0.5653849522641675, "grad_norm": 0.5690569037638559, "learning_rate": 2.4145985401459855e-05, "loss": 0.7287, "step": 19365 }, { "epoch": 0.5654141484920149, "grad_norm": 0.545608034539406, "learning_rate": 2.4144363341443634e-05, "loss": 0.6279, "step": 19366 }, { "epoch": 0.5654433447198622, "grad_norm": 0.4901667344348702, "learning_rate": 2.4142741281427416e-05, "loss": 0.5138, "step": 19367 }, { "epoch": 0.5654725409477096, "grad_norm": 0.5404160027739915, "learning_rate": 2.4141119221411194e-05, "loss": 0.5967, "step": 19368 }, { "epoch": 0.565501737175557, "grad_norm": 0.5583471506608902, "learning_rate": 2.4139497161394972e-05, "loss": 0.6611, "step": 19369 }, { "epoch": 0.5655309334034043, "grad_norm": 0.5234574041332409, "learning_rate": 2.413787510137875e-05, "loss": 0.5962, "step": 19370 }, { "epoch": 0.5655601296312517, "grad_norm": 0.49875414611038543, "learning_rate": 2.413625304136253e-05, "loss": 0.5507, "step": 19371 }, { "epoch": 0.565589325859099, "grad_norm": 0.540091066941136, "learning_rate": 2.413463098134631e-05, "loss": 0.6329, "step": 19372 }, { "epoch": 0.5656185220869464, "grad_norm": 0.5294417537107645, "learning_rate": 2.4133008921330093e-05, "loss": 0.6348, "step": 19373 }, { "epoch": 0.5656477183147938, "grad_norm": 0.5441809256641156, "learning_rate": 2.413138686131387e-05, "loss": 0.6352, "step": 19374 }, { "epoch": 0.5656769145426411, "grad_norm": 0.5480913029333477, "learning_rate": 2.412976480129765e-05, "loss": 0.587, "step": 19375 }, { "epoch": 0.5657061107704885, "grad_norm": 0.5025627810993281, "learning_rate": 2.4128142741281428e-05, "loss": 0.5687, "step": 19376 }, { "epoch": 0.5657353069983359, "grad_norm": 0.5217111334848539, "learning_rate": 2.4126520681265206e-05, "loss": 0.5904, "step": 19377 }, { "epoch": 0.5657645032261832, "grad_norm": 0.46828219351040273, "learning_rate": 2.4124898621248988e-05, "loss": 0.5156, "step": 19378 }, { "epoch": 0.5657936994540306, "grad_norm": 0.5234911758818414, "learning_rate": 2.4123276561232766e-05, "loss": 0.5987, "step": 19379 }, { "epoch": 0.5658228956818779, "grad_norm": 0.5821945230137078, "learning_rate": 2.4121654501216545e-05, "loss": 0.6613, "step": 19380 }, { "epoch": 0.5658520919097253, "grad_norm": 0.5218815480170623, "learning_rate": 2.4120032441200323e-05, "loss": 0.6281, "step": 19381 }, { "epoch": 0.5658812881375727, "grad_norm": 0.519720884404011, "learning_rate": 2.4118410381184105e-05, "loss": 0.6243, "step": 19382 }, { "epoch": 0.56591048436542, "grad_norm": 0.5122320676302379, "learning_rate": 2.4116788321167887e-05, "loss": 0.5798, "step": 19383 }, { "epoch": 0.5659396805932674, "grad_norm": 0.5159105539442576, "learning_rate": 2.4115166261151665e-05, "loss": 0.5961, "step": 19384 }, { "epoch": 0.5659688768211147, "grad_norm": 0.5099615686683485, "learning_rate": 2.4113544201135443e-05, "loss": 0.5802, "step": 19385 }, { "epoch": 0.5659980730489621, "grad_norm": 0.5184613222159047, "learning_rate": 2.411192214111922e-05, "loss": 0.5634, "step": 19386 }, { "epoch": 0.5660272692768095, "grad_norm": 0.5093294349757745, "learning_rate": 2.4110300081103e-05, "loss": 0.5618, "step": 19387 }, { "epoch": 0.5660564655046568, "grad_norm": 0.5044736662212941, "learning_rate": 2.4108678021086782e-05, "loss": 0.5999, "step": 19388 }, { "epoch": 0.5660856617325042, "grad_norm": 0.5337867029925949, "learning_rate": 2.410705596107056e-05, "loss": 0.6098, "step": 19389 }, { "epoch": 0.5661148579603515, "grad_norm": 0.5366872936686269, "learning_rate": 2.410543390105434e-05, "loss": 0.5935, "step": 19390 }, { "epoch": 0.5661440541881989, "grad_norm": 0.536957085462085, "learning_rate": 2.410381184103812e-05, "loss": 0.6245, "step": 19391 }, { "epoch": 0.5661732504160463, "grad_norm": 0.49786059880738615, "learning_rate": 2.41021897810219e-05, "loss": 0.5532, "step": 19392 }, { "epoch": 0.5662024466438936, "grad_norm": 0.5133490598809213, "learning_rate": 2.410056772100568e-05, "loss": 0.5739, "step": 19393 }, { "epoch": 0.566231642871741, "grad_norm": 0.467985817301155, "learning_rate": 2.409894566098946e-05, "loss": 0.518, "step": 19394 }, { "epoch": 0.5662608390995884, "grad_norm": 0.5717267289742672, "learning_rate": 2.4097323600973237e-05, "loss": 0.6313, "step": 19395 }, { "epoch": 0.5662900353274357, "grad_norm": 0.48395153209906216, "learning_rate": 2.4095701540957016e-05, "loss": 0.5282, "step": 19396 }, { "epoch": 0.5663192315552831, "grad_norm": 0.4941760244077005, "learning_rate": 2.4094079480940794e-05, "loss": 0.5508, "step": 19397 }, { "epoch": 0.5663484277831304, "grad_norm": 0.5758870008250989, "learning_rate": 2.4092457420924576e-05, "loss": 0.7069, "step": 19398 }, { "epoch": 0.5663776240109778, "grad_norm": 0.5617333521585502, "learning_rate": 2.4090835360908354e-05, "loss": 0.679, "step": 19399 }, { "epoch": 0.5664068202388252, "grad_norm": 0.5691491380697553, "learning_rate": 2.4089213300892132e-05, "loss": 0.6724, "step": 19400 }, { "epoch": 0.5664360164666725, "grad_norm": 0.5365780066745912, "learning_rate": 2.4087591240875914e-05, "loss": 0.6501, "step": 19401 }, { "epoch": 0.5664652126945199, "grad_norm": 0.5358351910253571, "learning_rate": 2.4085969180859693e-05, "loss": 0.6343, "step": 19402 }, { "epoch": 0.5664944089223672, "grad_norm": 0.5125251713776483, "learning_rate": 2.4084347120843474e-05, "loss": 0.5901, "step": 19403 }, { "epoch": 0.5665236051502146, "grad_norm": 0.5243973594344276, "learning_rate": 2.4082725060827253e-05, "loss": 0.5952, "step": 19404 }, { "epoch": 0.566552801378062, "grad_norm": 0.5238047199789012, "learning_rate": 2.408110300081103e-05, "loss": 0.5995, "step": 19405 }, { "epoch": 0.5665819976059093, "grad_norm": 0.5167261951894931, "learning_rate": 2.407948094079481e-05, "loss": 0.5811, "step": 19406 }, { "epoch": 0.5666111938337567, "grad_norm": 0.502875040166056, "learning_rate": 2.4077858880778588e-05, "loss": 0.5986, "step": 19407 }, { "epoch": 0.566640390061604, "grad_norm": 0.5060700557755605, "learning_rate": 2.407623682076237e-05, "loss": 0.5629, "step": 19408 }, { "epoch": 0.5666695862894514, "grad_norm": 0.5115201464647202, "learning_rate": 2.4074614760746148e-05, "loss": 0.6093, "step": 19409 }, { "epoch": 0.5666987825172988, "grad_norm": 0.5391999992945385, "learning_rate": 2.407299270072993e-05, "loss": 0.6623, "step": 19410 }, { "epoch": 0.5667279787451461, "grad_norm": 0.5710888999661151, "learning_rate": 2.4071370640713708e-05, "loss": 0.6507, "step": 19411 }, { "epoch": 0.5667571749729935, "grad_norm": 0.5170811607907702, "learning_rate": 2.4069748580697486e-05, "loss": 0.5845, "step": 19412 }, { "epoch": 0.5667863712008409, "grad_norm": 0.563477947863976, "learning_rate": 2.4068126520681268e-05, "loss": 0.6247, "step": 19413 }, { "epoch": 0.5668155674286882, "grad_norm": 0.5195710274999057, "learning_rate": 2.4066504460665047e-05, "loss": 0.5855, "step": 19414 }, { "epoch": 0.5668447636565356, "grad_norm": 0.5064027311551849, "learning_rate": 2.4064882400648825e-05, "loss": 0.5305, "step": 19415 }, { "epoch": 0.5668739598843829, "grad_norm": 0.5213061224497731, "learning_rate": 2.4063260340632603e-05, "loss": 0.6131, "step": 19416 }, { "epoch": 0.5669031561122303, "grad_norm": 0.547665236068335, "learning_rate": 2.4061638280616382e-05, "loss": 0.6309, "step": 19417 }, { "epoch": 0.5669323523400777, "grad_norm": 0.5054377431764182, "learning_rate": 2.4060016220600164e-05, "loss": 0.5167, "step": 19418 }, { "epoch": 0.566961548567925, "grad_norm": 0.5747318053709404, "learning_rate": 2.4058394160583945e-05, "loss": 0.6475, "step": 19419 }, { "epoch": 0.5669907447957724, "grad_norm": 0.5048149785581195, "learning_rate": 2.4056772100567724e-05, "loss": 0.5566, "step": 19420 }, { "epoch": 0.5670199410236197, "grad_norm": 0.5300159223322766, "learning_rate": 2.4055150040551502e-05, "loss": 0.6242, "step": 19421 }, { "epoch": 0.5670491372514671, "grad_norm": 0.634742021820797, "learning_rate": 2.405352798053528e-05, "loss": 0.708, "step": 19422 }, { "epoch": 0.5670783334793145, "grad_norm": 0.5134134067338889, "learning_rate": 2.4051905920519062e-05, "loss": 0.5768, "step": 19423 }, { "epoch": 0.5671075297071618, "grad_norm": 0.5135060146308605, "learning_rate": 2.405028386050284e-05, "loss": 0.5594, "step": 19424 }, { "epoch": 0.5671367259350092, "grad_norm": 0.5218902639702391, "learning_rate": 2.404866180048662e-05, "loss": 0.6104, "step": 19425 }, { "epoch": 0.5671659221628566, "grad_norm": 0.5349257802490612, "learning_rate": 2.4047039740470397e-05, "loss": 0.6435, "step": 19426 }, { "epoch": 0.5671951183907039, "grad_norm": 1.1162405367858492, "learning_rate": 2.4045417680454176e-05, "loss": 0.5423, "step": 19427 }, { "epoch": 0.5672243146185513, "grad_norm": 0.5229128917989045, "learning_rate": 2.4043795620437957e-05, "loss": 0.5869, "step": 19428 }, { "epoch": 0.5672535108463986, "grad_norm": 0.5013766969100998, "learning_rate": 2.404217356042174e-05, "loss": 0.5771, "step": 19429 }, { "epoch": 0.567282707074246, "grad_norm": 0.5258441515438969, "learning_rate": 2.4040551500405518e-05, "loss": 0.6351, "step": 19430 }, { "epoch": 0.5673119033020934, "grad_norm": 0.5591511246361649, "learning_rate": 2.4038929440389296e-05, "loss": 0.5749, "step": 19431 }, { "epoch": 0.5673410995299407, "grad_norm": 0.5400787916955199, "learning_rate": 2.4037307380373074e-05, "loss": 0.6474, "step": 19432 }, { "epoch": 0.5673702957577881, "grad_norm": 0.5465538954003502, "learning_rate": 2.4035685320356856e-05, "loss": 0.6355, "step": 19433 }, { "epoch": 0.5673994919856354, "grad_norm": 0.5280537746342812, "learning_rate": 2.4034063260340634e-05, "loss": 0.6578, "step": 19434 }, { "epoch": 0.5674286882134828, "grad_norm": 0.4839727845960937, "learning_rate": 2.4032441200324413e-05, "loss": 0.5238, "step": 19435 }, { "epoch": 0.5674578844413302, "grad_norm": 0.5196810170359445, "learning_rate": 2.403081914030819e-05, "loss": 0.5405, "step": 19436 }, { "epoch": 0.5674870806691775, "grad_norm": 0.5356584413637431, "learning_rate": 2.402919708029197e-05, "loss": 0.5835, "step": 19437 }, { "epoch": 0.5675162768970249, "grad_norm": 0.5316650688652009, "learning_rate": 2.402757502027575e-05, "loss": 0.5996, "step": 19438 }, { "epoch": 0.5675454731248722, "grad_norm": 0.5180045196988831, "learning_rate": 2.4025952960259533e-05, "loss": 0.5668, "step": 19439 }, { "epoch": 0.5675746693527196, "grad_norm": 0.5308384040414156, "learning_rate": 2.402433090024331e-05, "loss": 0.6487, "step": 19440 }, { "epoch": 0.567603865580567, "grad_norm": 0.5376598874853272, "learning_rate": 2.402270884022709e-05, "loss": 0.6378, "step": 19441 }, { "epoch": 0.5676330618084143, "grad_norm": 0.4828024708795448, "learning_rate": 2.4021086780210868e-05, "loss": 0.5343, "step": 19442 }, { "epoch": 0.5676622580362617, "grad_norm": 0.521620645261324, "learning_rate": 2.4019464720194647e-05, "loss": 0.6232, "step": 19443 }, { "epoch": 0.567691454264109, "grad_norm": 0.5152811081961138, "learning_rate": 2.401784266017843e-05, "loss": 0.6381, "step": 19444 }, { "epoch": 0.5677206504919564, "grad_norm": 0.5212404848183312, "learning_rate": 2.4016220600162207e-05, "loss": 0.6152, "step": 19445 }, { "epoch": 0.5677498467198038, "grad_norm": 0.5099750848619009, "learning_rate": 2.4014598540145985e-05, "loss": 0.5958, "step": 19446 }, { "epoch": 0.5677790429476511, "grad_norm": 0.5269205992646226, "learning_rate": 2.4012976480129763e-05, "loss": 0.6277, "step": 19447 }, { "epoch": 0.5678082391754985, "grad_norm": 0.5038848019715496, "learning_rate": 2.4011354420113545e-05, "loss": 0.5665, "step": 19448 }, { "epoch": 0.5678374354033459, "grad_norm": 0.5222040012748399, "learning_rate": 2.4009732360097327e-05, "loss": 0.608, "step": 19449 }, { "epoch": 0.5678666316311932, "grad_norm": 0.5890692092095858, "learning_rate": 2.4008110300081105e-05, "loss": 0.5724, "step": 19450 }, { "epoch": 0.5678958278590406, "grad_norm": 0.5346988315021584, "learning_rate": 2.4006488240064884e-05, "loss": 0.6387, "step": 19451 }, { "epoch": 0.5679250240868879, "grad_norm": 0.49408637743680245, "learning_rate": 2.4004866180048662e-05, "loss": 0.5694, "step": 19452 }, { "epoch": 0.5679542203147353, "grad_norm": 0.4839283532246553, "learning_rate": 2.400324412003244e-05, "loss": 0.559, "step": 19453 }, { "epoch": 0.5679834165425827, "grad_norm": 0.49024205868642406, "learning_rate": 2.4001622060016222e-05, "loss": 0.5337, "step": 19454 }, { "epoch": 0.56801261277043, "grad_norm": 0.5183113355403507, "learning_rate": 2.4e-05, "loss": 0.6145, "step": 19455 }, { "epoch": 0.5680418089982774, "grad_norm": 0.49813841279523086, "learning_rate": 2.399837793998378e-05, "loss": 0.5716, "step": 19456 }, { "epoch": 0.5680710052261247, "grad_norm": 0.5028008357045401, "learning_rate": 2.399675587996756e-05, "loss": 0.5503, "step": 19457 }, { "epoch": 0.5681002014539721, "grad_norm": 0.537638299214176, "learning_rate": 2.399513381995134e-05, "loss": 0.643, "step": 19458 }, { "epoch": 0.5681293976818195, "grad_norm": 0.5145913004516591, "learning_rate": 2.399351175993512e-05, "loss": 0.6114, "step": 19459 }, { "epoch": 0.5681585939096668, "grad_norm": 0.5642610341832609, "learning_rate": 2.39918896999189e-05, "loss": 0.6896, "step": 19460 }, { "epoch": 0.5681877901375142, "grad_norm": 0.4798890014301816, "learning_rate": 2.3990267639902678e-05, "loss": 0.4995, "step": 19461 }, { "epoch": 0.5682169863653616, "grad_norm": 0.5576549489858874, "learning_rate": 2.3988645579886456e-05, "loss": 0.6488, "step": 19462 }, { "epoch": 0.5682461825932089, "grad_norm": 0.5214133807567598, "learning_rate": 2.3987023519870234e-05, "loss": 0.6154, "step": 19463 }, { "epoch": 0.5682753788210563, "grad_norm": 0.593425883501898, "learning_rate": 2.3985401459854016e-05, "loss": 0.6077, "step": 19464 }, { "epoch": 0.5683045750489036, "grad_norm": 0.5247994740608852, "learning_rate": 2.3983779399837795e-05, "loss": 0.6099, "step": 19465 }, { "epoch": 0.568333771276751, "grad_norm": 0.5285236887599892, "learning_rate": 2.3982157339821573e-05, "loss": 0.6544, "step": 19466 }, { "epoch": 0.5683629675045984, "grad_norm": 0.519336515626285, "learning_rate": 2.3980535279805355e-05, "loss": 0.6021, "step": 19467 }, { "epoch": 0.5683921637324457, "grad_norm": 0.513041395513178, "learning_rate": 2.3978913219789133e-05, "loss": 0.6274, "step": 19468 }, { "epoch": 0.5684213599602931, "grad_norm": 0.508048216836995, "learning_rate": 2.3977291159772915e-05, "loss": 0.5722, "step": 19469 }, { "epoch": 0.5684505561881404, "grad_norm": 0.5496689254726624, "learning_rate": 2.3975669099756693e-05, "loss": 0.641, "step": 19470 }, { "epoch": 0.5684797524159878, "grad_norm": 0.4916888482586569, "learning_rate": 2.397404703974047e-05, "loss": 0.5665, "step": 19471 }, { "epoch": 0.5685089486438352, "grad_norm": 0.5169070466846526, "learning_rate": 2.397242497972425e-05, "loss": 0.6058, "step": 19472 }, { "epoch": 0.5685381448716825, "grad_norm": 0.55888347485415, "learning_rate": 2.3970802919708028e-05, "loss": 0.6723, "step": 19473 }, { "epoch": 0.5685673410995299, "grad_norm": 0.5191185120109568, "learning_rate": 2.396918085969181e-05, "loss": 0.634, "step": 19474 }, { "epoch": 0.5685965373273772, "grad_norm": 0.47754354965128665, "learning_rate": 2.396755879967559e-05, "loss": 0.5673, "step": 19475 }, { "epoch": 0.5686257335552246, "grad_norm": 0.49953861982657827, "learning_rate": 2.396593673965937e-05, "loss": 0.5397, "step": 19476 }, { "epoch": 0.568654929783072, "grad_norm": 0.5068076131239033, "learning_rate": 2.396431467964315e-05, "loss": 0.595, "step": 19477 }, { "epoch": 0.5686841260109194, "grad_norm": 0.5230137342330579, "learning_rate": 2.3962692619626927e-05, "loss": 0.6716, "step": 19478 }, { "epoch": 0.5687133222387668, "grad_norm": 0.5225811431721171, "learning_rate": 2.396107055961071e-05, "loss": 0.6143, "step": 19479 }, { "epoch": 0.5687425184666142, "grad_norm": 0.6082624575469276, "learning_rate": 2.3959448499594487e-05, "loss": 0.6646, "step": 19480 }, { "epoch": 0.5687717146944615, "grad_norm": 0.5007717481298809, "learning_rate": 2.3957826439578265e-05, "loss": 0.5659, "step": 19481 }, { "epoch": 0.5688009109223089, "grad_norm": 0.5215156419437875, "learning_rate": 2.3956204379562044e-05, "loss": 0.6044, "step": 19482 }, { "epoch": 0.5688301071501563, "grad_norm": 0.539119569827791, "learning_rate": 2.3954582319545822e-05, "loss": 0.6336, "step": 19483 }, { "epoch": 0.5688593033780036, "grad_norm": 0.5268545544355214, "learning_rate": 2.3952960259529604e-05, "loss": 0.6263, "step": 19484 }, { "epoch": 0.568888499605851, "grad_norm": 0.5465701265659411, "learning_rate": 2.3951338199513382e-05, "loss": 0.6401, "step": 19485 }, { "epoch": 0.5689176958336983, "grad_norm": 0.5347401001245371, "learning_rate": 2.3949716139497164e-05, "loss": 0.6444, "step": 19486 }, { "epoch": 0.5689468920615457, "grad_norm": 0.5460273169618656, "learning_rate": 2.3948094079480942e-05, "loss": 0.6485, "step": 19487 }, { "epoch": 0.5689760882893931, "grad_norm": 0.5136094378788395, "learning_rate": 2.394647201946472e-05, "loss": 0.5838, "step": 19488 }, { "epoch": 0.5690052845172404, "grad_norm": 0.5242859318339431, "learning_rate": 2.3944849959448503e-05, "loss": 0.6225, "step": 19489 }, { "epoch": 0.5690344807450878, "grad_norm": 0.5399909869590226, "learning_rate": 2.394322789943228e-05, "loss": 0.6265, "step": 19490 }, { "epoch": 0.5690636769729351, "grad_norm": 0.5779725098698668, "learning_rate": 2.394160583941606e-05, "loss": 0.7031, "step": 19491 }, { "epoch": 0.5690928732007825, "grad_norm": 0.5328294164359992, "learning_rate": 2.3939983779399838e-05, "loss": 0.6116, "step": 19492 }, { "epoch": 0.5691220694286299, "grad_norm": 0.5195077231954744, "learning_rate": 2.3938361719383616e-05, "loss": 0.6306, "step": 19493 }, { "epoch": 0.5691512656564772, "grad_norm": 0.5263948234242443, "learning_rate": 2.3936739659367398e-05, "loss": 0.6357, "step": 19494 }, { "epoch": 0.5691804618843246, "grad_norm": 0.5192561503782324, "learning_rate": 2.393511759935118e-05, "loss": 0.6256, "step": 19495 }, { "epoch": 0.569209658112172, "grad_norm": 0.5512245443999989, "learning_rate": 2.3933495539334958e-05, "loss": 0.65, "step": 19496 }, { "epoch": 0.5692388543400193, "grad_norm": 0.5066777638906833, "learning_rate": 2.3931873479318736e-05, "loss": 0.5965, "step": 19497 }, { "epoch": 0.5692680505678667, "grad_norm": 0.5307394906696598, "learning_rate": 2.3930251419302515e-05, "loss": 0.6192, "step": 19498 }, { "epoch": 0.569297246795714, "grad_norm": 0.49283720881635823, "learning_rate": 2.3928629359286293e-05, "loss": 0.5646, "step": 19499 }, { "epoch": 0.5693264430235614, "grad_norm": 0.47860385113208087, "learning_rate": 2.3927007299270075e-05, "loss": 0.5466, "step": 19500 }, { "epoch": 0.5693556392514088, "grad_norm": 0.5847783799218421, "learning_rate": 2.3925385239253853e-05, "loss": 0.6642, "step": 19501 }, { "epoch": 0.5693848354792561, "grad_norm": 0.5541199042300878, "learning_rate": 2.392376317923763e-05, "loss": 0.6456, "step": 19502 }, { "epoch": 0.5694140317071035, "grad_norm": 0.5460628758013547, "learning_rate": 2.392214111922141e-05, "loss": 0.6356, "step": 19503 }, { "epoch": 0.5694432279349508, "grad_norm": 0.5227615197070694, "learning_rate": 2.3920519059205192e-05, "loss": 0.6122, "step": 19504 }, { "epoch": 0.5694724241627982, "grad_norm": 0.518155168153204, "learning_rate": 2.3918896999188974e-05, "loss": 0.5514, "step": 19505 }, { "epoch": 0.5695016203906456, "grad_norm": 0.5122642082803103, "learning_rate": 2.3917274939172752e-05, "loss": 0.5802, "step": 19506 }, { "epoch": 0.5695308166184929, "grad_norm": 0.48617382937735537, "learning_rate": 2.391565287915653e-05, "loss": 0.5577, "step": 19507 }, { "epoch": 0.5695600128463403, "grad_norm": 0.5181427371833409, "learning_rate": 2.391403081914031e-05, "loss": 0.6391, "step": 19508 }, { "epoch": 0.5695892090741876, "grad_norm": 0.518012604459955, "learning_rate": 2.3912408759124087e-05, "loss": 0.5616, "step": 19509 }, { "epoch": 0.569618405302035, "grad_norm": 0.5270476412283076, "learning_rate": 2.391078669910787e-05, "loss": 0.6191, "step": 19510 }, { "epoch": 0.5696476015298824, "grad_norm": 0.5207970525477679, "learning_rate": 2.3909164639091647e-05, "loss": 0.6028, "step": 19511 }, { "epoch": 0.5696767977577297, "grad_norm": 0.5242733543857935, "learning_rate": 2.3907542579075426e-05, "loss": 0.6207, "step": 19512 }, { "epoch": 0.5697059939855771, "grad_norm": 0.4825046252858525, "learning_rate": 2.3905920519059204e-05, "loss": 0.5568, "step": 19513 }, { "epoch": 0.5697351902134244, "grad_norm": 0.46127503234206996, "learning_rate": 2.3904298459042986e-05, "loss": 0.5016, "step": 19514 }, { "epoch": 0.5697643864412718, "grad_norm": 0.5375801940808804, "learning_rate": 2.3902676399026767e-05, "loss": 0.6137, "step": 19515 }, { "epoch": 0.5697935826691192, "grad_norm": 0.4914203447612784, "learning_rate": 2.3901054339010546e-05, "loss": 0.5651, "step": 19516 }, { "epoch": 0.5698227788969665, "grad_norm": 0.5042742434216557, "learning_rate": 2.3899432278994324e-05, "loss": 0.5916, "step": 19517 }, { "epoch": 0.5698519751248139, "grad_norm": 0.5380190214057503, "learning_rate": 2.3897810218978103e-05, "loss": 0.6387, "step": 19518 }, { "epoch": 0.5698811713526613, "grad_norm": 0.5025683239105325, "learning_rate": 2.389618815896188e-05, "loss": 0.6041, "step": 19519 }, { "epoch": 0.5699103675805086, "grad_norm": 0.5432076632907267, "learning_rate": 2.3894566098945663e-05, "loss": 0.6372, "step": 19520 }, { "epoch": 0.569939563808356, "grad_norm": 0.5393750618304641, "learning_rate": 2.389294403892944e-05, "loss": 0.6042, "step": 19521 }, { "epoch": 0.5699687600362033, "grad_norm": 0.5616797763171051, "learning_rate": 2.389132197891322e-05, "loss": 0.674, "step": 19522 }, { "epoch": 0.5699979562640507, "grad_norm": 0.5149869280030375, "learning_rate": 2.3889699918897e-05, "loss": 0.5872, "step": 19523 }, { "epoch": 0.5700271524918981, "grad_norm": 0.5329215524273317, "learning_rate": 2.388807785888078e-05, "loss": 0.5959, "step": 19524 }, { "epoch": 0.5700563487197454, "grad_norm": 0.5623731394071562, "learning_rate": 2.388645579886456e-05, "loss": 0.6733, "step": 19525 }, { "epoch": 0.5700855449475928, "grad_norm": 0.5161234491958907, "learning_rate": 2.388483373884834e-05, "loss": 0.6129, "step": 19526 }, { "epoch": 0.5701147411754401, "grad_norm": 0.5376167609919484, "learning_rate": 2.3883211678832118e-05, "loss": 0.6207, "step": 19527 }, { "epoch": 0.5701439374032875, "grad_norm": 0.5643152019404052, "learning_rate": 2.3881589618815896e-05, "loss": 0.6363, "step": 19528 }, { "epoch": 0.5701731336311349, "grad_norm": 0.49889457316421126, "learning_rate": 2.3879967558799675e-05, "loss": 0.5875, "step": 19529 }, { "epoch": 0.5702023298589822, "grad_norm": 0.5822798718527716, "learning_rate": 2.3878345498783457e-05, "loss": 0.6776, "step": 19530 }, { "epoch": 0.5702315260868296, "grad_norm": 0.5910973240389795, "learning_rate": 2.3876723438767235e-05, "loss": 0.7014, "step": 19531 }, { "epoch": 0.570260722314677, "grad_norm": 0.49606540423666096, "learning_rate": 2.3875101378751013e-05, "loss": 0.5687, "step": 19532 }, { "epoch": 0.5702899185425243, "grad_norm": 0.522953362243431, "learning_rate": 2.3873479318734795e-05, "loss": 0.6142, "step": 19533 }, { "epoch": 0.5703191147703717, "grad_norm": 0.5680260000724552, "learning_rate": 2.3871857258718573e-05, "loss": 0.6777, "step": 19534 }, { "epoch": 0.570348310998219, "grad_norm": 0.5493588810777912, "learning_rate": 2.3870235198702355e-05, "loss": 0.7083, "step": 19535 }, { "epoch": 0.5703775072260664, "grad_norm": 0.4985250751760659, "learning_rate": 2.3868613138686134e-05, "loss": 0.5451, "step": 19536 }, { "epoch": 0.5704067034539138, "grad_norm": 0.5301801025040841, "learning_rate": 2.3866991078669912e-05, "loss": 0.6111, "step": 19537 }, { "epoch": 0.5704358996817611, "grad_norm": 0.48633180591361436, "learning_rate": 2.386536901865369e-05, "loss": 0.5187, "step": 19538 }, { "epoch": 0.5704650959096085, "grad_norm": 0.5220202643456378, "learning_rate": 2.386374695863747e-05, "loss": 0.6114, "step": 19539 }, { "epoch": 0.5704942921374558, "grad_norm": 0.49488171346145404, "learning_rate": 2.386212489862125e-05, "loss": 0.5383, "step": 19540 }, { "epoch": 0.5705234883653032, "grad_norm": 0.5595445664515234, "learning_rate": 2.386050283860503e-05, "loss": 0.6939, "step": 19541 }, { "epoch": 0.5705526845931506, "grad_norm": 0.5317403500344166, "learning_rate": 2.385888077858881e-05, "loss": 0.6057, "step": 19542 }, { "epoch": 0.5705818808209979, "grad_norm": 0.5835579410991902, "learning_rate": 2.385725871857259e-05, "loss": 0.6818, "step": 19543 }, { "epoch": 0.5706110770488453, "grad_norm": 0.5337085985366987, "learning_rate": 2.3855636658556367e-05, "loss": 0.6248, "step": 19544 }, { "epoch": 0.5706402732766926, "grad_norm": 0.5050245471920232, "learning_rate": 2.385401459854015e-05, "loss": 0.5914, "step": 19545 }, { "epoch": 0.57066946950454, "grad_norm": 0.5063190997757635, "learning_rate": 2.3852392538523928e-05, "loss": 0.5636, "step": 19546 }, { "epoch": 0.5706986657323874, "grad_norm": 0.47684006558079883, "learning_rate": 2.3850770478507706e-05, "loss": 0.5385, "step": 19547 }, { "epoch": 0.5707278619602347, "grad_norm": 0.5464687515683749, "learning_rate": 2.3849148418491484e-05, "loss": 0.6632, "step": 19548 }, { "epoch": 0.5707570581880821, "grad_norm": 0.5021999987566282, "learning_rate": 2.3847526358475263e-05, "loss": 0.5583, "step": 19549 }, { "epoch": 0.5707862544159295, "grad_norm": 0.5121188060339529, "learning_rate": 2.3845904298459044e-05, "loss": 0.5859, "step": 19550 }, { "epoch": 0.5708154506437768, "grad_norm": 0.6152466369317439, "learning_rate": 2.3844282238442823e-05, "loss": 0.6538, "step": 19551 }, { "epoch": 0.5708446468716242, "grad_norm": 0.4918087682453672, "learning_rate": 2.3842660178426605e-05, "loss": 0.5546, "step": 19552 }, { "epoch": 0.5708738430994715, "grad_norm": 0.5459014922312592, "learning_rate": 2.3841038118410383e-05, "loss": 0.6506, "step": 19553 }, { "epoch": 0.5709030393273189, "grad_norm": 0.5325550697984746, "learning_rate": 2.383941605839416e-05, "loss": 0.5837, "step": 19554 }, { "epoch": 0.5709322355551663, "grad_norm": 0.5490797871492297, "learning_rate": 2.3837793998377943e-05, "loss": 0.6267, "step": 19555 }, { "epoch": 0.5709614317830136, "grad_norm": 0.5483408199927229, "learning_rate": 2.383617193836172e-05, "loss": 0.6599, "step": 19556 }, { "epoch": 0.570990628010861, "grad_norm": 0.5117870246330668, "learning_rate": 2.38345498783455e-05, "loss": 0.573, "step": 19557 }, { "epoch": 0.5710198242387083, "grad_norm": 0.5413408620350153, "learning_rate": 2.3832927818329278e-05, "loss": 0.6144, "step": 19558 }, { "epoch": 0.5710490204665557, "grad_norm": 0.5521478012612246, "learning_rate": 2.3831305758313057e-05, "loss": 0.6526, "step": 19559 }, { "epoch": 0.5710782166944031, "grad_norm": 0.5109248808476679, "learning_rate": 2.3829683698296838e-05, "loss": 0.6012, "step": 19560 }, { "epoch": 0.5711074129222504, "grad_norm": 0.5284859061385581, "learning_rate": 2.382806163828062e-05, "loss": 0.5929, "step": 19561 }, { "epoch": 0.5711366091500978, "grad_norm": 0.5327582355835745, "learning_rate": 2.38264395782644e-05, "loss": 0.641, "step": 19562 }, { "epoch": 0.5711658053779451, "grad_norm": 0.494606907475667, "learning_rate": 2.3824817518248177e-05, "loss": 0.5481, "step": 19563 }, { "epoch": 0.5711950016057925, "grad_norm": 0.5116312697734016, "learning_rate": 2.3823195458231955e-05, "loss": 0.6205, "step": 19564 }, { "epoch": 0.5712241978336399, "grad_norm": 0.5087853814715294, "learning_rate": 2.3821573398215734e-05, "loss": 0.6129, "step": 19565 }, { "epoch": 0.5712533940614872, "grad_norm": 0.5338805806483815, "learning_rate": 2.3819951338199515e-05, "loss": 0.6563, "step": 19566 }, { "epoch": 0.5712825902893346, "grad_norm": 0.5348284109017155, "learning_rate": 2.3818329278183294e-05, "loss": 0.6759, "step": 19567 }, { "epoch": 0.571311786517182, "grad_norm": 0.5247961338729868, "learning_rate": 2.3816707218167072e-05, "loss": 0.6314, "step": 19568 }, { "epoch": 0.5713409827450293, "grad_norm": 0.5415698535848914, "learning_rate": 2.381508515815085e-05, "loss": 0.6271, "step": 19569 }, { "epoch": 0.5713701789728767, "grad_norm": 0.4979843697457405, "learning_rate": 2.3813463098134632e-05, "loss": 0.5778, "step": 19570 }, { "epoch": 0.571399375200724, "grad_norm": 0.5362369315590705, "learning_rate": 2.3811841038118414e-05, "loss": 0.6484, "step": 19571 }, { "epoch": 0.5714285714285714, "grad_norm": 0.5268562191557783, "learning_rate": 2.3810218978102192e-05, "loss": 0.6269, "step": 19572 }, { "epoch": 0.5714577676564188, "grad_norm": 0.529784692139986, "learning_rate": 2.380859691808597e-05, "loss": 0.6394, "step": 19573 }, { "epoch": 0.5714869638842661, "grad_norm": 0.5100607996595874, "learning_rate": 2.380697485806975e-05, "loss": 0.5955, "step": 19574 }, { "epoch": 0.5715161601121135, "grad_norm": 0.496416151114559, "learning_rate": 2.3805352798053527e-05, "loss": 0.5277, "step": 19575 }, { "epoch": 0.5715453563399608, "grad_norm": 0.6147780627668956, "learning_rate": 2.380373073803731e-05, "loss": 0.6386, "step": 19576 }, { "epoch": 0.5715745525678082, "grad_norm": 0.5515227159760093, "learning_rate": 2.3802108678021088e-05, "loss": 0.7171, "step": 19577 }, { "epoch": 0.5716037487956556, "grad_norm": 0.5082245426941239, "learning_rate": 2.3800486618004866e-05, "loss": 0.5991, "step": 19578 }, { "epoch": 0.5716329450235029, "grad_norm": 0.5395715118792014, "learning_rate": 2.3798864557988644e-05, "loss": 0.6181, "step": 19579 }, { "epoch": 0.5716621412513503, "grad_norm": 0.4998722704929524, "learning_rate": 2.3797242497972426e-05, "loss": 0.5784, "step": 19580 }, { "epoch": 0.5716913374791976, "grad_norm": 0.5247658225309837, "learning_rate": 2.3795620437956208e-05, "loss": 0.5977, "step": 19581 }, { "epoch": 0.571720533707045, "grad_norm": 0.5188443996818434, "learning_rate": 2.3793998377939986e-05, "loss": 0.5686, "step": 19582 }, { "epoch": 0.5717497299348924, "grad_norm": 0.5406462553480215, "learning_rate": 2.3792376317923765e-05, "loss": 0.6383, "step": 19583 }, { "epoch": 0.5717789261627397, "grad_norm": 0.5091096866248039, "learning_rate": 2.3790754257907543e-05, "loss": 0.5876, "step": 19584 }, { "epoch": 0.5718081223905871, "grad_norm": 0.5114364989988506, "learning_rate": 2.378913219789132e-05, "loss": 0.565, "step": 19585 }, { "epoch": 0.5718373186184345, "grad_norm": 0.5464088602600415, "learning_rate": 2.3787510137875103e-05, "loss": 0.6936, "step": 19586 }, { "epoch": 0.5718665148462818, "grad_norm": 0.536648828320134, "learning_rate": 2.378588807785888e-05, "loss": 0.6266, "step": 19587 }, { "epoch": 0.5718957110741292, "grad_norm": 0.5076723889984691, "learning_rate": 2.378426601784266e-05, "loss": 0.5977, "step": 19588 }, { "epoch": 0.5719249073019765, "grad_norm": 0.516688998934216, "learning_rate": 2.378264395782644e-05, "loss": 0.604, "step": 19589 }, { "epoch": 0.5719541035298239, "grad_norm": 0.5207279641637037, "learning_rate": 2.378102189781022e-05, "loss": 0.6258, "step": 19590 }, { "epoch": 0.5719832997576713, "grad_norm": 0.791530994108332, "learning_rate": 2.3779399837794002e-05, "loss": 0.7208, "step": 19591 }, { "epoch": 0.5720124959855186, "grad_norm": 0.5506758453784288, "learning_rate": 2.377777777777778e-05, "loss": 0.6711, "step": 19592 }, { "epoch": 0.572041692213366, "grad_norm": 0.5563152005195554, "learning_rate": 2.377615571776156e-05, "loss": 0.6889, "step": 19593 }, { "epoch": 0.5720708884412133, "grad_norm": 0.5372918241943633, "learning_rate": 2.3774533657745337e-05, "loss": 0.6586, "step": 19594 }, { "epoch": 0.5721000846690607, "grad_norm": 0.4965267789285015, "learning_rate": 2.3772911597729115e-05, "loss": 0.5778, "step": 19595 }, { "epoch": 0.5721292808969081, "grad_norm": 0.5318404781132933, "learning_rate": 2.3771289537712897e-05, "loss": 0.6412, "step": 19596 }, { "epoch": 0.5721584771247554, "grad_norm": 0.498348904768195, "learning_rate": 2.3769667477696675e-05, "loss": 0.5736, "step": 19597 }, { "epoch": 0.5721876733526029, "grad_norm": 0.5195743816812832, "learning_rate": 2.3768045417680454e-05, "loss": 0.6054, "step": 19598 }, { "epoch": 0.5722168695804503, "grad_norm": 0.49167038272905633, "learning_rate": 2.3766423357664236e-05, "loss": 0.5438, "step": 19599 }, { "epoch": 0.5722460658082976, "grad_norm": 0.5303362744093605, "learning_rate": 2.3764801297648014e-05, "loss": 0.6587, "step": 19600 }, { "epoch": 0.572275262036145, "grad_norm": 0.5018732047609379, "learning_rate": 2.3763179237631796e-05, "loss": 0.5795, "step": 19601 }, { "epoch": 0.5723044582639923, "grad_norm": 0.5187339989600537, "learning_rate": 2.3761557177615574e-05, "loss": 0.5751, "step": 19602 }, { "epoch": 0.5723336544918397, "grad_norm": 0.50205313880996, "learning_rate": 2.3759935117599352e-05, "loss": 0.5768, "step": 19603 }, { "epoch": 0.5723628507196871, "grad_norm": 0.532818114997701, "learning_rate": 2.375831305758313e-05, "loss": 0.6069, "step": 19604 }, { "epoch": 0.5723920469475344, "grad_norm": 0.5592349314727453, "learning_rate": 2.375669099756691e-05, "loss": 0.7026, "step": 19605 }, { "epoch": 0.5724212431753818, "grad_norm": 0.5145135062953038, "learning_rate": 2.375506893755069e-05, "loss": 0.6101, "step": 19606 }, { "epoch": 0.5724504394032291, "grad_norm": 0.5203843666357892, "learning_rate": 2.375344687753447e-05, "loss": 0.5833, "step": 19607 }, { "epoch": 0.5724796356310765, "grad_norm": 0.5027616344036966, "learning_rate": 2.375182481751825e-05, "loss": 0.5992, "step": 19608 }, { "epoch": 0.5725088318589239, "grad_norm": 0.5332889173860117, "learning_rate": 2.375020275750203e-05, "loss": 0.6233, "step": 19609 }, { "epoch": 0.5725380280867712, "grad_norm": 0.5692926960919514, "learning_rate": 2.3748580697485808e-05, "loss": 0.652, "step": 19610 }, { "epoch": 0.5725672243146186, "grad_norm": 0.5015407388995087, "learning_rate": 2.374695863746959e-05, "loss": 0.5969, "step": 19611 }, { "epoch": 0.572596420542466, "grad_norm": 0.497859211356647, "learning_rate": 2.3745336577453368e-05, "loss": 0.5966, "step": 19612 }, { "epoch": 0.5726256167703133, "grad_norm": 0.5112919326627491, "learning_rate": 2.3743714517437146e-05, "loss": 0.598, "step": 19613 }, { "epoch": 0.5726548129981607, "grad_norm": 0.5073827015459189, "learning_rate": 2.3742092457420925e-05, "loss": 0.6041, "step": 19614 }, { "epoch": 0.572684009226008, "grad_norm": 0.5673472794506799, "learning_rate": 2.3740470397404703e-05, "loss": 0.6548, "step": 19615 }, { "epoch": 0.5727132054538554, "grad_norm": 0.5130839009832736, "learning_rate": 2.3738848337388485e-05, "loss": 0.5824, "step": 19616 }, { "epoch": 0.5727424016817028, "grad_norm": 0.49584079586404484, "learning_rate": 2.3737226277372263e-05, "loss": 0.5627, "step": 19617 }, { "epoch": 0.5727715979095501, "grad_norm": 0.5438693917920655, "learning_rate": 2.3735604217356045e-05, "loss": 0.6444, "step": 19618 }, { "epoch": 0.5728007941373975, "grad_norm": 0.623687044237419, "learning_rate": 2.3733982157339823e-05, "loss": 0.6809, "step": 19619 }, { "epoch": 0.5728299903652448, "grad_norm": 0.5546902556102733, "learning_rate": 2.3732360097323602e-05, "loss": 0.6196, "step": 19620 }, { "epoch": 0.5728591865930922, "grad_norm": 0.557232370560609, "learning_rate": 2.373073803730738e-05, "loss": 0.6998, "step": 19621 }, { "epoch": 0.5728883828209396, "grad_norm": 0.5076782446375989, "learning_rate": 2.3729115977291162e-05, "loss": 0.5658, "step": 19622 }, { "epoch": 0.5729175790487869, "grad_norm": 0.5593055183010369, "learning_rate": 2.372749391727494e-05, "loss": 0.6166, "step": 19623 }, { "epoch": 0.5729467752766343, "grad_norm": 0.5447224662255296, "learning_rate": 2.372587185725872e-05, "loss": 0.6823, "step": 19624 }, { "epoch": 0.5729759715044817, "grad_norm": 0.5132855581382997, "learning_rate": 2.3724249797242497e-05, "loss": 0.6064, "step": 19625 }, { "epoch": 0.573005167732329, "grad_norm": 0.5023062326144142, "learning_rate": 2.372262773722628e-05, "loss": 0.5799, "step": 19626 }, { "epoch": 0.5730343639601764, "grad_norm": 0.50619821803788, "learning_rate": 2.372100567721006e-05, "loss": 0.5537, "step": 19627 }, { "epoch": 0.5730635601880237, "grad_norm": 0.5288394144179013, "learning_rate": 2.371938361719384e-05, "loss": 0.6311, "step": 19628 }, { "epoch": 0.5730927564158711, "grad_norm": 0.4980040133153184, "learning_rate": 2.3717761557177617e-05, "loss": 0.5757, "step": 19629 }, { "epoch": 0.5731219526437185, "grad_norm": 0.5315901887763895, "learning_rate": 2.3716139497161396e-05, "loss": 0.6499, "step": 19630 }, { "epoch": 0.5731511488715658, "grad_norm": 0.5185429620892328, "learning_rate": 2.3714517437145174e-05, "loss": 0.608, "step": 19631 }, { "epoch": 0.5731803450994132, "grad_norm": 0.5104427248047607, "learning_rate": 2.3712895377128956e-05, "loss": 0.5785, "step": 19632 }, { "epoch": 0.5732095413272605, "grad_norm": 0.5317490760034533, "learning_rate": 2.3711273317112734e-05, "loss": 0.6476, "step": 19633 }, { "epoch": 0.5732387375551079, "grad_norm": 0.5302925439580382, "learning_rate": 2.3709651257096512e-05, "loss": 0.6693, "step": 19634 }, { "epoch": 0.5732679337829553, "grad_norm": 0.4923947059036952, "learning_rate": 2.370802919708029e-05, "loss": 0.5376, "step": 19635 }, { "epoch": 0.5732971300108026, "grad_norm": 0.5130904872573169, "learning_rate": 2.3706407137064073e-05, "loss": 0.5898, "step": 19636 }, { "epoch": 0.57332632623865, "grad_norm": 0.5545738815827231, "learning_rate": 2.3704785077047854e-05, "loss": 0.5975, "step": 19637 }, { "epoch": 0.5733555224664973, "grad_norm": 0.4833891478119973, "learning_rate": 2.3703163017031633e-05, "loss": 0.5707, "step": 19638 }, { "epoch": 0.5733847186943447, "grad_norm": 0.49382485338665233, "learning_rate": 2.370154095701541e-05, "loss": 0.558, "step": 19639 }, { "epoch": 0.5734139149221921, "grad_norm": 0.5104822126676939, "learning_rate": 2.369991889699919e-05, "loss": 0.6089, "step": 19640 }, { "epoch": 0.5734431111500394, "grad_norm": 0.6120323034898615, "learning_rate": 2.3698296836982968e-05, "loss": 0.7948, "step": 19641 }, { "epoch": 0.5734723073778868, "grad_norm": 0.5342972565190175, "learning_rate": 2.369667477696675e-05, "loss": 0.6651, "step": 19642 }, { "epoch": 0.5735015036057342, "grad_norm": 0.5677536797347095, "learning_rate": 2.3695052716950528e-05, "loss": 0.7013, "step": 19643 }, { "epoch": 0.5735306998335815, "grad_norm": 0.49521283302206315, "learning_rate": 2.3693430656934306e-05, "loss": 0.5539, "step": 19644 }, { "epoch": 0.5735598960614289, "grad_norm": 0.5113954265365799, "learning_rate": 2.3691808596918085e-05, "loss": 0.5759, "step": 19645 }, { "epoch": 0.5735890922892762, "grad_norm": 0.5403590223106751, "learning_rate": 2.3690186536901867e-05, "loss": 0.6661, "step": 19646 }, { "epoch": 0.5736182885171236, "grad_norm": 0.5193337527732771, "learning_rate": 2.3688564476885648e-05, "loss": 0.6033, "step": 19647 }, { "epoch": 0.573647484744971, "grad_norm": 0.5216492072893882, "learning_rate": 2.3686942416869427e-05, "loss": 0.6021, "step": 19648 }, { "epoch": 0.5736766809728183, "grad_norm": 0.5010024630035832, "learning_rate": 2.3685320356853205e-05, "loss": 0.5894, "step": 19649 }, { "epoch": 0.5737058772006657, "grad_norm": 0.5183370533358109, "learning_rate": 2.3683698296836983e-05, "loss": 0.5875, "step": 19650 }, { "epoch": 0.573735073428513, "grad_norm": 0.5046369952866537, "learning_rate": 2.3682076236820762e-05, "loss": 0.5961, "step": 19651 }, { "epoch": 0.5737642696563604, "grad_norm": 0.5091541053999823, "learning_rate": 2.3680454176804544e-05, "loss": 0.5822, "step": 19652 }, { "epoch": 0.5737934658842078, "grad_norm": 0.5044348719463928, "learning_rate": 2.3678832116788322e-05, "loss": 0.5431, "step": 19653 }, { "epoch": 0.5738226621120551, "grad_norm": 0.524540159226599, "learning_rate": 2.36772100567721e-05, "loss": 0.5518, "step": 19654 }, { "epoch": 0.5738518583399025, "grad_norm": 0.49753406008210926, "learning_rate": 2.3675587996755882e-05, "loss": 0.592, "step": 19655 }, { "epoch": 0.5738810545677498, "grad_norm": 0.5023886124143819, "learning_rate": 2.367396593673966e-05, "loss": 0.5846, "step": 19656 }, { "epoch": 0.5739102507955972, "grad_norm": 0.517732539297888, "learning_rate": 2.3672343876723442e-05, "loss": 0.6159, "step": 19657 }, { "epoch": 0.5739394470234446, "grad_norm": 0.4995194776816295, "learning_rate": 2.367072181670722e-05, "loss": 0.5746, "step": 19658 }, { "epoch": 0.5739686432512919, "grad_norm": 0.49800930019636186, "learning_rate": 2.3669099756691e-05, "loss": 0.4988, "step": 19659 }, { "epoch": 0.5739978394791393, "grad_norm": 0.5266629519340966, "learning_rate": 2.3667477696674777e-05, "loss": 0.6185, "step": 19660 }, { "epoch": 0.5740270357069867, "grad_norm": 0.5507123370252851, "learning_rate": 2.3665855636658556e-05, "loss": 0.6828, "step": 19661 }, { "epoch": 0.574056231934834, "grad_norm": 0.554013177897434, "learning_rate": 2.3664233576642337e-05, "loss": 0.6833, "step": 19662 }, { "epoch": 0.5740854281626814, "grad_norm": 0.5246933260847638, "learning_rate": 2.3662611516626116e-05, "loss": 0.5969, "step": 19663 }, { "epoch": 0.5741146243905287, "grad_norm": 0.48908559317052624, "learning_rate": 2.3660989456609894e-05, "loss": 0.5604, "step": 19664 }, { "epoch": 0.5741438206183761, "grad_norm": 0.5222013177037779, "learning_rate": 2.3659367396593676e-05, "loss": 0.5994, "step": 19665 }, { "epoch": 0.5741730168462235, "grad_norm": 0.4683552240899406, "learning_rate": 2.3657745336577454e-05, "loss": 0.5217, "step": 19666 }, { "epoch": 0.5742022130740708, "grad_norm": 0.5614015229539363, "learning_rate": 2.3656123276561236e-05, "loss": 0.6318, "step": 19667 }, { "epoch": 0.5742314093019182, "grad_norm": 0.5608409735395413, "learning_rate": 2.3654501216545014e-05, "loss": 0.7083, "step": 19668 }, { "epoch": 0.5742606055297655, "grad_norm": 0.5232245290531093, "learning_rate": 2.3652879156528793e-05, "loss": 0.5962, "step": 19669 }, { "epoch": 0.5742898017576129, "grad_norm": 0.5399109383656203, "learning_rate": 2.365125709651257e-05, "loss": 0.6228, "step": 19670 }, { "epoch": 0.5743189979854603, "grad_norm": 0.5625527743500308, "learning_rate": 2.364963503649635e-05, "loss": 0.6817, "step": 19671 }, { "epoch": 0.5743481942133076, "grad_norm": 0.5211554199510324, "learning_rate": 2.364801297648013e-05, "loss": 0.5824, "step": 19672 }, { "epoch": 0.574377390441155, "grad_norm": 0.5413860753793053, "learning_rate": 2.364639091646391e-05, "loss": 0.6304, "step": 19673 }, { "epoch": 0.5744065866690024, "grad_norm": 0.5269519777294726, "learning_rate": 2.364476885644769e-05, "loss": 0.5991, "step": 19674 }, { "epoch": 0.5744357828968497, "grad_norm": 0.46852102600205997, "learning_rate": 2.364314679643147e-05, "loss": 0.4756, "step": 19675 }, { "epoch": 0.5744649791246971, "grad_norm": 0.5019853053382364, "learning_rate": 2.3641524736415248e-05, "loss": 0.557, "step": 19676 }, { "epoch": 0.5744941753525444, "grad_norm": 0.548531496363154, "learning_rate": 2.363990267639903e-05, "loss": 0.6244, "step": 19677 }, { "epoch": 0.5745233715803918, "grad_norm": 0.5007910187245359, "learning_rate": 2.363828061638281e-05, "loss": 0.5597, "step": 19678 }, { "epoch": 0.5745525678082392, "grad_norm": 0.549713982696901, "learning_rate": 2.3636658556366587e-05, "loss": 0.6503, "step": 19679 }, { "epoch": 0.5745817640360865, "grad_norm": 0.5256422785110381, "learning_rate": 2.3635036496350365e-05, "loss": 0.6277, "step": 19680 }, { "epoch": 0.5746109602639339, "grad_norm": 0.5074273299337163, "learning_rate": 2.3633414436334144e-05, "loss": 0.5864, "step": 19681 }, { "epoch": 0.5746401564917812, "grad_norm": 0.4832729023147256, "learning_rate": 2.3631792376317925e-05, "loss": 0.5373, "step": 19682 }, { "epoch": 0.5746693527196286, "grad_norm": 0.5260278237084839, "learning_rate": 2.3630170316301704e-05, "loss": 0.5919, "step": 19683 }, { "epoch": 0.574698548947476, "grad_norm": 0.5379082763896429, "learning_rate": 2.3628548256285485e-05, "loss": 0.6601, "step": 19684 }, { "epoch": 0.5747277451753233, "grad_norm": 0.5460348851745043, "learning_rate": 2.3626926196269264e-05, "loss": 0.6415, "step": 19685 }, { "epoch": 0.5747569414031707, "grad_norm": 0.518902240969059, "learning_rate": 2.3625304136253042e-05, "loss": 0.5988, "step": 19686 }, { "epoch": 0.574786137631018, "grad_norm": 0.49231399956687155, "learning_rate": 2.362368207623682e-05, "loss": 0.5547, "step": 19687 }, { "epoch": 0.5748153338588654, "grad_norm": 0.5380756607253816, "learning_rate": 2.3622060016220602e-05, "loss": 0.657, "step": 19688 }, { "epoch": 0.5748445300867128, "grad_norm": 0.48196122295105603, "learning_rate": 2.362043795620438e-05, "loss": 0.5323, "step": 19689 }, { "epoch": 0.5748737263145601, "grad_norm": 0.49762907070900503, "learning_rate": 2.361881589618816e-05, "loss": 0.5818, "step": 19690 }, { "epoch": 0.5749029225424075, "grad_norm": 0.5465804995177668, "learning_rate": 2.3617193836171937e-05, "loss": 0.6528, "step": 19691 }, { "epoch": 0.5749321187702549, "grad_norm": 0.5154496011453094, "learning_rate": 2.361557177615572e-05, "loss": 0.6058, "step": 19692 }, { "epoch": 0.5749613149981022, "grad_norm": 0.5357046370391615, "learning_rate": 2.36139497161395e-05, "loss": 0.6553, "step": 19693 }, { "epoch": 0.5749905112259496, "grad_norm": 0.5493733418276637, "learning_rate": 2.361232765612328e-05, "loss": 0.62, "step": 19694 }, { "epoch": 0.5750197074537969, "grad_norm": 0.5240799957351063, "learning_rate": 2.3610705596107058e-05, "loss": 0.6201, "step": 19695 }, { "epoch": 0.5750489036816443, "grad_norm": 0.5623026937102351, "learning_rate": 2.3609083536090836e-05, "loss": 0.6728, "step": 19696 }, { "epoch": 0.5750780999094917, "grad_norm": 0.49758214115933413, "learning_rate": 2.3607461476074614e-05, "loss": 0.5717, "step": 19697 }, { "epoch": 0.575107296137339, "grad_norm": 0.5016721096768377, "learning_rate": 2.3605839416058396e-05, "loss": 0.6014, "step": 19698 }, { "epoch": 0.5751364923651864, "grad_norm": 0.5095212220488946, "learning_rate": 2.3604217356042175e-05, "loss": 0.5822, "step": 19699 }, { "epoch": 0.5751656885930337, "grad_norm": 0.5291987232027292, "learning_rate": 2.3602595296025953e-05, "loss": 0.6216, "step": 19700 }, { "epoch": 0.5751948848208811, "grad_norm": 0.5059388316143764, "learning_rate": 2.360097323600973e-05, "loss": 0.5713, "step": 19701 }, { "epoch": 0.5752240810487285, "grad_norm": 0.4727689314383087, "learning_rate": 2.3599351175993513e-05, "loss": 0.4946, "step": 19702 }, { "epoch": 0.5752532772765758, "grad_norm": 0.4751796360704407, "learning_rate": 2.3597729115977295e-05, "loss": 0.5431, "step": 19703 }, { "epoch": 0.5752824735044232, "grad_norm": 0.5561643310688358, "learning_rate": 2.3596107055961073e-05, "loss": 0.7123, "step": 19704 }, { "epoch": 0.5753116697322705, "grad_norm": 0.5639174683614887, "learning_rate": 2.359448499594485e-05, "loss": 0.6489, "step": 19705 }, { "epoch": 0.5753408659601179, "grad_norm": 0.5044797072998737, "learning_rate": 2.359286293592863e-05, "loss": 0.5738, "step": 19706 }, { "epoch": 0.5753700621879653, "grad_norm": 0.5369708095275251, "learning_rate": 2.359124087591241e-05, "loss": 0.6575, "step": 19707 }, { "epoch": 0.5753992584158126, "grad_norm": 0.5106424362111927, "learning_rate": 2.358961881589619e-05, "loss": 0.6106, "step": 19708 }, { "epoch": 0.57542845464366, "grad_norm": 0.6418094970329811, "learning_rate": 2.358799675587997e-05, "loss": 0.7529, "step": 19709 }, { "epoch": 0.5754576508715074, "grad_norm": 0.6083377120663019, "learning_rate": 2.3586374695863747e-05, "loss": 0.6562, "step": 19710 }, { "epoch": 0.5754868470993547, "grad_norm": 0.5455653677160547, "learning_rate": 2.3584752635847525e-05, "loss": 0.655, "step": 19711 }, { "epoch": 0.5755160433272021, "grad_norm": 0.5169746447786845, "learning_rate": 2.3583130575831307e-05, "loss": 0.6072, "step": 19712 }, { "epoch": 0.5755452395550494, "grad_norm": 0.502783185465725, "learning_rate": 2.358150851581509e-05, "loss": 0.5925, "step": 19713 }, { "epoch": 0.5755744357828968, "grad_norm": 0.5371646703083738, "learning_rate": 2.3579886455798867e-05, "loss": 0.6392, "step": 19714 }, { "epoch": 0.5756036320107442, "grad_norm": 0.5183532606503652, "learning_rate": 2.3578264395782645e-05, "loss": 0.6291, "step": 19715 }, { "epoch": 0.5756328282385915, "grad_norm": 0.5111547263329628, "learning_rate": 2.3576642335766424e-05, "loss": 0.5964, "step": 19716 }, { "epoch": 0.5756620244664389, "grad_norm": 0.5201299714116614, "learning_rate": 2.3575020275750202e-05, "loss": 0.6175, "step": 19717 }, { "epoch": 0.5756912206942862, "grad_norm": 0.4862823223258976, "learning_rate": 2.3573398215733984e-05, "loss": 0.5676, "step": 19718 }, { "epoch": 0.5757204169221337, "grad_norm": 0.5372496651975193, "learning_rate": 2.3571776155717762e-05, "loss": 0.63, "step": 19719 }, { "epoch": 0.5757496131499811, "grad_norm": 0.5238931349751761, "learning_rate": 2.357015409570154e-05, "loss": 0.6348, "step": 19720 }, { "epoch": 0.5757788093778284, "grad_norm": 0.4724430450023737, "learning_rate": 2.3568532035685323e-05, "loss": 0.5337, "step": 19721 }, { "epoch": 0.5758080056056758, "grad_norm": 0.5325945902992977, "learning_rate": 2.35669099756691e-05, "loss": 0.6375, "step": 19722 }, { "epoch": 0.5758372018335232, "grad_norm": 0.5403758872534765, "learning_rate": 2.3565287915652883e-05, "loss": 0.6283, "step": 19723 }, { "epoch": 0.5758663980613705, "grad_norm": 0.4846336115466849, "learning_rate": 2.356366585563666e-05, "loss": 0.5134, "step": 19724 }, { "epoch": 0.5758955942892179, "grad_norm": 0.49660812380088404, "learning_rate": 2.356204379562044e-05, "loss": 0.5857, "step": 19725 }, { "epoch": 0.5759247905170652, "grad_norm": 0.4709002355217424, "learning_rate": 2.3560421735604218e-05, "loss": 0.5085, "step": 19726 }, { "epoch": 0.5759539867449126, "grad_norm": 0.522692867636531, "learning_rate": 2.3558799675587996e-05, "loss": 0.5994, "step": 19727 }, { "epoch": 0.57598318297276, "grad_norm": 0.49050396868144647, "learning_rate": 2.3557177615571778e-05, "loss": 0.53, "step": 19728 }, { "epoch": 0.5760123792006073, "grad_norm": 0.5133066759037317, "learning_rate": 2.3555555555555556e-05, "loss": 0.5804, "step": 19729 }, { "epoch": 0.5760415754284547, "grad_norm": 0.517316267390535, "learning_rate": 2.3553933495539335e-05, "loss": 0.5341, "step": 19730 }, { "epoch": 0.576070771656302, "grad_norm": 0.4754415127007333, "learning_rate": 2.3552311435523116e-05, "loss": 0.5481, "step": 19731 }, { "epoch": 0.5760999678841494, "grad_norm": 0.4731302311998863, "learning_rate": 2.3550689375506895e-05, "loss": 0.524, "step": 19732 }, { "epoch": 0.5761291641119968, "grad_norm": 0.49387475668283487, "learning_rate": 2.3549067315490677e-05, "loss": 0.56, "step": 19733 }, { "epoch": 0.5761583603398441, "grad_norm": 0.5375930585512376, "learning_rate": 2.3547445255474455e-05, "loss": 0.5923, "step": 19734 }, { "epoch": 0.5761875565676915, "grad_norm": 0.5400041366019908, "learning_rate": 2.3545823195458233e-05, "loss": 0.6641, "step": 19735 }, { "epoch": 0.5762167527955389, "grad_norm": 0.49186899491657676, "learning_rate": 2.354420113544201e-05, "loss": 0.5856, "step": 19736 }, { "epoch": 0.5762459490233862, "grad_norm": 0.532536320683465, "learning_rate": 2.354257907542579e-05, "loss": 0.6075, "step": 19737 }, { "epoch": 0.5762751452512336, "grad_norm": 0.48186378768409244, "learning_rate": 2.3540957015409572e-05, "loss": 0.5241, "step": 19738 }, { "epoch": 0.5763043414790809, "grad_norm": 0.5526696780016213, "learning_rate": 2.353933495539335e-05, "loss": 0.6441, "step": 19739 }, { "epoch": 0.5763335377069283, "grad_norm": 0.5127529722917811, "learning_rate": 2.3537712895377132e-05, "loss": 0.5663, "step": 19740 }, { "epoch": 0.5763627339347757, "grad_norm": 0.5304961304272512, "learning_rate": 2.353609083536091e-05, "loss": 0.5923, "step": 19741 }, { "epoch": 0.576391930162623, "grad_norm": 0.5118228946933715, "learning_rate": 2.353446877534469e-05, "loss": 0.6011, "step": 19742 }, { "epoch": 0.5764211263904704, "grad_norm": 0.5081965778612039, "learning_rate": 2.3532846715328467e-05, "loss": 0.5535, "step": 19743 }, { "epoch": 0.5764503226183177, "grad_norm": 0.531321418786597, "learning_rate": 2.353122465531225e-05, "loss": 0.6128, "step": 19744 }, { "epoch": 0.5764795188461651, "grad_norm": 0.5302178673671905, "learning_rate": 2.3529602595296027e-05, "loss": 0.6438, "step": 19745 }, { "epoch": 0.5765087150740125, "grad_norm": 0.5277650698468299, "learning_rate": 2.3527980535279806e-05, "loss": 0.6728, "step": 19746 }, { "epoch": 0.5765379113018598, "grad_norm": 0.5429230895653493, "learning_rate": 2.3526358475263584e-05, "loss": 0.5624, "step": 19747 }, { "epoch": 0.5765671075297072, "grad_norm": 0.5164291467315291, "learning_rate": 2.3524736415247366e-05, "loss": 0.63, "step": 19748 }, { "epoch": 0.5765963037575546, "grad_norm": 0.4979720108099702, "learning_rate": 2.3523114355231144e-05, "loss": 0.5243, "step": 19749 }, { "epoch": 0.5766254999854019, "grad_norm": 0.5929631762099522, "learning_rate": 2.3521492295214926e-05, "loss": 0.6659, "step": 19750 }, { "epoch": 0.5766546962132493, "grad_norm": 0.5054623304578685, "learning_rate": 2.3519870235198704e-05, "loss": 0.5848, "step": 19751 }, { "epoch": 0.5766838924410966, "grad_norm": 0.48596732932488, "learning_rate": 2.3518248175182483e-05, "loss": 0.5514, "step": 19752 }, { "epoch": 0.576713088668944, "grad_norm": 0.5006387479227349, "learning_rate": 2.351662611516626e-05, "loss": 0.5818, "step": 19753 }, { "epoch": 0.5767422848967914, "grad_norm": 0.47608290761519123, "learning_rate": 2.3515004055150043e-05, "loss": 0.5089, "step": 19754 }, { "epoch": 0.5767714811246387, "grad_norm": 0.5752551998438693, "learning_rate": 2.351338199513382e-05, "loss": 0.7252, "step": 19755 }, { "epoch": 0.5768006773524861, "grad_norm": 0.5033953541489643, "learning_rate": 2.35117599351176e-05, "loss": 0.5771, "step": 19756 }, { "epoch": 0.5768298735803334, "grad_norm": 0.5258575919001063, "learning_rate": 2.3510137875101378e-05, "loss": 0.6295, "step": 19757 }, { "epoch": 0.5768590698081808, "grad_norm": 0.46905907840683625, "learning_rate": 2.350851581508516e-05, "loss": 0.4951, "step": 19758 }, { "epoch": 0.5768882660360282, "grad_norm": 0.4959057248962449, "learning_rate": 2.350689375506894e-05, "loss": 0.5381, "step": 19759 }, { "epoch": 0.5769174622638755, "grad_norm": 0.46202812298093904, "learning_rate": 2.350527169505272e-05, "loss": 0.4761, "step": 19760 }, { "epoch": 0.5769466584917229, "grad_norm": 0.5303508860918166, "learning_rate": 2.3503649635036498e-05, "loss": 0.6201, "step": 19761 }, { "epoch": 0.5769758547195702, "grad_norm": 0.5415008742597772, "learning_rate": 2.3502027575020276e-05, "loss": 0.6697, "step": 19762 }, { "epoch": 0.5770050509474176, "grad_norm": 0.4960481725762193, "learning_rate": 2.3500405515004055e-05, "loss": 0.5281, "step": 19763 }, { "epoch": 0.577034247175265, "grad_norm": 0.5218329866667718, "learning_rate": 2.3498783454987837e-05, "loss": 0.5953, "step": 19764 }, { "epoch": 0.5770634434031123, "grad_norm": 0.5531954534035735, "learning_rate": 2.3497161394971615e-05, "loss": 0.6722, "step": 19765 }, { "epoch": 0.5770926396309597, "grad_norm": 0.5099447263154131, "learning_rate": 2.3495539334955393e-05, "loss": 0.5909, "step": 19766 }, { "epoch": 0.577121835858807, "grad_norm": 0.506351400970484, "learning_rate": 2.3493917274939172e-05, "loss": 0.6006, "step": 19767 }, { "epoch": 0.5771510320866544, "grad_norm": 0.5333070528585762, "learning_rate": 2.3492295214922954e-05, "loss": 0.6949, "step": 19768 }, { "epoch": 0.5771802283145018, "grad_norm": 0.5043022306682777, "learning_rate": 2.3490673154906735e-05, "loss": 0.5504, "step": 19769 }, { "epoch": 0.5772094245423491, "grad_norm": 0.5088591946622725, "learning_rate": 2.3489051094890514e-05, "loss": 0.5398, "step": 19770 }, { "epoch": 0.5772386207701965, "grad_norm": 0.5228109644951672, "learning_rate": 2.3487429034874292e-05, "loss": 0.6099, "step": 19771 }, { "epoch": 0.5772678169980439, "grad_norm": 0.5978164420247837, "learning_rate": 2.348580697485807e-05, "loss": 0.627, "step": 19772 }, { "epoch": 0.5772970132258912, "grad_norm": 0.5054348759819366, "learning_rate": 2.348418491484185e-05, "loss": 0.5734, "step": 19773 }, { "epoch": 0.5773262094537386, "grad_norm": 0.5200813125375561, "learning_rate": 2.348256285482563e-05, "loss": 0.5907, "step": 19774 }, { "epoch": 0.5773554056815859, "grad_norm": 0.45979912885841034, "learning_rate": 2.348094079480941e-05, "loss": 0.4576, "step": 19775 }, { "epoch": 0.5773846019094333, "grad_norm": 0.5110606911711814, "learning_rate": 2.3479318734793187e-05, "loss": 0.576, "step": 19776 }, { "epoch": 0.5774137981372807, "grad_norm": 0.5096118250008649, "learning_rate": 2.3477696674776966e-05, "loss": 0.5286, "step": 19777 }, { "epoch": 0.577442994365128, "grad_norm": 0.5299228761031284, "learning_rate": 2.3476074614760747e-05, "loss": 0.6181, "step": 19778 }, { "epoch": 0.5774721905929754, "grad_norm": 0.5149010468496883, "learning_rate": 2.347445255474453e-05, "loss": 0.6102, "step": 19779 }, { "epoch": 0.5775013868208227, "grad_norm": 0.49706315166192994, "learning_rate": 2.3472830494728308e-05, "loss": 0.5869, "step": 19780 }, { "epoch": 0.5775305830486701, "grad_norm": 0.5163802235599566, "learning_rate": 2.3471208434712086e-05, "loss": 0.5697, "step": 19781 }, { "epoch": 0.5775597792765175, "grad_norm": 0.5638120000007452, "learning_rate": 2.3469586374695864e-05, "loss": 0.6569, "step": 19782 }, { "epoch": 0.5775889755043648, "grad_norm": 0.5291868601738888, "learning_rate": 2.3467964314679643e-05, "loss": 0.5942, "step": 19783 }, { "epoch": 0.5776181717322122, "grad_norm": 0.5673569648571494, "learning_rate": 2.3466342254663424e-05, "loss": 0.7076, "step": 19784 }, { "epoch": 0.5776473679600596, "grad_norm": 0.5229057267694667, "learning_rate": 2.3464720194647203e-05, "loss": 0.6154, "step": 19785 }, { "epoch": 0.5776765641879069, "grad_norm": 0.6095451675869108, "learning_rate": 2.346309813463098e-05, "loss": 0.5886, "step": 19786 }, { "epoch": 0.5777057604157543, "grad_norm": 0.5213036357289721, "learning_rate": 2.346147607461476e-05, "loss": 0.616, "step": 19787 }, { "epoch": 0.5777349566436016, "grad_norm": 0.5268726839221086, "learning_rate": 2.345985401459854e-05, "loss": 0.6015, "step": 19788 }, { "epoch": 0.577764152871449, "grad_norm": 0.5415513694017782, "learning_rate": 2.3458231954582323e-05, "loss": 0.6621, "step": 19789 }, { "epoch": 0.5777933490992964, "grad_norm": 0.5249816602318506, "learning_rate": 2.34566098945661e-05, "loss": 0.5821, "step": 19790 }, { "epoch": 0.5778225453271437, "grad_norm": 0.5023354330275565, "learning_rate": 2.345498783454988e-05, "loss": 0.6044, "step": 19791 }, { "epoch": 0.5778517415549911, "grad_norm": 0.5375337350738679, "learning_rate": 2.3453365774533658e-05, "loss": 0.6317, "step": 19792 }, { "epoch": 0.5778809377828384, "grad_norm": 0.5154482312765036, "learning_rate": 2.3451743714517437e-05, "loss": 0.6126, "step": 19793 }, { "epoch": 0.5779101340106858, "grad_norm": 0.5311490415986798, "learning_rate": 2.345012165450122e-05, "loss": 0.5902, "step": 19794 }, { "epoch": 0.5779393302385332, "grad_norm": 0.5388155847893378, "learning_rate": 2.3448499594484997e-05, "loss": 0.6529, "step": 19795 }, { "epoch": 0.5779685264663805, "grad_norm": 0.509782916833378, "learning_rate": 2.3446877534468775e-05, "loss": 0.5836, "step": 19796 }, { "epoch": 0.5779977226942279, "grad_norm": 0.5402679101285605, "learning_rate": 2.3445255474452557e-05, "loss": 0.6526, "step": 19797 }, { "epoch": 0.5780269189220753, "grad_norm": 0.5194231170294895, "learning_rate": 2.3443633414436335e-05, "loss": 0.6269, "step": 19798 }, { "epoch": 0.5780561151499226, "grad_norm": 0.5021577976477006, "learning_rate": 2.3442011354420117e-05, "loss": 0.5638, "step": 19799 }, { "epoch": 0.57808531137777, "grad_norm": 0.5309099845617805, "learning_rate": 2.3440389294403895e-05, "loss": 0.626, "step": 19800 }, { "epoch": 0.5781145076056173, "grad_norm": 0.5005780762503601, "learning_rate": 2.3438767234387674e-05, "loss": 0.5355, "step": 19801 }, { "epoch": 0.5781437038334647, "grad_norm": 0.5395759430027907, "learning_rate": 2.3437145174371452e-05, "loss": 0.6722, "step": 19802 }, { "epoch": 0.5781729000613121, "grad_norm": 0.5279833414918382, "learning_rate": 2.343552311435523e-05, "loss": 0.6193, "step": 19803 }, { "epoch": 0.5782020962891594, "grad_norm": 0.5495073869420168, "learning_rate": 2.3433901054339012e-05, "loss": 0.6396, "step": 19804 }, { "epoch": 0.5782312925170068, "grad_norm": 0.5315509063574269, "learning_rate": 2.343227899432279e-05, "loss": 0.5936, "step": 19805 }, { "epoch": 0.5782604887448541, "grad_norm": 0.525084222369439, "learning_rate": 2.3430656934306572e-05, "loss": 0.6018, "step": 19806 }, { "epoch": 0.5782896849727015, "grad_norm": 0.5262450077139842, "learning_rate": 2.342903487429035e-05, "loss": 0.5889, "step": 19807 }, { "epoch": 0.5783188812005489, "grad_norm": 0.49818237848588576, "learning_rate": 2.342741281427413e-05, "loss": 0.5758, "step": 19808 }, { "epoch": 0.5783480774283962, "grad_norm": 0.5524558875775802, "learning_rate": 2.3425790754257908e-05, "loss": 0.6012, "step": 19809 }, { "epoch": 0.5783772736562436, "grad_norm": 0.5076445527093459, "learning_rate": 2.342416869424169e-05, "loss": 0.5969, "step": 19810 }, { "epoch": 0.578406469884091, "grad_norm": 0.49310904608065975, "learning_rate": 2.3422546634225468e-05, "loss": 0.5661, "step": 19811 }, { "epoch": 0.5784356661119383, "grad_norm": 0.532729995988036, "learning_rate": 2.3420924574209246e-05, "loss": 0.623, "step": 19812 }, { "epoch": 0.5784648623397857, "grad_norm": 0.5154804147959416, "learning_rate": 2.3419302514193024e-05, "loss": 0.6047, "step": 19813 }, { "epoch": 0.578494058567633, "grad_norm": 0.49464323338460275, "learning_rate": 2.3417680454176806e-05, "loss": 0.5564, "step": 19814 }, { "epoch": 0.5785232547954804, "grad_norm": 0.5178014511241086, "learning_rate": 2.3416058394160585e-05, "loss": 0.5809, "step": 19815 }, { "epoch": 0.5785524510233278, "grad_norm": 0.5491452396568642, "learning_rate": 2.3414436334144366e-05, "loss": 0.6686, "step": 19816 }, { "epoch": 0.5785816472511751, "grad_norm": 0.516874375284032, "learning_rate": 2.3412814274128145e-05, "loss": 0.6247, "step": 19817 }, { "epoch": 0.5786108434790225, "grad_norm": 0.5932779449273612, "learning_rate": 2.3411192214111923e-05, "loss": 0.8023, "step": 19818 }, { "epoch": 0.5786400397068698, "grad_norm": 0.5209622177181716, "learning_rate": 2.34095701540957e-05, "loss": 0.6195, "step": 19819 }, { "epoch": 0.5786692359347172, "grad_norm": 0.4855408346793091, "learning_rate": 2.3407948094079483e-05, "loss": 0.5414, "step": 19820 }, { "epoch": 0.5786984321625646, "grad_norm": 0.5067673343849365, "learning_rate": 2.340632603406326e-05, "loss": 0.5571, "step": 19821 }, { "epoch": 0.5787276283904119, "grad_norm": 0.5120870077031348, "learning_rate": 2.340470397404704e-05, "loss": 0.628, "step": 19822 }, { "epoch": 0.5787568246182593, "grad_norm": 0.4845358234743527, "learning_rate": 2.3403081914030818e-05, "loss": 0.5095, "step": 19823 }, { "epoch": 0.5787860208461066, "grad_norm": 0.5244910898597537, "learning_rate": 2.34014598540146e-05, "loss": 0.6371, "step": 19824 }, { "epoch": 0.578815217073954, "grad_norm": 0.5257887447994255, "learning_rate": 2.3399837793998382e-05, "loss": 0.5435, "step": 19825 }, { "epoch": 0.5788444133018014, "grad_norm": 0.5093962276567761, "learning_rate": 2.339821573398216e-05, "loss": 0.5485, "step": 19826 }, { "epoch": 0.5788736095296487, "grad_norm": 0.5706506151746675, "learning_rate": 2.339659367396594e-05, "loss": 0.6669, "step": 19827 }, { "epoch": 0.5789028057574961, "grad_norm": 0.5140482544700181, "learning_rate": 2.3394971613949717e-05, "loss": 0.6093, "step": 19828 }, { "epoch": 0.5789320019853434, "grad_norm": 0.553660267100271, "learning_rate": 2.3393349553933495e-05, "loss": 0.6509, "step": 19829 }, { "epoch": 0.5789611982131908, "grad_norm": 0.4779880456715918, "learning_rate": 2.3391727493917277e-05, "loss": 0.5066, "step": 19830 }, { "epoch": 0.5789903944410382, "grad_norm": 0.5150832972013366, "learning_rate": 2.3390105433901055e-05, "loss": 0.5974, "step": 19831 }, { "epoch": 0.5790195906688855, "grad_norm": 0.5493449336177215, "learning_rate": 2.3388483373884834e-05, "loss": 0.6423, "step": 19832 }, { "epoch": 0.5790487868967329, "grad_norm": 0.5678914205476018, "learning_rate": 2.3386861313868612e-05, "loss": 0.6674, "step": 19833 }, { "epoch": 0.5790779831245803, "grad_norm": 0.5016184982687839, "learning_rate": 2.3385239253852394e-05, "loss": 0.5622, "step": 19834 }, { "epoch": 0.5791071793524276, "grad_norm": 0.5393589320453727, "learning_rate": 2.3383617193836176e-05, "loss": 0.66, "step": 19835 }, { "epoch": 0.579136375580275, "grad_norm": 0.5372679656592735, "learning_rate": 2.3381995133819954e-05, "loss": 0.6474, "step": 19836 }, { "epoch": 0.5791655718081223, "grad_norm": 0.5345885623115423, "learning_rate": 2.3380373073803732e-05, "loss": 0.6469, "step": 19837 }, { "epoch": 0.5791947680359697, "grad_norm": 0.5064719890440038, "learning_rate": 2.337875101378751e-05, "loss": 0.5505, "step": 19838 }, { "epoch": 0.5792239642638171, "grad_norm": 0.5513105495155639, "learning_rate": 2.337712895377129e-05, "loss": 0.6272, "step": 19839 }, { "epoch": 0.5792531604916645, "grad_norm": 0.5149844898073107, "learning_rate": 2.337550689375507e-05, "loss": 0.591, "step": 19840 }, { "epoch": 0.5792823567195119, "grad_norm": 0.5137989895859529, "learning_rate": 2.337388483373885e-05, "loss": 0.6243, "step": 19841 }, { "epoch": 0.5793115529473593, "grad_norm": 0.5210916877115636, "learning_rate": 2.3372262773722628e-05, "loss": 0.6262, "step": 19842 }, { "epoch": 0.5793407491752066, "grad_norm": 0.5531343486191876, "learning_rate": 2.3370640713706406e-05, "loss": 0.636, "step": 19843 }, { "epoch": 0.579369945403054, "grad_norm": 0.5496002214139043, "learning_rate": 2.3369018653690188e-05, "loss": 0.6423, "step": 19844 }, { "epoch": 0.5793991416309013, "grad_norm": 0.5417096871242943, "learning_rate": 2.336739659367397e-05, "loss": 0.6503, "step": 19845 }, { "epoch": 0.5794283378587487, "grad_norm": 0.556755134337384, "learning_rate": 2.3365774533657748e-05, "loss": 0.6944, "step": 19846 }, { "epoch": 0.5794575340865961, "grad_norm": 0.49326886005898374, "learning_rate": 2.3364152473641526e-05, "loss": 0.5595, "step": 19847 }, { "epoch": 0.5794867303144434, "grad_norm": 0.5383736740119415, "learning_rate": 2.3362530413625305e-05, "loss": 0.6287, "step": 19848 }, { "epoch": 0.5795159265422908, "grad_norm": 0.5359742401388492, "learning_rate": 2.3360908353609083e-05, "loss": 0.6006, "step": 19849 }, { "epoch": 0.5795451227701381, "grad_norm": 0.4975052210878264, "learning_rate": 2.3359286293592865e-05, "loss": 0.5352, "step": 19850 }, { "epoch": 0.5795743189979855, "grad_norm": 0.5022009545661182, "learning_rate": 2.3357664233576643e-05, "loss": 0.5702, "step": 19851 }, { "epoch": 0.5796035152258329, "grad_norm": 0.5482900821029142, "learning_rate": 2.335604217356042e-05, "loss": 0.6532, "step": 19852 }, { "epoch": 0.5796327114536802, "grad_norm": 0.4998253817050837, "learning_rate": 2.33544201135442e-05, "loss": 0.5926, "step": 19853 }, { "epoch": 0.5796619076815276, "grad_norm": 0.5121328226823847, "learning_rate": 2.3352798053527982e-05, "loss": 0.6111, "step": 19854 }, { "epoch": 0.579691103909375, "grad_norm": 0.5106333882304956, "learning_rate": 2.3351175993511764e-05, "loss": 0.5778, "step": 19855 }, { "epoch": 0.5797203001372223, "grad_norm": 0.5116725974841355, "learning_rate": 2.3349553933495542e-05, "loss": 0.6081, "step": 19856 }, { "epoch": 0.5797494963650697, "grad_norm": 0.4625957443188017, "learning_rate": 2.334793187347932e-05, "loss": 0.4826, "step": 19857 }, { "epoch": 0.579778692592917, "grad_norm": 0.4864438654170926, "learning_rate": 2.33463098134631e-05, "loss": 0.5256, "step": 19858 }, { "epoch": 0.5798078888207644, "grad_norm": 0.48413636212891886, "learning_rate": 2.3344687753446877e-05, "loss": 0.5445, "step": 19859 }, { "epoch": 0.5798370850486118, "grad_norm": 0.5362648321429704, "learning_rate": 2.334306569343066e-05, "loss": 0.6167, "step": 19860 }, { "epoch": 0.5798662812764591, "grad_norm": 0.4756314263381301, "learning_rate": 2.3341443633414437e-05, "loss": 0.4948, "step": 19861 }, { "epoch": 0.5798954775043065, "grad_norm": 0.5335632557554731, "learning_rate": 2.3339821573398216e-05, "loss": 0.6269, "step": 19862 }, { "epoch": 0.5799246737321538, "grad_norm": 0.5415248120830142, "learning_rate": 2.3338199513381997e-05, "loss": 0.6283, "step": 19863 }, { "epoch": 0.5799538699600012, "grad_norm": 0.5424167290022575, "learning_rate": 2.3336577453365776e-05, "loss": 0.6603, "step": 19864 }, { "epoch": 0.5799830661878486, "grad_norm": 0.5412625245456474, "learning_rate": 2.3334955393349554e-05, "loss": 0.6279, "step": 19865 }, { "epoch": 0.5800122624156959, "grad_norm": 0.5062889918344351, "learning_rate": 2.3333333333333336e-05, "loss": 0.554, "step": 19866 }, { "epoch": 0.5800414586435433, "grad_norm": 0.5019635121973309, "learning_rate": 2.3331711273317114e-05, "loss": 0.581, "step": 19867 }, { "epoch": 0.5800706548713906, "grad_norm": 0.5452363680530278, "learning_rate": 2.3330089213300893e-05, "loss": 0.6224, "step": 19868 }, { "epoch": 0.580099851099238, "grad_norm": 0.47484570584167257, "learning_rate": 2.332846715328467e-05, "loss": 0.5517, "step": 19869 }, { "epoch": 0.5801290473270854, "grad_norm": 0.5076358506301181, "learning_rate": 2.3326845093268453e-05, "loss": 0.561, "step": 19870 }, { "epoch": 0.5801582435549327, "grad_norm": 0.4815761566875874, "learning_rate": 2.332522303325223e-05, "loss": 0.5281, "step": 19871 }, { "epoch": 0.5801874397827801, "grad_norm": 0.5246096692842145, "learning_rate": 2.3323600973236013e-05, "loss": 0.6071, "step": 19872 }, { "epoch": 0.5802166360106275, "grad_norm": 0.518155358338544, "learning_rate": 2.332197891321979e-05, "loss": 0.5893, "step": 19873 }, { "epoch": 0.5802458322384748, "grad_norm": 0.5639786797175615, "learning_rate": 2.332035685320357e-05, "loss": 0.7273, "step": 19874 }, { "epoch": 0.5802750284663222, "grad_norm": 0.6162347976873778, "learning_rate": 2.3318734793187348e-05, "loss": 0.6586, "step": 19875 }, { "epoch": 0.5803042246941695, "grad_norm": 0.49036322938734783, "learning_rate": 2.331711273317113e-05, "loss": 0.5677, "step": 19876 }, { "epoch": 0.5803334209220169, "grad_norm": 0.5163150819828486, "learning_rate": 2.3315490673154908e-05, "loss": 0.5924, "step": 19877 }, { "epoch": 0.5803626171498643, "grad_norm": 0.5391493750818194, "learning_rate": 2.3313868613138686e-05, "loss": 0.6033, "step": 19878 }, { "epoch": 0.5803918133777116, "grad_norm": 0.49942172611231483, "learning_rate": 2.3312246553122465e-05, "loss": 0.5813, "step": 19879 }, { "epoch": 0.580421009605559, "grad_norm": 0.5221134444815544, "learning_rate": 2.3310624493106247e-05, "loss": 0.6143, "step": 19880 }, { "epoch": 0.5804502058334063, "grad_norm": 0.5310327912770626, "learning_rate": 2.3309002433090025e-05, "loss": 0.5859, "step": 19881 }, { "epoch": 0.5804794020612537, "grad_norm": 0.5016959213490523, "learning_rate": 2.3307380373073807e-05, "loss": 0.5794, "step": 19882 }, { "epoch": 0.5805085982891011, "grad_norm": 0.523219569957507, "learning_rate": 2.3305758313057585e-05, "loss": 0.6476, "step": 19883 }, { "epoch": 0.5805377945169484, "grad_norm": 0.4941386974063102, "learning_rate": 2.3304136253041363e-05, "loss": 0.535, "step": 19884 }, { "epoch": 0.5805669907447958, "grad_norm": 0.5045031641158563, "learning_rate": 2.3302514193025142e-05, "loss": 0.5646, "step": 19885 }, { "epoch": 0.5805961869726431, "grad_norm": 0.5282597653426306, "learning_rate": 2.3300892133008924e-05, "loss": 0.6066, "step": 19886 }, { "epoch": 0.5806253832004905, "grad_norm": 0.4656047317481967, "learning_rate": 2.3299270072992702e-05, "loss": 0.5351, "step": 19887 }, { "epoch": 0.5806545794283379, "grad_norm": 0.5382864783537863, "learning_rate": 2.329764801297648e-05, "loss": 0.5594, "step": 19888 }, { "epoch": 0.5806837756561852, "grad_norm": 0.5692721635302613, "learning_rate": 2.329602595296026e-05, "loss": 0.6857, "step": 19889 }, { "epoch": 0.5807129718840326, "grad_norm": 0.5052604456132952, "learning_rate": 2.329440389294404e-05, "loss": 0.6035, "step": 19890 }, { "epoch": 0.58074216811188, "grad_norm": 0.5170838499156147, "learning_rate": 2.3292781832927822e-05, "loss": 0.5655, "step": 19891 }, { "epoch": 0.5807713643397273, "grad_norm": 0.5485075389920345, "learning_rate": 2.32911597729116e-05, "loss": 0.6072, "step": 19892 }, { "epoch": 0.5808005605675747, "grad_norm": 0.5317546329223252, "learning_rate": 2.328953771289538e-05, "loss": 0.6526, "step": 19893 }, { "epoch": 0.580829756795422, "grad_norm": 0.48621710482556185, "learning_rate": 2.3287915652879157e-05, "loss": 0.5479, "step": 19894 }, { "epoch": 0.5808589530232694, "grad_norm": 0.49685512576081187, "learning_rate": 2.3286293592862936e-05, "loss": 0.5805, "step": 19895 }, { "epoch": 0.5808881492511168, "grad_norm": 0.519624408070083, "learning_rate": 2.3284671532846718e-05, "loss": 0.6298, "step": 19896 }, { "epoch": 0.5809173454789641, "grad_norm": 0.5577589499924663, "learning_rate": 2.3283049472830496e-05, "loss": 0.6434, "step": 19897 }, { "epoch": 0.5809465417068115, "grad_norm": 0.5192484601226393, "learning_rate": 2.3281427412814274e-05, "loss": 0.637, "step": 19898 }, { "epoch": 0.5809757379346588, "grad_norm": 0.49271455450372176, "learning_rate": 2.3279805352798053e-05, "loss": 0.5837, "step": 19899 }, { "epoch": 0.5810049341625062, "grad_norm": 0.5123529136446493, "learning_rate": 2.3278183292781834e-05, "loss": 0.6137, "step": 19900 }, { "epoch": 0.5810341303903536, "grad_norm": 0.5124455725011403, "learning_rate": 2.3276561232765616e-05, "loss": 0.6157, "step": 19901 }, { "epoch": 0.5810633266182009, "grad_norm": 0.5077793935545598, "learning_rate": 2.3274939172749395e-05, "loss": 0.6061, "step": 19902 }, { "epoch": 0.5810925228460483, "grad_norm": 0.5604973380467891, "learning_rate": 2.3273317112733173e-05, "loss": 0.7068, "step": 19903 }, { "epoch": 0.5811217190738956, "grad_norm": 0.5591425259429003, "learning_rate": 2.327169505271695e-05, "loss": 0.6648, "step": 19904 }, { "epoch": 0.581150915301743, "grad_norm": 0.5231202009508501, "learning_rate": 2.327007299270073e-05, "loss": 0.655, "step": 19905 }, { "epoch": 0.5811801115295904, "grad_norm": 0.5144588658989667, "learning_rate": 2.326845093268451e-05, "loss": 0.6241, "step": 19906 }, { "epoch": 0.5812093077574377, "grad_norm": 0.47367224480020964, "learning_rate": 2.326682887266829e-05, "loss": 0.5174, "step": 19907 }, { "epoch": 0.5812385039852851, "grad_norm": 0.5308008308446288, "learning_rate": 2.3265206812652068e-05, "loss": 0.5975, "step": 19908 }, { "epoch": 0.5812677002131325, "grad_norm": 0.46216955810686605, "learning_rate": 2.3263584752635847e-05, "loss": 0.496, "step": 19909 }, { "epoch": 0.5812968964409798, "grad_norm": 0.48778425528237296, "learning_rate": 2.3261962692619628e-05, "loss": 0.5945, "step": 19910 }, { "epoch": 0.5813260926688272, "grad_norm": 0.5124346420270491, "learning_rate": 2.326034063260341e-05, "loss": 0.5132, "step": 19911 }, { "epoch": 0.5813552888966745, "grad_norm": 0.4859577900424106, "learning_rate": 2.325871857258719e-05, "loss": 0.5294, "step": 19912 }, { "epoch": 0.5813844851245219, "grad_norm": 0.5144949668844494, "learning_rate": 2.3257096512570967e-05, "loss": 0.5988, "step": 19913 }, { "epoch": 0.5814136813523693, "grad_norm": 0.49576917834553774, "learning_rate": 2.3255474452554745e-05, "loss": 0.5498, "step": 19914 }, { "epoch": 0.5814428775802166, "grad_norm": 0.5012644175430717, "learning_rate": 2.3253852392538524e-05, "loss": 0.5693, "step": 19915 }, { "epoch": 0.581472073808064, "grad_norm": 0.49810686874671556, "learning_rate": 2.3252230332522305e-05, "loss": 0.571, "step": 19916 }, { "epoch": 0.5815012700359113, "grad_norm": 0.509899469919428, "learning_rate": 2.3250608272506084e-05, "loss": 0.5845, "step": 19917 }, { "epoch": 0.5815304662637587, "grad_norm": 0.5158272954225965, "learning_rate": 2.3248986212489862e-05, "loss": 0.5935, "step": 19918 }, { "epoch": 0.5815596624916061, "grad_norm": 0.4938863273707686, "learning_rate": 2.324736415247364e-05, "loss": 0.5414, "step": 19919 }, { "epoch": 0.5815888587194534, "grad_norm": 0.5307587096435971, "learning_rate": 2.3245742092457422e-05, "loss": 0.6043, "step": 19920 }, { "epoch": 0.5816180549473008, "grad_norm": 0.5111945107468034, "learning_rate": 2.3244120032441204e-05, "loss": 0.6026, "step": 19921 }, { "epoch": 0.5816472511751482, "grad_norm": 0.5161947827330052, "learning_rate": 2.3242497972424982e-05, "loss": 0.5745, "step": 19922 }, { "epoch": 0.5816764474029955, "grad_norm": 0.5162359824921093, "learning_rate": 2.324087591240876e-05, "loss": 0.5925, "step": 19923 }, { "epoch": 0.5817056436308429, "grad_norm": 0.5285392840204639, "learning_rate": 2.323925385239254e-05, "loss": 0.597, "step": 19924 }, { "epoch": 0.5817348398586902, "grad_norm": 0.49258291721711844, "learning_rate": 2.3237631792376317e-05, "loss": 0.6079, "step": 19925 }, { "epoch": 0.5817640360865376, "grad_norm": 0.546091542779339, "learning_rate": 2.32360097323601e-05, "loss": 0.6383, "step": 19926 }, { "epoch": 0.581793232314385, "grad_norm": 0.5157917387076769, "learning_rate": 2.3234387672343878e-05, "loss": 0.5847, "step": 19927 }, { "epoch": 0.5818224285422323, "grad_norm": 0.5184671233509139, "learning_rate": 2.3232765612327656e-05, "loss": 0.6174, "step": 19928 }, { "epoch": 0.5818516247700797, "grad_norm": 0.5226904249864757, "learning_rate": 2.3231143552311438e-05, "loss": 0.6247, "step": 19929 }, { "epoch": 0.581880820997927, "grad_norm": 0.5005531415751201, "learning_rate": 2.3229521492295216e-05, "loss": 0.5283, "step": 19930 }, { "epoch": 0.5819100172257744, "grad_norm": 0.5243740785679734, "learning_rate": 2.3227899432278994e-05, "loss": 0.6115, "step": 19931 }, { "epoch": 0.5819392134536218, "grad_norm": 0.5414004755101786, "learning_rate": 2.3226277372262776e-05, "loss": 0.6343, "step": 19932 }, { "epoch": 0.5819684096814691, "grad_norm": 0.5228043717029518, "learning_rate": 2.3224655312246555e-05, "loss": 0.5921, "step": 19933 }, { "epoch": 0.5819976059093165, "grad_norm": 0.5671942486385939, "learning_rate": 2.3223033252230333e-05, "loss": 0.7444, "step": 19934 }, { "epoch": 0.5820268021371638, "grad_norm": 0.48148367189029323, "learning_rate": 2.322141119221411e-05, "loss": 0.5382, "step": 19935 }, { "epoch": 0.5820559983650112, "grad_norm": 0.5416680163513384, "learning_rate": 2.3219789132197893e-05, "loss": 0.6402, "step": 19936 }, { "epoch": 0.5820851945928586, "grad_norm": 0.5193476000021404, "learning_rate": 2.321816707218167e-05, "loss": 0.6136, "step": 19937 }, { "epoch": 0.5821143908207059, "grad_norm": 0.5199556740472621, "learning_rate": 2.321654501216545e-05, "loss": 0.5806, "step": 19938 }, { "epoch": 0.5821435870485533, "grad_norm": 0.5327825663655936, "learning_rate": 2.321492295214923e-05, "loss": 0.6508, "step": 19939 }, { "epoch": 0.5821727832764007, "grad_norm": 0.5518556506567905, "learning_rate": 2.321330089213301e-05, "loss": 0.6382, "step": 19940 }, { "epoch": 0.582201979504248, "grad_norm": 0.5113524722518922, "learning_rate": 2.321167883211679e-05, "loss": 0.577, "step": 19941 }, { "epoch": 0.5822311757320954, "grad_norm": 0.547358164844592, "learning_rate": 2.321005677210057e-05, "loss": 0.654, "step": 19942 }, { "epoch": 0.5822603719599427, "grad_norm": 0.47522784005630275, "learning_rate": 2.320843471208435e-05, "loss": 0.5139, "step": 19943 }, { "epoch": 0.5822895681877901, "grad_norm": 0.4806340136712981, "learning_rate": 2.3206812652068127e-05, "loss": 0.5499, "step": 19944 }, { "epoch": 0.5823187644156375, "grad_norm": 0.5015193772593958, "learning_rate": 2.3205190592051905e-05, "loss": 0.5731, "step": 19945 }, { "epoch": 0.5823479606434848, "grad_norm": 0.7525860012378767, "learning_rate": 2.3203568532035687e-05, "loss": 0.629, "step": 19946 }, { "epoch": 0.5823771568713322, "grad_norm": 0.5358611531052601, "learning_rate": 2.3201946472019465e-05, "loss": 0.6068, "step": 19947 }, { "epoch": 0.5824063530991795, "grad_norm": 0.5376705363936679, "learning_rate": 2.3200324412003247e-05, "loss": 0.5886, "step": 19948 }, { "epoch": 0.5824355493270269, "grad_norm": 0.553021583098764, "learning_rate": 2.3198702351987026e-05, "loss": 0.6212, "step": 19949 }, { "epoch": 0.5824647455548743, "grad_norm": 0.5770833926645377, "learning_rate": 2.3197080291970804e-05, "loss": 0.6518, "step": 19950 }, { "epoch": 0.5824939417827216, "grad_norm": 0.5171840198662929, "learning_rate": 2.3195458231954582e-05, "loss": 0.6136, "step": 19951 }, { "epoch": 0.582523138010569, "grad_norm": 0.5389143197054175, "learning_rate": 2.3193836171938364e-05, "loss": 0.6338, "step": 19952 }, { "epoch": 0.5825523342384163, "grad_norm": 0.5137029296956325, "learning_rate": 2.3192214111922142e-05, "loss": 0.6184, "step": 19953 }, { "epoch": 0.5825815304662637, "grad_norm": 0.513837109824642, "learning_rate": 2.319059205190592e-05, "loss": 0.5918, "step": 19954 }, { "epoch": 0.5826107266941111, "grad_norm": 0.4900647346933796, "learning_rate": 2.31889699918897e-05, "loss": 0.567, "step": 19955 }, { "epoch": 0.5826399229219584, "grad_norm": 0.5403918242015805, "learning_rate": 2.318734793187348e-05, "loss": 0.633, "step": 19956 }, { "epoch": 0.5826691191498058, "grad_norm": 0.5149774249946483, "learning_rate": 2.3185725871857263e-05, "loss": 0.587, "step": 19957 }, { "epoch": 0.5826983153776532, "grad_norm": 0.5667215114606549, "learning_rate": 2.318410381184104e-05, "loss": 0.6973, "step": 19958 }, { "epoch": 0.5827275116055005, "grad_norm": 0.511942475483095, "learning_rate": 2.318248175182482e-05, "loss": 0.5967, "step": 19959 }, { "epoch": 0.582756707833348, "grad_norm": 0.5202096675263449, "learning_rate": 2.3180859691808598e-05, "loss": 0.5852, "step": 19960 }, { "epoch": 0.5827859040611953, "grad_norm": 0.5024240598353993, "learning_rate": 2.3179237631792376e-05, "loss": 0.5934, "step": 19961 }, { "epoch": 0.5828151002890427, "grad_norm": 0.5595593687551021, "learning_rate": 2.3177615571776158e-05, "loss": 0.6308, "step": 19962 }, { "epoch": 0.5828442965168901, "grad_norm": 0.49855093244876675, "learning_rate": 2.3175993511759936e-05, "loss": 0.5684, "step": 19963 }, { "epoch": 0.5828734927447374, "grad_norm": 0.4859144382265249, "learning_rate": 2.3174371451743715e-05, "loss": 0.5333, "step": 19964 }, { "epoch": 0.5829026889725848, "grad_norm": 0.5635696913980682, "learning_rate": 2.3172749391727493e-05, "loss": 0.6978, "step": 19965 }, { "epoch": 0.5829318852004322, "grad_norm": 0.5510756935644443, "learning_rate": 2.317112733171127e-05, "loss": 0.6273, "step": 19966 }, { "epoch": 0.5829610814282795, "grad_norm": 0.5459680049470262, "learning_rate": 2.3169505271695057e-05, "loss": 0.6215, "step": 19967 }, { "epoch": 0.5829902776561269, "grad_norm": 0.5535786796729626, "learning_rate": 2.3167883211678835e-05, "loss": 0.6353, "step": 19968 }, { "epoch": 0.5830194738839742, "grad_norm": 0.5343948037205591, "learning_rate": 2.3166261151662613e-05, "loss": 0.6503, "step": 19969 }, { "epoch": 0.5830486701118216, "grad_norm": 0.5669695678525662, "learning_rate": 2.3164639091646392e-05, "loss": 0.6976, "step": 19970 }, { "epoch": 0.583077866339669, "grad_norm": 0.48287743526007326, "learning_rate": 2.316301703163017e-05, "loss": 0.5679, "step": 19971 }, { "epoch": 0.5831070625675163, "grad_norm": 0.5006118342151348, "learning_rate": 2.3161394971613952e-05, "loss": 0.5803, "step": 19972 }, { "epoch": 0.5831362587953637, "grad_norm": 0.48045257643146183, "learning_rate": 2.315977291159773e-05, "loss": 0.5408, "step": 19973 }, { "epoch": 0.583165455023211, "grad_norm": 0.5082017918731124, "learning_rate": 2.315815085158151e-05, "loss": 0.5816, "step": 19974 }, { "epoch": 0.5831946512510584, "grad_norm": 0.5417458396903901, "learning_rate": 2.3156528791565287e-05, "loss": 0.6326, "step": 19975 }, { "epoch": 0.5832238474789058, "grad_norm": 0.4782292318847271, "learning_rate": 2.315490673154907e-05, "loss": 0.547, "step": 19976 }, { "epoch": 0.5832530437067531, "grad_norm": 0.5431161110581106, "learning_rate": 2.315328467153285e-05, "loss": 0.6583, "step": 19977 }, { "epoch": 0.5832822399346005, "grad_norm": 0.49694543118794937, "learning_rate": 2.315166261151663e-05, "loss": 0.58, "step": 19978 }, { "epoch": 0.5833114361624478, "grad_norm": 0.5229069219008798, "learning_rate": 2.3150040551500407e-05, "loss": 0.6518, "step": 19979 }, { "epoch": 0.5833406323902952, "grad_norm": 0.5432127333529267, "learning_rate": 2.3148418491484186e-05, "loss": 0.6326, "step": 19980 }, { "epoch": 0.5833698286181426, "grad_norm": 0.5083663548964935, "learning_rate": 2.3146796431467964e-05, "loss": 0.561, "step": 19981 }, { "epoch": 0.5833990248459899, "grad_norm": 0.5296068284491319, "learning_rate": 2.3145174371451746e-05, "loss": 0.6523, "step": 19982 }, { "epoch": 0.5834282210738373, "grad_norm": 0.49604984266157176, "learning_rate": 2.3143552311435524e-05, "loss": 0.553, "step": 19983 }, { "epoch": 0.5834574173016847, "grad_norm": 0.5049169130868983, "learning_rate": 2.3141930251419303e-05, "loss": 0.6051, "step": 19984 }, { "epoch": 0.583486613529532, "grad_norm": 0.5414061241371766, "learning_rate": 2.314030819140308e-05, "loss": 0.6367, "step": 19985 }, { "epoch": 0.5835158097573794, "grad_norm": 0.5700699665154995, "learning_rate": 2.3138686131386863e-05, "loss": 0.6531, "step": 19986 }, { "epoch": 0.5835450059852267, "grad_norm": 0.5236875110495776, "learning_rate": 2.313706407137064e-05, "loss": 0.6058, "step": 19987 }, { "epoch": 0.5835742022130741, "grad_norm": 0.48768419501704874, "learning_rate": 2.3135442011354423e-05, "loss": 0.4899, "step": 19988 }, { "epoch": 0.5836033984409215, "grad_norm": 0.4919605219922978, "learning_rate": 2.31338199513382e-05, "loss": 0.5568, "step": 19989 }, { "epoch": 0.5836325946687688, "grad_norm": 0.49982257367917815, "learning_rate": 2.313219789132198e-05, "loss": 0.5571, "step": 19990 }, { "epoch": 0.5836617908966162, "grad_norm": 0.532279379470066, "learning_rate": 2.3130575831305758e-05, "loss": 0.6352, "step": 19991 }, { "epoch": 0.5836909871244635, "grad_norm": 0.5695806820948013, "learning_rate": 2.312895377128954e-05, "loss": 0.702, "step": 19992 }, { "epoch": 0.5837201833523109, "grad_norm": 0.5173758950060589, "learning_rate": 2.3127331711273318e-05, "loss": 0.6365, "step": 19993 }, { "epoch": 0.5837493795801583, "grad_norm": 0.4836638893856114, "learning_rate": 2.3125709651257096e-05, "loss": 0.4977, "step": 19994 }, { "epoch": 0.5837785758080056, "grad_norm": 0.5268920228958923, "learning_rate": 2.3124087591240878e-05, "loss": 0.6276, "step": 19995 }, { "epoch": 0.583807772035853, "grad_norm": 0.5183322697751945, "learning_rate": 2.3122465531224657e-05, "loss": 0.5887, "step": 19996 }, { "epoch": 0.5838369682637004, "grad_norm": 0.543676237167477, "learning_rate": 2.3120843471208435e-05, "loss": 0.6161, "step": 19997 }, { "epoch": 0.5838661644915477, "grad_norm": 0.562860722264027, "learning_rate": 2.3119221411192217e-05, "loss": 0.6598, "step": 19998 }, { "epoch": 0.5838953607193951, "grad_norm": 0.5332668622038111, "learning_rate": 2.3117599351175995e-05, "loss": 0.6459, "step": 19999 }, { "epoch": 0.5839245569472424, "grad_norm": 0.5341729338465497, "learning_rate": 2.3115977291159773e-05, "loss": 0.5877, "step": 20000 }, { "epoch": 0.5839537531750898, "grad_norm": 0.4807639042201602, "learning_rate": 2.3114355231143552e-05, "loss": 0.4969, "step": 20001 }, { "epoch": 0.5839829494029372, "grad_norm": 0.4830564756011504, "learning_rate": 2.3112733171127334e-05, "loss": 0.5234, "step": 20002 }, { "epoch": 0.5840121456307845, "grad_norm": 0.49376450153533813, "learning_rate": 2.3111111111111112e-05, "loss": 0.5707, "step": 20003 }, { "epoch": 0.5840413418586319, "grad_norm": 0.44734706101637484, "learning_rate": 2.310948905109489e-05, "loss": 0.4687, "step": 20004 }, { "epoch": 0.5840705380864792, "grad_norm": 0.5285855119417845, "learning_rate": 2.3107866991078672e-05, "loss": 0.6202, "step": 20005 }, { "epoch": 0.5840997343143266, "grad_norm": 0.5201726176660667, "learning_rate": 2.310624493106245e-05, "loss": 0.6036, "step": 20006 }, { "epoch": 0.584128930542174, "grad_norm": 0.5325858863301997, "learning_rate": 2.310462287104623e-05, "loss": 0.6003, "step": 20007 }, { "epoch": 0.5841581267700213, "grad_norm": 0.545374341546994, "learning_rate": 2.310300081103001e-05, "loss": 0.6262, "step": 20008 }, { "epoch": 0.5841873229978687, "grad_norm": 0.5626591129663997, "learning_rate": 2.310137875101379e-05, "loss": 0.6034, "step": 20009 }, { "epoch": 0.584216519225716, "grad_norm": 0.5467451810406416, "learning_rate": 2.3099756690997567e-05, "loss": 0.6428, "step": 20010 }, { "epoch": 0.5842457154535634, "grad_norm": 0.5069326959095635, "learning_rate": 2.3098134630981346e-05, "loss": 0.5695, "step": 20011 }, { "epoch": 0.5842749116814108, "grad_norm": 0.5105054123736787, "learning_rate": 2.3096512570965127e-05, "loss": 0.6174, "step": 20012 }, { "epoch": 0.5843041079092581, "grad_norm": 0.4951657913527273, "learning_rate": 2.3094890510948906e-05, "loss": 0.5284, "step": 20013 }, { "epoch": 0.5843333041371055, "grad_norm": 0.5047908301610512, "learning_rate": 2.3093268450932688e-05, "loss": 0.5915, "step": 20014 }, { "epoch": 0.5843625003649529, "grad_norm": 0.5117516768966387, "learning_rate": 2.3091646390916466e-05, "loss": 0.6268, "step": 20015 }, { "epoch": 0.5843916965928002, "grad_norm": 0.5371182349483457, "learning_rate": 2.3090024330900244e-05, "loss": 0.6205, "step": 20016 }, { "epoch": 0.5844208928206476, "grad_norm": 0.564011010629598, "learning_rate": 2.3088402270884023e-05, "loss": 0.6471, "step": 20017 }, { "epoch": 0.5844500890484949, "grad_norm": 0.541173744616648, "learning_rate": 2.3086780210867804e-05, "loss": 0.6742, "step": 20018 }, { "epoch": 0.5844792852763423, "grad_norm": 0.48721996125149236, "learning_rate": 2.3085158150851583e-05, "loss": 0.569, "step": 20019 }, { "epoch": 0.5845084815041897, "grad_norm": 0.5758090754588271, "learning_rate": 2.308353609083536e-05, "loss": 0.6604, "step": 20020 }, { "epoch": 0.584537677732037, "grad_norm": 0.49864882412594197, "learning_rate": 2.308191403081914e-05, "loss": 0.5558, "step": 20021 }, { "epoch": 0.5845668739598844, "grad_norm": 0.5591002584525485, "learning_rate": 2.308029197080292e-05, "loss": 0.6916, "step": 20022 }, { "epoch": 0.5845960701877317, "grad_norm": 0.48773324112813615, "learning_rate": 2.3078669910786703e-05, "loss": 0.5454, "step": 20023 }, { "epoch": 0.5846252664155791, "grad_norm": 0.499820012215532, "learning_rate": 2.307704785077048e-05, "loss": 0.5453, "step": 20024 }, { "epoch": 0.5846544626434265, "grad_norm": 0.5132630702960945, "learning_rate": 2.307542579075426e-05, "loss": 0.605, "step": 20025 }, { "epoch": 0.5846836588712738, "grad_norm": 0.5336144071135686, "learning_rate": 2.3073803730738038e-05, "loss": 0.5906, "step": 20026 }, { "epoch": 0.5847128550991212, "grad_norm": 0.5274552135349629, "learning_rate": 2.3072181670721817e-05, "loss": 0.5882, "step": 20027 }, { "epoch": 0.5847420513269685, "grad_norm": 0.5071212318021097, "learning_rate": 2.30705596107056e-05, "loss": 0.5497, "step": 20028 }, { "epoch": 0.5847712475548159, "grad_norm": 0.5048588830267681, "learning_rate": 2.3068937550689377e-05, "loss": 0.5481, "step": 20029 }, { "epoch": 0.5848004437826633, "grad_norm": 0.4947126711467195, "learning_rate": 2.3067315490673155e-05, "loss": 0.544, "step": 20030 }, { "epoch": 0.5848296400105106, "grad_norm": 0.5190897003556619, "learning_rate": 2.3065693430656934e-05, "loss": 0.5328, "step": 20031 }, { "epoch": 0.584858836238358, "grad_norm": 0.5302332845926815, "learning_rate": 2.3064071370640712e-05, "loss": 0.6249, "step": 20032 }, { "epoch": 0.5848880324662054, "grad_norm": 0.5262889036718968, "learning_rate": 2.3062449310624497e-05, "loss": 0.6531, "step": 20033 }, { "epoch": 0.5849172286940527, "grad_norm": 0.5381338303254548, "learning_rate": 2.3060827250608275e-05, "loss": 0.6554, "step": 20034 }, { "epoch": 0.5849464249219001, "grad_norm": 0.5258674276779343, "learning_rate": 2.3059205190592054e-05, "loss": 0.6122, "step": 20035 }, { "epoch": 0.5849756211497474, "grad_norm": 0.5229939049879777, "learning_rate": 2.3057583130575832e-05, "loss": 0.5946, "step": 20036 }, { "epoch": 0.5850048173775948, "grad_norm": 0.5356785500816642, "learning_rate": 2.305596107055961e-05, "loss": 0.623, "step": 20037 }, { "epoch": 0.5850340136054422, "grad_norm": 0.4855965462340552, "learning_rate": 2.3054339010543392e-05, "loss": 0.5088, "step": 20038 }, { "epoch": 0.5850632098332895, "grad_norm": 0.5431104371549824, "learning_rate": 2.305271695052717e-05, "loss": 0.6314, "step": 20039 }, { "epoch": 0.5850924060611369, "grad_norm": 0.5313055219639529, "learning_rate": 2.305109489051095e-05, "loss": 0.5868, "step": 20040 }, { "epoch": 0.5851216022889842, "grad_norm": 0.5198551905472127, "learning_rate": 2.3049472830494727e-05, "loss": 0.617, "step": 20041 }, { "epoch": 0.5851507985168316, "grad_norm": 0.5102173780167335, "learning_rate": 2.304785077047851e-05, "loss": 0.5533, "step": 20042 }, { "epoch": 0.585179994744679, "grad_norm": 0.510373507028143, "learning_rate": 2.304622871046229e-05, "loss": 0.5538, "step": 20043 }, { "epoch": 0.5852091909725263, "grad_norm": 0.5365659036769763, "learning_rate": 2.304460665044607e-05, "loss": 0.623, "step": 20044 }, { "epoch": 0.5852383872003737, "grad_norm": 0.5339844926904793, "learning_rate": 2.3042984590429848e-05, "loss": 0.6365, "step": 20045 }, { "epoch": 0.585267583428221, "grad_norm": 0.5121812973190946, "learning_rate": 2.3041362530413626e-05, "loss": 0.6033, "step": 20046 }, { "epoch": 0.5852967796560684, "grad_norm": 0.5189409817638527, "learning_rate": 2.3039740470397404e-05, "loss": 0.6055, "step": 20047 }, { "epoch": 0.5853259758839158, "grad_norm": 0.5231877053107672, "learning_rate": 2.3038118410381186e-05, "loss": 0.6058, "step": 20048 }, { "epoch": 0.5853551721117631, "grad_norm": 0.4965778086222318, "learning_rate": 2.3036496350364965e-05, "loss": 0.5553, "step": 20049 }, { "epoch": 0.5853843683396105, "grad_norm": 0.49156507981079417, "learning_rate": 2.3034874290348743e-05, "loss": 0.5456, "step": 20050 }, { "epoch": 0.5854135645674579, "grad_norm": 0.5052861403165178, "learning_rate": 2.303325223033252e-05, "loss": 0.6129, "step": 20051 }, { "epoch": 0.5854427607953052, "grad_norm": 0.5180662810621872, "learning_rate": 2.3031630170316303e-05, "loss": 0.5905, "step": 20052 }, { "epoch": 0.5854719570231526, "grad_norm": 0.48920495107362677, "learning_rate": 2.303000811030008e-05, "loss": 0.5481, "step": 20053 }, { "epoch": 0.5855011532509999, "grad_norm": 0.5199944707849978, "learning_rate": 2.3028386050283863e-05, "loss": 0.6162, "step": 20054 }, { "epoch": 0.5855303494788473, "grad_norm": 0.5335662400930794, "learning_rate": 2.302676399026764e-05, "loss": 0.6785, "step": 20055 }, { "epoch": 0.5855595457066947, "grad_norm": 0.4949079129804592, "learning_rate": 2.302514193025142e-05, "loss": 0.5461, "step": 20056 }, { "epoch": 0.585588741934542, "grad_norm": 0.49119513966739353, "learning_rate": 2.30235198702352e-05, "loss": 0.5625, "step": 20057 }, { "epoch": 0.5856179381623894, "grad_norm": 0.5763613194976721, "learning_rate": 2.302189781021898e-05, "loss": 0.7065, "step": 20058 }, { "epoch": 0.5856471343902367, "grad_norm": 0.55530167350692, "learning_rate": 2.302027575020276e-05, "loss": 0.7229, "step": 20059 }, { "epoch": 0.5856763306180841, "grad_norm": 0.49292562359144626, "learning_rate": 2.3018653690186537e-05, "loss": 0.5484, "step": 20060 }, { "epoch": 0.5857055268459315, "grad_norm": 0.48606126012539175, "learning_rate": 2.301703163017032e-05, "loss": 0.5504, "step": 20061 }, { "epoch": 0.5857347230737788, "grad_norm": 0.5210477202052243, "learning_rate": 2.3015409570154097e-05, "loss": 0.6342, "step": 20062 }, { "epoch": 0.5857639193016262, "grad_norm": 0.5417781429000963, "learning_rate": 2.3013787510137875e-05, "loss": 0.6341, "step": 20063 }, { "epoch": 0.5857931155294736, "grad_norm": 0.505563440211927, "learning_rate": 2.3012165450121657e-05, "loss": 0.6032, "step": 20064 }, { "epoch": 0.5858223117573209, "grad_norm": 0.5098668749234502, "learning_rate": 2.3010543390105436e-05, "loss": 0.5677, "step": 20065 }, { "epoch": 0.5858515079851683, "grad_norm": 0.5080423301255501, "learning_rate": 2.3008921330089214e-05, "loss": 0.5503, "step": 20066 }, { "epoch": 0.5858807042130156, "grad_norm": 0.5298797035259677, "learning_rate": 2.3007299270072992e-05, "loss": 0.6178, "step": 20067 }, { "epoch": 0.585909900440863, "grad_norm": 0.521449734898922, "learning_rate": 2.3005677210056774e-05, "loss": 0.6285, "step": 20068 }, { "epoch": 0.5859390966687104, "grad_norm": 0.5382003617712208, "learning_rate": 2.3004055150040552e-05, "loss": 0.5725, "step": 20069 }, { "epoch": 0.5859682928965577, "grad_norm": 0.517454808683505, "learning_rate": 2.300243309002433e-05, "loss": 0.63, "step": 20070 }, { "epoch": 0.5859974891244051, "grad_norm": 0.5126326176373297, "learning_rate": 2.3000811030008113e-05, "loss": 0.5563, "step": 20071 }, { "epoch": 0.5860266853522524, "grad_norm": 0.5160817740974565, "learning_rate": 2.299918896999189e-05, "loss": 0.5943, "step": 20072 }, { "epoch": 0.5860558815800998, "grad_norm": 0.5429511054991131, "learning_rate": 2.299756690997567e-05, "loss": 0.6576, "step": 20073 }, { "epoch": 0.5860850778079472, "grad_norm": 0.5177543143136332, "learning_rate": 2.299594484995945e-05, "loss": 0.6625, "step": 20074 }, { "epoch": 0.5861142740357945, "grad_norm": 0.5448855519193783, "learning_rate": 2.299432278994323e-05, "loss": 0.6757, "step": 20075 }, { "epoch": 0.5861434702636419, "grad_norm": 0.5080820547134264, "learning_rate": 2.2992700729927008e-05, "loss": 0.6031, "step": 20076 }, { "epoch": 0.5861726664914892, "grad_norm": 0.49368160856283483, "learning_rate": 2.2991078669910786e-05, "loss": 0.5445, "step": 20077 }, { "epoch": 0.5862018627193366, "grad_norm": 0.5565954200530396, "learning_rate": 2.2989456609894568e-05, "loss": 0.6673, "step": 20078 }, { "epoch": 0.586231058947184, "grad_norm": 0.4717221600656498, "learning_rate": 2.2987834549878346e-05, "loss": 0.5315, "step": 20079 }, { "epoch": 0.5862602551750313, "grad_norm": 0.504741851814048, "learning_rate": 2.2986212489862128e-05, "loss": 0.5517, "step": 20080 }, { "epoch": 0.5862894514028788, "grad_norm": 0.5122271218305727, "learning_rate": 2.2984590429845906e-05, "loss": 0.5869, "step": 20081 }, { "epoch": 0.5863186476307262, "grad_norm": 0.47934459854318634, "learning_rate": 2.2982968369829685e-05, "loss": 0.5269, "step": 20082 }, { "epoch": 0.5863478438585735, "grad_norm": 0.5305848192370323, "learning_rate": 2.2981346309813463e-05, "loss": 0.6196, "step": 20083 }, { "epoch": 0.5863770400864209, "grad_norm": 0.5172000589729635, "learning_rate": 2.2979724249797245e-05, "loss": 0.6031, "step": 20084 }, { "epoch": 0.5864062363142682, "grad_norm": 0.536408043378438, "learning_rate": 2.2978102189781023e-05, "loss": 0.6299, "step": 20085 }, { "epoch": 0.5864354325421156, "grad_norm": 0.4960955879983508, "learning_rate": 2.29764801297648e-05, "loss": 0.5794, "step": 20086 }, { "epoch": 0.586464628769963, "grad_norm": 0.5197326674623692, "learning_rate": 2.297485806974858e-05, "loss": 0.5434, "step": 20087 }, { "epoch": 0.5864938249978103, "grad_norm": 0.5476582521348161, "learning_rate": 2.297323600973236e-05, "loss": 0.6448, "step": 20088 }, { "epoch": 0.5865230212256577, "grad_norm": 0.5537235360994484, "learning_rate": 2.297161394971614e-05, "loss": 0.6005, "step": 20089 }, { "epoch": 0.586552217453505, "grad_norm": 0.5060330280893786, "learning_rate": 2.2969991889699922e-05, "loss": 0.556, "step": 20090 }, { "epoch": 0.5865814136813524, "grad_norm": 0.5795167242764039, "learning_rate": 2.29683698296837e-05, "loss": 0.6472, "step": 20091 }, { "epoch": 0.5866106099091998, "grad_norm": 0.5541805664646318, "learning_rate": 2.296674776966748e-05, "loss": 0.6851, "step": 20092 }, { "epoch": 0.5866398061370471, "grad_norm": 0.5696716091607459, "learning_rate": 2.2965125709651257e-05, "loss": 0.6636, "step": 20093 }, { "epoch": 0.5866690023648945, "grad_norm": 0.5067129775616647, "learning_rate": 2.296350364963504e-05, "loss": 0.5633, "step": 20094 }, { "epoch": 0.5866981985927419, "grad_norm": 0.5019273628390641, "learning_rate": 2.2961881589618817e-05, "loss": 0.5646, "step": 20095 }, { "epoch": 0.5867273948205892, "grad_norm": 0.4894429828508931, "learning_rate": 2.2960259529602596e-05, "loss": 0.5226, "step": 20096 }, { "epoch": 0.5867565910484366, "grad_norm": 0.492443617766733, "learning_rate": 2.2958637469586374e-05, "loss": 0.535, "step": 20097 }, { "epoch": 0.5867857872762839, "grad_norm": 0.473344691342344, "learning_rate": 2.2957015409570152e-05, "loss": 0.4999, "step": 20098 }, { "epoch": 0.5868149835041313, "grad_norm": 0.5414366939123811, "learning_rate": 2.2955393349553937e-05, "loss": 0.6583, "step": 20099 }, { "epoch": 0.5868441797319787, "grad_norm": 0.5455330960294966, "learning_rate": 2.2953771289537716e-05, "loss": 0.6558, "step": 20100 }, { "epoch": 0.586873375959826, "grad_norm": 0.5194543531996882, "learning_rate": 2.2952149229521494e-05, "loss": 0.6393, "step": 20101 }, { "epoch": 0.5869025721876734, "grad_norm": 0.5254632114951505, "learning_rate": 2.2950527169505273e-05, "loss": 0.5964, "step": 20102 }, { "epoch": 0.5869317684155207, "grad_norm": 0.4903686139290168, "learning_rate": 2.294890510948905e-05, "loss": 0.5398, "step": 20103 }, { "epoch": 0.5869609646433681, "grad_norm": 0.4961763572772684, "learning_rate": 2.2947283049472833e-05, "loss": 0.5384, "step": 20104 }, { "epoch": 0.5869901608712155, "grad_norm": 0.5318705206484897, "learning_rate": 2.294566098945661e-05, "loss": 0.5724, "step": 20105 }, { "epoch": 0.5870193570990628, "grad_norm": 0.5176478669087166, "learning_rate": 2.294403892944039e-05, "loss": 0.6023, "step": 20106 }, { "epoch": 0.5870485533269102, "grad_norm": 0.6989236054004435, "learning_rate": 2.2942416869424168e-05, "loss": 0.6528, "step": 20107 }, { "epoch": 0.5870777495547576, "grad_norm": 0.5023026999154594, "learning_rate": 2.294079480940795e-05, "loss": 0.5601, "step": 20108 }, { "epoch": 0.5871069457826049, "grad_norm": 0.46980235802038506, "learning_rate": 2.2939172749391728e-05, "loss": 0.509, "step": 20109 }, { "epoch": 0.5871361420104523, "grad_norm": 0.5371467852824148, "learning_rate": 2.293755068937551e-05, "loss": 0.6487, "step": 20110 }, { "epoch": 0.5871653382382996, "grad_norm": 0.53124233071666, "learning_rate": 2.2935928629359288e-05, "loss": 0.6122, "step": 20111 }, { "epoch": 0.587194534466147, "grad_norm": 0.526729885122119, "learning_rate": 2.2934306569343067e-05, "loss": 0.627, "step": 20112 }, { "epoch": 0.5872237306939944, "grad_norm": 0.5202636036916908, "learning_rate": 2.2932684509326845e-05, "loss": 0.558, "step": 20113 }, { "epoch": 0.5872529269218417, "grad_norm": 0.515695047238919, "learning_rate": 2.2931062449310627e-05, "loss": 0.5816, "step": 20114 }, { "epoch": 0.5872821231496891, "grad_norm": 0.5166648048448379, "learning_rate": 2.2929440389294405e-05, "loss": 0.5911, "step": 20115 }, { "epoch": 0.5873113193775364, "grad_norm": 0.5270619754601351, "learning_rate": 2.2927818329278183e-05, "loss": 0.6011, "step": 20116 }, { "epoch": 0.5873405156053838, "grad_norm": 0.5424265395247141, "learning_rate": 2.2926196269261962e-05, "loss": 0.6576, "step": 20117 }, { "epoch": 0.5873697118332312, "grad_norm": 0.5020460193873232, "learning_rate": 2.2924574209245744e-05, "loss": 0.5971, "step": 20118 }, { "epoch": 0.5873989080610785, "grad_norm": 0.5397020736572525, "learning_rate": 2.2922952149229522e-05, "loss": 0.6847, "step": 20119 }, { "epoch": 0.5874281042889259, "grad_norm": 0.5065650353961973, "learning_rate": 2.2921330089213304e-05, "loss": 0.5797, "step": 20120 }, { "epoch": 0.5874573005167733, "grad_norm": 0.5367242061143958, "learning_rate": 2.2919708029197082e-05, "loss": 0.5627, "step": 20121 }, { "epoch": 0.5874864967446206, "grad_norm": 0.49116451931192456, "learning_rate": 2.291808596918086e-05, "loss": 0.5597, "step": 20122 }, { "epoch": 0.587515692972468, "grad_norm": 0.662388590100444, "learning_rate": 2.291646390916464e-05, "loss": 0.5449, "step": 20123 }, { "epoch": 0.5875448892003153, "grad_norm": 0.5291180947549619, "learning_rate": 2.291484184914842e-05, "loss": 0.5853, "step": 20124 }, { "epoch": 0.5875740854281627, "grad_norm": 0.4807113632598437, "learning_rate": 2.29132197891322e-05, "loss": 0.5203, "step": 20125 }, { "epoch": 0.5876032816560101, "grad_norm": 0.4962873103486435, "learning_rate": 2.2911597729115977e-05, "loss": 0.5565, "step": 20126 }, { "epoch": 0.5876324778838574, "grad_norm": 0.4895087083482619, "learning_rate": 2.290997566909976e-05, "loss": 0.539, "step": 20127 }, { "epoch": 0.5876616741117048, "grad_norm": 0.5280474071035572, "learning_rate": 2.2908353609083537e-05, "loss": 0.6235, "step": 20128 }, { "epoch": 0.5876908703395521, "grad_norm": 0.4999723981472367, "learning_rate": 2.2906731549067316e-05, "loss": 0.5593, "step": 20129 }, { "epoch": 0.5877200665673995, "grad_norm": 0.5076311124054992, "learning_rate": 2.2905109489051098e-05, "loss": 0.5908, "step": 20130 }, { "epoch": 0.5877492627952469, "grad_norm": 0.5226747251133154, "learning_rate": 2.2903487429034876e-05, "loss": 0.6025, "step": 20131 }, { "epoch": 0.5877784590230942, "grad_norm": 0.5616520369722215, "learning_rate": 2.2901865369018654e-05, "loss": 0.6862, "step": 20132 }, { "epoch": 0.5878076552509416, "grad_norm": 0.5245889065782087, "learning_rate": 2.2900243309002433e-05, "loss": 0.6031, "step": 20133 }, { "epoch": 0.587836851478789, "grad_norm": 0.5828212919274286, "learning_rate": 2.2898621248986214e-05, "loss": 0.6778, "step": 20134 }, { "epoch": 0.5878660477066363, "grad_norm": 0.5557122138374351, "learning_rate": 2.2896999188969993e-05, "loss": 0.6624, "step": 20135 }, { "epoch": 0.5878952439344837, "grad_norm": 0.5448533090364526, "learning_rate": 2.289537712895377e-05, "loss": 0.6866, "step": 20136 }, { "epoch": 0.587924440162331, "grad_norm": 0.4956226439293443, "learning_rate": 2.2893755068937553e-05, "loss": 0.5384, "step": 20137 }, { "epoch": 0.5879536363901784, "grad_norm": 0.5541836091242193, "learning_rate": 2.289213300892133e-05, "loss": 0.66, "step": 20138 }, { "epoch": 0.5879828326180258, "grad_norm": 0.5167943327047633, "learning_rate": 2.289051094890511e-05, "loss": 0.571, "step": 20139 }, { "epoch": 0.5880120288458731, "grad_norm": 0.500140047644269, "learning_rate": 2.288888888888889e-05, "loss": 0.564, "step": 20140 }, { "epoch": 0.5880412250737205, "grad_norm": 0.5149139752971441, "learning_rate": 2.288726682887267e-05, "loss": 0.5969, "step": 20141 }, { "epoch": 0.5880704213015678, "grad_norm": 0.5132442467548974, "learning_rate": 2.2885644768856448e-05, "loss": 0.5875, "step": 20142 }, { "epoch": 0.5880996175294152, "grad_norm": 0.5400319296003735, "learning_rate": 2.2884022708840227e-05, "loss": 0.5958, "step": 20143 }, { "epoch": 0.5881288137572626, "grad_norm": 0.5514005096813767, "learning_rate": 2.288240064882401e-05, "loss": 0.645, "step": 20144 }, { "epoch": 0.5881580099851099, "grad_norm": 0.5127227433110546, "learning_rate": 2.2880778588807787e-05, "loss": 0.5989, "step": 20145 }, { "epoch": 0.5881872062129573, "grad_norm": 0.5684074192439713, "learning_rate": 2.287915652879157e-05, "loss": 0.6551, "step": 20146 }, { "epoch": 0.5882164024408046, "grad_norm": 0.5428234704899434, "learning_rate": 2.2877534468775347e-05, "loss": 0.6648, "step": 20147 }, { "epoch": 0.588245598668652, "grad_norm": 0.4935673028510335, "learning_rate": 2.2875912408759125e-05, "loss": 0.4994, "step": 20148 }, { "epoch": 0.5882747948964994, "grad_norm": 0.4881698821636786, "learning_rate": 2.2874290348742904e-05, "loss": 0.4761, "step": 20149 }, { "epoch": 0.5883039911243467, "grad_norm": 0.49519170760207387, "learning_rate": 2.2872668288726685e-05, "loss": 0.5898, "step": 20150 }, { "epoch": 0.5883331873521941, "grad_norm": 0.5310940362717169, "learning_rate": 2.2871046228710464e-05, "loss": 0.5503, "step": 20151 }, { "epoch": 0.5883623835800414, "grad_norm": 0.5399268582795674, "learning_rate": 2.2869424168694242e-05, "loss": 0.6873, "step": 20152 }, { "epoch": 0.5883915798078888, "grad_norm": 0.528457752521191, "learning_rate": 2.286780210867802e-05, "loss": 0.6178, "step": 20153 }, { "epoch": 0.5884207760357362, "grad_norm": 0.4964486600080584, "learning_rate": 2.28661800486618e-05, "loss": 0.5956, "step": 20154 }, { "epoch": 0.5884499722635835, "grad_norm": 0.5349815045171697, "learning_rate": 2.286455798864558e-05, "loss": 0.6384, "step": 20155 }, { "epoch": 0.5884791684914309, "grad_norm": 0.5384303257827905, "learning_rate": 2.2862935928629362e-05, "loss": 0.5584, "step": 20156 }, { "epoch": 0.5885083647192783, "grad_norm": 0.5329932759330569, "learning_rate": 2.286131386861314e-05, "loss": 0.6536, "step": 20157 }, { "epoch": 0.5885375609471256, "grad_norm": 0.4847917769892484, "learning_rate": 2.285969180859692e-05, "loss": 0.5023, "step": 20158 }, { "epoch": 0.588566757174973, "grad_norm": 0.5315472197421729, "learning_rate": 2.2858069748580698e-05, "loss": 0.6162, "step": 20159 }, { "epoch": 0.5885959534028203, "grad_norm": 0.5088084766177694, "learning_rate": 2.285644768856448e-05, "loss": 0.5528, "step": 20160 }, { "epoch": 0.5886251496306677, "grad_norm": 0.5379482562736043, "learning_rate": 2.2854825628548258e-05, "loss": 0.6588, "step": 20161 }, { "epoch": 0.5886543458585151, "grad_norm": 0.4927310875067257, "learning_rate": 2.2853203568532036e-05, "loss": 0.5382, "step": 20162 }, { "epoch": 0.5886835420863624, "grad_norm": 0.5189676283379417, "learning_rate": 2.2851581508515814e-05, "loss": 0.594, "step": 20163 }, { "epoch": 0.5887127383142098, "grad_norm": 0.5235210320330678, "learning_rate": 2.2849959448499593e-05, "loss": 0.6165, "step": 20164 }, { "epoch": 0.5887419345420571, "grad_norm": 0.5422881083150342, "learning_rate": 2.2848337388483378e-05, "loss": 0.5927, "step": 20165 }, { "epoch": 0.5887711307699045, "grad_norm": 0.5106829291908739, "learning_rate": 2.2846715328467156e-05, "loss": 0.5819, "step": 20166 }, { "epoch": 0.5888003269977519, "grad_norm": 0.5556602956906248, "learning_rate": 2.2845093268450935e-05, "loss": 0.6682, "step": 20167 }, { "epoch": 0.5888295232255992, "grad_norm": 0.5129776946362233, "learning_rate": 2.2843471208434713e-05, "loss": 0.6241, "step": 20168 }, { "epoch": 0.5888587194534466, "grad_norm": 0.5402503047686287, "learning_rate": 2.284184914841849e-05, "loss": 0.6191, "step": 20169 }, { "epoch": 0.588887915681294, "grad_norm": 0.4855886973605472, "learning_rate": 2.2840227088402273e-05, "loss": 0.5637, "step": 20170 }, { "epoch": 0.5889171119091413, "grad_norm": 0.46964168703922843, "learning_rate": 2.283860502838605e-05, "loss": 0.4798, "step": 20171 }, { "epoch": 0.5889463081369887, "grad_norm": 0.5233489998260442, "learning_rate": 2.283698296836983e-05, "loss": 0.6109, "step": 20172 }, { "epoch": 0.588975504364836, "grad_norm": 0.5156887866361797, "learning_rate": 2.283536090835361e-05, "loss": 0.6154, "step": 20173 }, { "epoch": 0.5890047005926834, "grad_norm": 0.5059157564849704, "learning_rate": 2.283373884833739e-05, "loss": 0.5713, "step": 20174 }, { "epoch": 0.5890338968205308, "grad_norm": 0.5331283321448532, "learning_rate": 2.283211678832117e-05, "loss": 0.636, "step": 20175 }, { "epoch": 0.5890630930483781, "grad_norm": 0.4921009950550144, "learning_rate": 2.283049472830495e-05, "loss": 0.5877, "step": 20176 }, { "epoch": 0.5890922892762255, "grad_norm": 0.5332509439809487, "learning_rate": 2.282887266828873e-05, "loss": 0.6827, "step": 20177 }, { "epoch": 0.5891214855040728, "grad_norm": 0.5251646038477423, "learning_rate": 2.2827250608272507e-05, "loss": 0.6078, "step": 20178 }, { "epoch": 0.5891506817319202, "grad_norm": 0.5275547511334391, "learning_rate": 2.2825628548256285e-05, "loss": 0.6504, "step": 20179 }, { "epoch": 0.5891798779597676, "grad_norm": 0.511853499280626, "learning_rate": 2.2824006488240067e-05, "loss": 0.5824, "step": 20180 }, { "epoch": 0.5892090741876149, "grad_norm": 0.5280048467482541, "learning_rate": 2.2822384428223845e-05, "loss": 0.5872, "step": 20181 }, { "epoch": 0.5892382704154623, "grad_norm": 0.5078616326731294, "learning_rate": 2.2820762368207624e-05, "loss": 0.542, "step": 20182 }, { "epoch": 0.5892674666433096, "grad_norm": 0.5010313649082264, "learning_rate": 2.2819140308191402e-05, "loss": 0.5407, "step": 20183 }, { "epoch": 0.589296662871157, "grad_norm": 0.44686135670414606, "learning_rate": 2.2817518248175184e-05, "loss": 0.5087, "step": 20184 }, { "epoch": 0.5893258590990044, "grad_norm": 0.5463177920321036, "learning_rate": 2.2815896188158962e-05, "loss": 0.6539, "step": 20185 }, { "epoch": 0.5893550553268517, "grad_norm": 0.5097116397392454, "learning_rate": 2.2814274128142744e-05, "loss": 0.6111, "step": 20186 }, { "epoch": 0.5893842515546991, "grad_norm": 0.5183089125501731, "learning_rate": 2.2812652068126522e-05, "loss": 0.6197, "step": 20187 }, { "epoch": 0.5894134477825465, "grad_norm": 0.5151795539149098, "learning_rate": 2.28110300081103e-05, "loss": 0.6041, "step": 20188 }, { "epoch": 0.5894426440103938, "grad_norm": 0.5504900770885655, "learning_rate": 2.280940794809408e-05, "loss": 0.6839, "step": 20189 }, { "epoch": 0.5894718402382412, "grad_norm": 0.49171314863274457, "learning_rate": 2.280778588807786e-05, "loss": 0.5602, "step": 20190 }, { "epoch": 0.5895010364660885, "grad_norm": 0.4732243311289911, "learning_rate": 2.280616382806164e-05, "loss": 0.5135, "step": 20191 }, { "epoch": 0.5895302326939359, "grad_norm": 0.5550615363888796, "learning_rate": 2.2804541768045418e-05, "loss": 0.6522, "step": 20192 }, { "epoch": 0.5895594289217833, "grad_norm": 0.5119844158823689, "learning_rate": 2.28029197080292e-05, "loss": 0.5677, "step": 20193 }, { "epoch": 0.5895886251496306, "grad_norm": 0.5459772597210459, "learning_rate": 2.2801297648012978e-05, "loss": 0.6814, "step": 20194 }, { "epoch": 0.589617821377478, "grad_norm": 0.49703176372860075, "learning_rate": 2.2799675587996756e-05, "loss": 0.5512, "step": 20195 }, { "epoch": 0.5896470176053253, "grad_norm": 0.5086217283013642, "learning_rate": 2.2798053527980538e-05, "loss": 0.6165, "step": 20196 }, { "epoch": 0.5896762138331727, "grad_norm": 0.5499065416568761, "learning_rate": 2.2796431467964316e-05, "loss": 0.6747, "step": 20197 }, { "epoch": 0.5897054100610201, "grad_norm": 0.4984523152331141, "learning_rate": 2.2794809407948095e-05, "loss": 0.5816, "step": 20198 }, { "epoch": 0.5897346062888674, "grad_norm": 0.5459732552602637, "learning_rate": 2.2793187347931873e-05, "loss": 0.6431, "step": 20199 }, { "epoch": 0.5897638025167148, "grad_norm": 0.48608732716940806, "learning_rate": 2.2791565287915655e-05, "loss": 0.5424, "step": 20200 }, { "epoch": 0.5897929987445623, "grad_norm": 0.500459499287976, "learning_rate": 2.2789943227899433e-05, "loss": 0.5731, "step": 20201 }, { "epoch": 0.5898221949724096, "grad_norm": 0.4803313573849537, "learning_rate": 2.278832116788321e-05, "loss": 0.5405, "step": 20202 }, { "epoch": 0.589851391200257, "grad_norm": 0.5648348759976625, "learning_rate": 2.2786699107866993e-05, "loss": 0.6204, "step": 20203 }, { "epoch": 0.5898805874281043, "grad_norm": 0.5429419584907956, "learning_rate": 2.2785077047850772e-05, "loss": 0.5608, "step": 20204 }, { "epoch": 0.5899097836559517, "grad_norm": 0.5341349963381342, "learning_rate": 2.278345498783455e-05, "loss": 0.6021, "step": 20205 }, { "epoch": 0.5899389798837991, "grad_norm": 0.5686393587182649, "learning_rate": 2.2781832927818332e-05, "loss": 0.6294, "step": 20206 }, { "epoch": 0.5899681761116464, "grad_norm": 0.5273904292443414, "learning_rate": 2.278021086780211e-05, "loss": 0.5781, "step": 20207 }, { "epoch": 0.5899973723394938, "grad_norm": 0.5474495364698548, "learning_rate": 2.277858880778589e-05, "loss": 0.6554, "step": 20208 }, { "epoch": 0.5900265685673411, "grad_norm": 0.49246264552892405, "learning_rate": 2.2776966747769667e-05, "loss": 0.5442, "step": 20209 }, { "epoch": 0.5900557647951885, "grad_norm": 0.5291454263247091, "learning_rate": 2.2775344687753445e-05, "loss": 0.5654, "step": 20210 }, { "epoch": 0.5900849610230359, "grad_norm": 0.5227004151031062, "learning_rate": 2.2773722627737227e-05, "loss": 0.6153, "step": 20211 }, { "epoch": 0.5901141572508832, "grad_norm": 0.5270806272714179, "learning_rate": 2.277210056772101e-05, "loss": 0.6218, "step": 20212 }, { "epoch": 0.5901433534787306, "grad_norm": 0.6065991806080573, "learning_rate": 2.2770478507704787e-05, "loss": 0.6387, "step": 20213 }, { "epoch": 0.590172549706578, "grad_norm": 0.5065264627356834, "learning_rate": 2.2768856447688566e-05, "loss": 0.5459, "step": 20214 }, { "epoch": 0.5902017459344253, "grad_norm": 0.5546060471579951, "learning_rate": 2.2767234387672344e-05, "loss": 0.6238, "step": 20215 }, { "epoch": 0.5902309421622727, "grad_norm": 0.4783335796923275, "learning_rate": 2.2765612327656126e-05, "loss": 0.5494, "step": 20216 }, { "epoch": 0.59026013839012, "grad_norm": 0.5654050424509388, "learning_rate": 2.2763990267639904e-05, "loss": 0.7129, "step": 20217 }, { "epoch": 0.5902893346179674, "grad_norm": 0.515324614763358, "learning_rate": 2.2762368207623683e-05, "loss": 0.5717, "step": 20218 }, { "epoch": 0.5903185308458148, "grad_norm": 0.5095062735419139, "learning_rate": 2.276074614760746e-05, "loss": 0.5482, "step": 20219 }, { "epoch": 0.5903477270736621, "grad_norm": 0.4904801014294124, "learning_rate": 2.275912408759124e-05, "loss": 0.5617, "step": 20220 }, { "epoch": 0.5903769233015095, "grad_norm": 0.5820183228158182, "learning_rate": 2.275750202757502e-05, "loss": 0.6566, "step": 20221 }, { "epoch": 0.5904061195293568, "grad_norm": 0.5298457539582557, "learning_rate": 2.2755879967558803e-05, "loss": 0.5799, "step": 20222 }, { "epoch": 0.5904353157572042, "grad_norm": 0.5116733040200173, "learning_rate": 2.275425790754258e-05, "loss": 0.5735, "step": 20223 }, { "epoch": 0.5904645119850516, "grad_norm": 0.5122428643954758, "learning_rate": 2.275263584752636e-05, "loss": 0.5817, "step": 20224 }, { "epoch": 0.5904937082128989, "grad_norm": 0.5421662690552159, "learning_rate": 2.2751013787510138e-05, "loss": 0.6298, "step": 20225 }, { "epoch": 0.5905229044407463, "grad_norm": 0.4999346512961375, "learning_rate": 2.274939172749392e-05, "loss": 0.5728, "step": 20226 }, { "epoch": 0.5905521006685936, "grad_norm": 0.5096124050856048, "learning_rate": 2.2747769667477698e-05, "loss": 0.5977, "step": 20227 }, { "epoch": 0.590581296896441, "grad_norm": 0.4948736163011401, "learning_rate": 2.2746147607461476e-05, "loss": 0.552, "step": 20228 }, { "epoch": 0.5906104931242884, "grad_norm": 0.5278789321127784, "learning_rate": 2.2744525547445255e-05, "loss": 0.5878, "step": 20229 }, { "epoch": 0.5906396893521357, "grad_norm": 0.5107605713228898, "learning_rate": 2.2742903487429033e-05, "loss": 0.5895, "step": 20230 }, { "epoch": 0.5906688855799831, "grad_norm": 0.5596184444823803, "learning_rate": 2.2741281427412815e-05, "loss": 0.6606, "step": 20231 }, { "epoch": 0.5906980818078305, "grad_norm": 0.5035546105057935, "learning_rate": 2.2739659367396597e-05, "loss": 0.5739, "step": 20232 }, { "epoch": 0.5907272780356778, "grad_norm": 0.5167323673903788, "learning_rate": 2.2738037307380375e-05, "loss": 0.6258, "step": 20233 }, { "epoch": 0.5907564742635252, "grad_norm": 0.47572958258461373, "learning_rate": 2.2736415247364153e-05, "loss": 0.5249, "step": 20234 }, { "epoch": 0.5907856704913725, "grad_norm": 0.5223562047528618, "learning_rate": 2.2734793187347932e-05, "loss": 0.6275, "step": 20235 }, { "epoch": 0.5908148667192199, "grad_norm": 0.5547110185805356, "learning_rate": 2.2733171127331714e-05, "loss": 0.6843, "step": 20236 }, { "epoch": 0.5908440629470673, "grad_norm": 0.4676995780959615, "learning_rate": 2.2731549067315492e-05, "loss": 0.508, "step": 20237 }, { "epoch": 0.5908732591749146, "grad_norm": 0.4951690686707588, "learning_rate": 2.272992700729927e-05, "loss": 0.5717, "step": 20238 }, { "epoch": 0.590902455402762, "grad_norm": 0.5292895741253513, "learning_rate": 2.272830494728305e-05, "loss": 0.5321, "step": 20239 }, { "epoch": 0.5909316516306093, "grad_norm": 0.5099102441367245, "learning_rate": 2.2726682887266827e-05, "loss": 0.628, "step": 20240 }, { "epoch": 0.5909608478584567, "grad_norm": 0.5178566143756835, "learning_rate": 2.272506082725061e-05, "loss": 0.6016, "step": 20241 }, { "epoch": 0.5909900440863041, "grad_norm": 0.5976656714793993, "learning_rate": 2.272343876723439e-05, "loss": 0.6157, "step": 20242 }, { "epoch": 0.5910192403141514, "grad_norm": 0.4671646984205289, "learning_rate": 2.272181670721817e-05, "loss": 0.4869, "step": 20243 }, { "epoch": 0.5910484365419988, "grad_norm": 0.720967433071394, "learning_rate": 2.2720194647201947e-05, "loss": 0.6418, "step": 20244 }, { "epoch": 0.5910776327698462, "grad_norm": 0.5520578447576575, "learning_rate": 2.2718572587185726e-05, "loss": 0.6595, "step": 20245 }, { "epoch": 0.5911068289976935, "grad_norm": 0.5176285127864118, "learning_rate": 2.2716950527169508e-05, "loss": 0.5616, "step": 20246 }, { "epoch": 0.5911360252255409, "grad_norm": 0.4782568314068064, "learning_rate": 2.2715328467153286e-05, "loss": 0.5054, "step": 20247 }, { "epoch": 0.5911652214533882, "grad_norm": 0.5655257225601091, "learning_rate": 2.2713706407137064e-05, "loss": 0.6945, "step": 20248 }, { "epoch": 0.5911944176812356, "grad_norm": 0.5126378604754577, "learning_rate": 2.2712084347120843e-05, "loss": 0.595, "step": 20249 }, { "epoch": 0.591223613909083, "grad_norm": 0.5246251102934322, "learning_rate": 2.2710462287104624e-05, "loss": 0.5913, "step": 20250 }, { "epoch": 0.5912528101369303, "grad_norm": 0.47691461616356595, "learning_rate": 2.2708840227088403e-05, "loss": 0.54, "step": 20251 }, { "epoch": 0.5912820063647777, "grad_norm": 0.550396077544554, "learning_rate": 2.2707218167072185e-05, "loss": 0.6554, "step": 20252 }, { "epoch": 0.591311202592625, "grad_norm": 0.532602970248922, "learning_rate": 2.2705596107055963e-05, "loss": 0.6018, "step": 20253 }, { "epoch": 0.5913403988204724, "grad_norm": 0.48490397219551096, "learning_rate": 2.270397404703974e-05, "loss": 0.5626, "step": 20254 }, { "epoch": 0.5913695950483198, "grad_norm": 0.5320102706226596, "learning_rate": 2.270235198702352e-05, "loss": 0.5914, "step": 20255 }, { "epoch": 0.5913987912761671, "grad_norm": 0.5490708648586804, "learning_rate": 2.27007299270073e-05, "loss": 0.6246, "step": 20256 }, { "epoch": 0.5914279875040145, "grad_norm": 0.501214263801494, "learning_rate": 2.269910786699108e-05, "loss": 0.5628, "step": 20257 }, { "epoch": 0.5914571837318618, "grad_norm": 0.49652859409095434, "learning_rate": 2.2697485806974858e-05, "loss": 0.551, "step": 20258 }, { "epoch": 0.5914863799597092, "grad_norm": 0.5395246692080063, "learning_rate": 2.269586374695864e-05, "loss": 0.6219, "step": 20259 }, { "epoch": 0.5915155761875566, "grad_norm": 0.5000183732625388, "learning_rate": 2.269424168694242e-05, "loss": 0.5783, "step": 20260 }, { "epoch": 0.5915447724154039, "grad_norm": 0.4769933892661702, "learning_rate": 2.2692619626926197e-05, "loss": 0.5195, "step": 20261 }, { "epoch": 0.5915739686432513, "grad_norm": 0.4747954917006689, "learning_rate": 2.269099756690998e-05, "loss": 0.5216, "step": 20262 }, { "epoch": 0.5916031648710987, "grad_norm": 0.5191779093428478, "learning_rate": 2.2689375506893757e-05, "loss": 0.5833, "step": 20263 }, { "epoch": 0.591632361098946, "grad_norm": 0.5816761284326638, "learning_rate": 2.2687753446877535e-05, "loss": 0.7357, "step": 20264 }, { "epoch": 0.5916615573267934, "grad_norm": 0.4872616366933158, "learning_rate": 2.2686131386861314e-05, "loss": 0.5539, "step": 20265 }, { "epoch": 0.5916907535546407, "grad_norm": 0.5313372743279339, "learning_rate": 2.2684509326845095e-05, "loss": 0.6181, "step": 20266 }, { "epoch": 0.5917199497824881, "grad_norm": 0.502747425592923, "learning_rate": 2.2682887266828874e-05, "loss": 0.5473, "step": 20267 }, { "epoch": 0.5917491460103355, "grad_norm": 0.5576917281441967, "learning_rate": 2.2681265206812652e-05, "loss": 0.6315, "step": 20268 }, { "epoch": 0.5917783422381828, "grad_norm": 0.47975289809988836, "learning_rate": 2.2679643146796434e-05, "loss": 0.5342, "step": 20269 }, { "epoch": 0.5918075384660302, "grad_norm": 0.4978715480975633, "learning_rate": 2.2678021086780212e-05, "loss": 0.5295, "step": 20270 }, { "epoch": 0.5918367346938775, "grad_norm": 0.568342122508607, "learning_rate": 2.267639902676399e-05, "loss": 0.6886, "step": 20271 }, { "epoch": 0.5918659309217249, "grad_norm": 0.513395761650052, "learning_rate": 2.2674776966747772e-05, "loss": 0.6082, "step": 20272 }, { "epoch": 0.5918951271495723, "grad_norm": 0.5280636982391033, "learning_rate": 2.267315490673155e-05, "loss": 0.5859, "step": 20273 }, { "epoch": 0.5919243233774196, "grad_norm": 0.5104366411995843, "learning_rate": 2.267153284671533e-05, "loss": 0.5791, "step": 20274 }, { "epoch": 0.591953519605267, "grad_norm": 0.5450766250383545, "learning_rate": 2.2669910786699107e-05, "loss": 0.662, "step": 20275 }, { "epoch": 0.5919827158331143, "grad_norm": 0.48034319300844047, "learning_rate": 2.2668288726682886e-05, "loss": 0.5432, "step": 20276 }, { "epoch": 0.5920119120609617, "grad_norm": 0.5082260964298002, "learning_rate": 2.2666666666666668e-05, "loss": 0.5703, "step": 20277 }, { "epoch": 0.5920411082888091, "grad_norm": 0.46419583349565924, "learning_rate": 2.266504460665045e-05, "loss": 0.4782, "step": 20278 }, { "epoch": 0.5920703045166564, "grad_norm": 0.5556737240591518, "learning_rate": 2.2663422546634228e-05, "loss": 0.6534, "step": 20279 }, { "epoch": 0.5920995007445038, "grad_norm": 0.4945973873321509, "learning_rate": 2.2661800486618006e-05, "loss": 0.549, "step": 20280 }, { "epoch": 0.5921286969723512, "grad_norm": 0.5364636394386689, "learning_rate": 2.2660178426601785e-05, "loss": 0.5937, "step": 20281 }, { "epoch": 0.5921578932001985, "grad_norm": 0.525669926858206, "learning_rate": 2.2658556366585566e-05, "loss": 0.6177, "step": 20282 }, { "epoch": 0.5921870894280459, "grad_norm": 0.5693469782762904, "learning_rate": 2.2656934306569345e-05, "loss": 0.6957, "step": 20283 }, { "epoch": 0.5922162856558932, "grad_norm": 0.5464337410078579, "learning_rate": 2.2655312246553123e-05, "loss": 0.6543, "step": 20284 }, { "epoch": 0.5922454818837406, "grad_norm": 0.4713637705966615, "learning_rate": 2.26536901865369e-05, "loss": 0.5502, "step": 20285 }, { "epoch": 0.592274678111588, "grad_norm": 0.5653360023445133, "learning_rate": 2.265206812652068e-05, "loss": 0.6534, "step": 20286 }, { "epoch": 0.5923038743394353, "grad_norm": 0.5589515441120455, "learning_rate": 2.265044606650446e-05, "loss": 0.6313, "step": 20287 }, { "epoch": 0.5923330705672827, "grad_norm": 0.565006460738224, "learning_rate": 2.2648824006488243e-05, "loss": 0.6585, "step": 20288 }, { "epoch": 0.59236226679513, "grad_norm": 0.5375716338237913, "learning_rate": 2.264720194647202e-05, "loss": 0.6391, "step": 20289 }, { "epoch": 0.5923914630229774, "grad_norm": 0.5121286409789059, "learning_rate": 2.26455798864558e-05, "loss": 0.6077, "step": 20290 }, { "epoch": 0.5924206592508248, "grad_norm": 0.4986692258832714, "learning_rate": 2.264395782643958e-05, "loss": 0.5616, "step": 20291 }, { "epoch": 0.5924498554786721, "grad_norm": 0.5130318120938452, "learning_rate": 2.264233576642336e-05, "loss": 0.606, "step": 20292 }, { "epoch": 0.5924790517065195, "grad_norm": 0.5028658605777244, "learning_rate": 2.264071370640714e-05, "loss": 0.5684, "step": 20293 }, { "epoch": 0.5925082479343668, "grad_norm": 0.7321724038905247, "learning_rate": 2.2639091646390917e-05, "loss": 0.6524, "step": 20294 }, { "epoch": 0.5925374441622142, "grad_norm": 0.49952468749631596, "learning_rate": 2.2637469586374695e-05, "loss": 0.5497, "step": 20295 }, { "epoch": 0.5925666403900616, "grad_norm": 0.5377927846834114, "learning_rate": 2.2635847526358474e-05, "loss": 0.5983, "step": 20296 }, { "epoch": 0.5925958366179089, "grad_norm": 0.5250541080922049, "learning_rate": 2.2634225466342255e-05, "loss": 0.6385, "step": 20297 }, { "epoch": 0.5926250328457563, "grad_norm": 0.48070397166346124, "learning_rate": 2.2632603406326037e-05, "loss": 0.5337, "step": 20298 }, { "epoch": 0.5926542290736037, "grad_norm": 0.49539332261330665, "learning_rate": 2.2630981346309816e-05, "loss": 0.546, "step": 20299 }, { "epoch": 0.592683425301451, "grad_norm": 0.47805955128690125, "learning_rate": 2.2629359286293594e-05, "loss": 0.549, "step": 20300 }, { "epoch": 0.5927126215292984, "grad_norm": 0.5001634053500298, "learning_rate": 2.2627737226277372e-05, "loss": 0.5192, "step": 20301 }, { "epoch": 0.5927418177571457, "grad_norm": 0.5237386710514805, "learning_rate": 2.2626115166261154e-05, "loss": 0.5862, "step": 20302 }, { "epoch": 0.5927710139849931, "grad_norm": 0.5825823446119883, "learning_rate": 2.2624493106244932e-05, "loss": 0.7019, "step": 20303 }, { "epoch": 0.5928002102128405, "grad_norm": 0.5389675885609593, "learning_rate": 2.262287104622871e-05, "loss": 0.6395, "step": 20304 }, { "epoch": 0.5928294064406878, "grad_norm": 0.509925076850436, "learning_rate": 2.262124898621249e-05, "loss": 0.55, "step": 20305 }, { "epoch": 0.5928586026685352, "grad_norm": 0.5420813911712844, "learning_rate": 2.2619626926196268e-05, "loss": 0.6275, "step": 20306 }, { "epoch": 0.5928877988963825, "grad_norm": 0.5464175923513314, "learning_rate": 2.261800486618005e-05, "loss": 0.6394, "step": 20307 }, { "epoch": 0.5929169951242299, "grad_norm": 0.5221208108221994, "learning_rate": 2.261638280616383e-05, "loss": 0.5732, "step": 20308 }, { "epoch": 0.5929461913520773, "grad_norm": 0.4903052870634506, "learning_rate": 2.261476074614761e-05, "loss": 0.5635, "step": 20309 }, { "epoch": 0.5929753875799246, "grad_norm": 0.5399332602830433, "learning_rate": 2.2613138686131388e-05, "loss": 0.573, "step": 20310 }, { "epoch": 0.593004583807772, "grad_norm": 0.5511806367703624, "learning_rate": 2.2611516626115166e-05, "loss": 0.6044, "step": 20311 }, { "epoch": 0.5930337800356194, "grad_norm": 0.5297424022933416, "learning_rate": 2.2609894566098948e-05, "loss": 0.6239, "step": 20312 }, { "epoch": 0.5930629762634667, "grad_norm": 0.5826717063571963, "learning_rate": 2.2608272506082726e-05, "loss": 0.5951, "step": 20313 }, { "epoch": 0.5930921724913141, "grad_norm": 0.5266360816596806, "learning_rate": 2.2606650446066505e-05, "loss": 0.5882, "step": 20314 }, { "epoch": 0.5931213687191614, "grad_norm": 0.4788875294472101, "learning_rate": 2.2605028386050283e-05, "loss": 0.5121, "step": 20315 }, { "epoch": 0.5931505649470088, "grad_norm": 0.5006332012150723, "learning_rate": 2.2603406326034065e-05, "loss": 0.5381, "step": 20316 }, { "epoch": 0.5931797611748562, "grad_norm": 0.5425503565446297, "learning_rate": 2.2601784266017843e-05, "loss": 0.6576, "step": 20317 }, { "epoch": 0.5932089574027035, "grad_norm": 0.5226110026913081, "learning_rate": 2.2600162206001625e-05, "loss": 0.5985, "step": 20318 }, { "epoch": 0.5932381536305509, "grad_norm": 0.6473636158415135, "learning_rate": 2.2598540145985403e-05, "loss": 0.7016, "step": 20319 }, { "epoch": 0.5932673498583982, "grad_norm": 0.5083701151020026, "learning_rate": 2.2596918085969182e-05, "loss": 0.5752, "step": 20320 }, { "epoch": 0.5932965460862456, "grad_norm": 0.5280382852533496, "learning_rate": 2.259529602595296e-05, "loss": 0.6484, "step": 20321 }, { "epoch": 0.5933257423140931, "grad_norm": 0.5201644762078892, "learning_rate": 2.2593673965936742e-05, "loss": 0.6191, "step": 20322 }, { "epoch": 0.5933549385419404, "grad_norm": 0.5027318128505571, "learning_rate": 2.259205190592052e-05, "loss": 0.6025, "step": 20323 }, { "epoch": 0.5933841347697878, "grad_norm": 0.5111119570846275, "learning_rate": 2.25904298459043e-05, "loss": 0.5534, "step": 20324 }, { "epoch": 0.5934133309976352, "grad_norm": 0.8107940932674147, "learning_rate": 2.2588807785888077e-05, "loss": 0.7617, "step": 20325 }, { "epoch": 0.5934425272254825, "grad_norm": 0.5563837813219091, "learning_rate": 2.258718572587186e-05, "loss": 0.6596, "step": 20326 }, { "epoch": 0.5934717234533299, "grad_norm": 0.4739992674353487, "learning_rate": 2.2585563665855637e-05, "loss": 0.53, "step": 20327 }, { "epoch": 0.5935009196811772, "grad_norm": 0.5347327414348455, "learning_rate": 2.258394160583942e-05, "loss": 0.6164, "step": 20328 }, { "epoch": 0.5935301159090246, "grad_norm": 0.5172606335120429, "learning_rate": 2.2582319545823197e-05, "loss": 0.5777, "step": 20329 }, { "epoch": 0.593559312136872, "grad_norm": 0.5139721492107453, "learning_rate": 2.2580697485806976e-05, "loss": 0.6047, "step": 20330 }, { "epoch": 0.5935885083647193, "grad_norm": 0.5240567010097416, "learning_rate": 2.2579075425790754e-05, "loss": 0.5855, "step": 20331 }, { "epoch": 0.5936177045925667, "grad_norm": 0.496189198344896, "learning_rate": 2.2577453365774532e-05, "loss": 0.5406, "step": 20332 }, { "epoch": 0.593646900820414, "grad_norm": 0.6150959113452414, "learning_rate": 2.2575831305758314e-05, "loss": 0.6478, "step": 20333 }, { "epoch": 0.5936760970482614, "grad_norm": 0.5292620955674536, "learning_rate": 2.2574209245742093e-05, "loss": 0.6309, "step": 20334 }, { "epoch": 0.5937052932761088, "grad_norm": 0.5377298155372382, "learning_rate": 2.2572587185725874e-05, "loss": 0.62, "step": 20335 }, { "epoch": 0.5937344895039561, "grad_norm": 0.5259092082761875, "learning_rate": 2.2570965125709653e-05, "loss": 0.6226, "step": 20336 }, { "epoch": 0.5937636857318035, "grad_norm": 0.5357332042308048, "learning_rate": 2.256934306569343e-05, "loss": 0.6174, "step": 20337 }, { "epoch": 0.5937928819596509, "grad_norm": 0.5363133716454288, "learning_rate": 2.2567721005677213e-05, "loss": 0.6337, "step": 20338 }, { "epoch": 0.5938220781874982, "grad_norm": 0.5413594467625624, "learning_rate": 2.256609894566099e-05, "loss": 0.66, "step": 20339 }, { "epoch": 0.5938512744153456, "grad_norm": 0.5055775630587757, "learning_rate": 2.256447688564477e-05, "loss": 0.5903, "step": 20340 }, { "epoch": 0.5938804706431929, "grad_norm": 0.5403168385582566, "learning_rate": 2.2562854825628548e-05, "loss": 0.6801, "step": 20341 }, { "epoch": 0.5939096668710403, "grad_norm": 0.538971983224941, "learning_rate": 2.2561232765612326e-05, "loss": 0.551, "step": 20342 }, { "epoch": 0.5939388630988877, "grad_norm": 0.5083869554285432, "learning_rate": 2.2559610705596108e-05, "loss": 0.565, "step": 20343 }, { "epoch": 0.593968059326735, "grad_norm": 0.5236967599578574, "learning_rate": 2.255798864557989e-05, "loss": 0.5974, "step": 20344 }, { "epoch": 0.5939972555545824, "grad_norm": 0.5168693174819617, "learning_rate": 2.2556366585563668e-05, "loss": 0.6183, "step": 20345 }, { "epoch": 0.5940264517824297, "grad_norm": 0.4773142693028057, "learning_rate": 2.2554744525547447e-05, "loss": 0.5543, "step": 20346 }, { "epoch": 0.5940556480102771, "grad_norm": 0.5529930083589407, "learning_rate": 2.2553122465531225e-05, "loss": 0.637, "step": 20347 }, { "epoch": 0.5940848442381245, "grad_norm": 0.48361413846826923, "learning_rate": 2.2551500405515007e-05, "loss": 0.5616, "step": 20348 }, { "epoch": 0.5941140404659718, "grad_norm": 0.5051504458564109, "learning_rate": 2.2549878345498785e-05, "loss": 0.6042, "step": 20349 }, { "epoch": 0.5941432366938192, "grad_norm": 0.5168617982798475, "learning_rate": 2.2548256285482563e-05, "loss": 0.6144, "step": 20350 }, { "epoch": 0.5941724329216665, "grad_norm": 0.5376493856711914, "learning_rate": 2.2546634225466342e-05, "loss": 0.6051, "step": 20351 }, { "epoch": 0.5942016291495139, "grad_norm": 0.5227678571698253, "learning_rate": 2.254501216545012e-05, "loss": 0.616, "step": 20352 }, { "epoch": 0.5942308253773613, "grad_norm": 0.5081885437657652, "learning_rate": 2.2543390105433902e-05, "loss": 0.5777, "step": 20353 }, { "epoch": 0.5942600216052086, "grad_norm": 0.5175576717349817, "learning_rate": 2.2541768045417684e-05, "loss": 0.6169, "step": 20354 }, { "epoch": 0.594289217833056, "grad_norm": 0.5399133948142072, "learning_rate": 2.2540145985401462e-05, "loss": 0.6354, "step": 20355 }, { "epoch": 0.5943184140609034, "grad_norm": 0.5650380314600513, "learning_rate": 2.253852392538524e-05, "loss": 0.7048, "step": 20356 }, { "epoch": 0.5943476102887507, "grad_norm": 0.5210808561280134, "learning_rate": 2.253690186536902e-05, "loss": 0.6281, "step": 20357 }, { "epoch": 0.5943768065165981, "grad_norm": 0.5096503101796507, "learning_rate": 2.25352798053528e-05, "loss": 0.6, "step": 20358 }, { "epoch": 0.5944060027444454, "grad_norm": 0.5141961739674901, "learning_rate": 2.253365774533658e-05, "loss": 0.5833, "step": 20359 }, { "epoch": 0.5944351989722928, "grad_norm": 0.5091848713105596, "learning_rate": 2.2532035685320357e-05, "loss": 0.607, "step": 20360 }, { "epoch": 0.5944643952001402, "grad_norm": 0.49604597319595684, "learning_rate": 2.2530413625304136e-05, "loss": 0.5422, "step": 20361 }, { "epoch": 0.5944935914279875, "grad_norm": 0.4792893080726025, "learning_rate": 2.2528791565287914e-05, "loss": 0.5537, "step": 20362 }, { "epoch": 0.5945227876558349, "grad_norm": 0.49012006039243255, "learning_rate": 2.2527169505271696e-05, "loss": 0.5565, "step": 20363 }, { "epoch": 0.5945519838836822, "grad_norm": 0.5385823487337412, "learning_rate": 2.2525547445255478e-05, "loss": 0.6678, "step": 20364 }, { "epoch": 0.5945811801115296, "grad_norm": 0.5197410440967305, "learning_rate": 2.2523925385239256e-05, "loss": 0.6063, "step": 20365 }, { "epoch": 0.594610376339377, "grad_norm": 0.5275493099328552, "learning_rate": 2.2522303325223034e-05, "loss": 0.5827, "step": 20366 }, { "epoch": 0.5946395725672243, "grad_norm": 0.5118063322398796, "learning_rate": 2.2520681265206813e-05, "loss": 0.5611, "step": 20367 }, { "epoch": 0.5946687687950717, "grad_norm": 0.6366080738523701, "learning_rate": 2.2519059205190595e-05, "loss": 0.7241, "step": 20368 }, { "epoch": 0.594697965022919, "grad_norm": 0.5156101742580604, "learning_rate": 2.2517437145174373e-05, "loss": 0.6405, "step": 20369 }, { "epoch": 0.5947271612507664, "grad_norm": 0.5283829103937745, "learning_rate": 2.251581508515815e-05, "loss": 0.6419, "step": 20370 }, { "epoch": 0.5947563574786138, "grad_norm": 0.5447507153461588, "learning_rate": 2.251419302514193e-05, "loss": 0.6607, "step": 20371 }, { "epoch": 0.5947855537064611, "grad_norm": 0.5511974535786958, "learning_rate": 2.2512570965125708e-05, "loss": 0.6544, "step": 20372 }, { "epoch": 0.5948147499343085, "grad_norm": 0.506155194463937, "learning_rate": 2.251094890510949e-05, "loss": 0.6102, "step": 20373 }, { "epoch": 0.5948439461621559, "grad_norm": 0.541828554575844, "learning_rate": 2.250932684509327e-05, "loss": 0.6502, "step": 20374 }, { "epoch": 0.5948731423900032, "grad_norm": 0.4823131653712365, "learning_rate": 2.250770478507705e-05, "loss": 0.5423, "step": 20375 }, { "epoch": 0.5949023386178506, "grad_norm": 0.5292756080467764, "learning_rate": 2.2506082725060828e-05, "loss": 0.6027, "step": 20376 }, { "epoch": 0.5949315348456979, "grad_norm": 0.52682728621398, "learning_rate": 2.2504460665044607e-05, "loss": 0.5495, "step": 20377 }, { "epoch": 0.5949607310735453, "grad_norm": 0.5487120207798439, "learning_rate": 2.250283860502839e-05, "loss": 0.6496, "step": 20378 }, { "epoch": 0.5949899273013927, "grad_norm": 0.5287913292927331, "learning_rate": 2.2501216545012167e-05, "loss": 0.6104, "step": 20379 }, { "epoch": 0.59501912352924, "grad_norm": 0.5459320333796821, "learning_rate": 2.2499594484995945e-05, "loss": 0.6242, "step": 20380 }, { "epoch": 0.5950483197570874, "grad_norm": 0.5583423566881528, "learning_rate": 2.2497972424979724e-05, "loss": 0.6383, "step": 20381 }, { "epoch": 0.5950775159849347, "grad_norm": 0.5100380684912281, "learning_rate": 2.2496350364963505e-05, "loss": 0.5813, "step": 20382 }, { "epoch": 0.5951067122127821, "grad_norm": 0.4980425560583614, "learning_rate": 2.2494728304947284e-05, "loss": 0.589, "step": 20383 }, { "epoch": 0.5951359084406295, "grad_norm": 0.5256689253096658, "learning_rate": 2.2493106244931065e-05, "loss": 0.6166, "step": 20384 }, { "epoch": 0.5951651046684768, "grad_norm": 0.49960974788223156, "learning_rate": 2.2491484184914844e-05, "loss": 0.5258, "step": 20385 }, { "epoch": 0.5951943008963242, "grad_norm": 0.5342018867053913, "learning_rate": 2.2489862124898622e-05, "loss": 0.6088, "step": 20386 }, { "epoch": 0.5952234971241716, "grad_norm": 0.568800042922361, "learning_rate": 2.24882400648824e-05, "loss": 0.6194, "step": 20387 }, { "epoch": 0.5952526933520189, "grad_norm": 0.5389817327665283, "learning_rate": 2.2486618004866182e-05, "loss": 0.598, "step": 20388 }, { "epoch": 0.5952818895798663, "grad_norm": 0.49719666649427935, "learning_rate": 2.248499594484996e-05, "loss": 0.5213, "step": 20389 }, { "epoch": 0.5953110858077136, "grad_norm": 0.515362734049075, "learning_rate": 2.248337388483374e-05, "loss": 0.589, "step": 20390 }, { "epoch": 0.595340282035561, "grad_norm": 0.49473386855485973, "learning_rate": 2.2481751824817517e-05, "loss": 0.5735, "step": 20391 }, { "epoch": 0.5953694782634084, "grad_norm": 0.5257119442379573, "learning_rate": 2.24801297648013e-05, "loss": 0.6263, "step": 20392 }, { "epoch": 0.5953986744912557, "grad_norm": 0.5507575041816732, "learning_rate": 2.2478507704785078e-05, "loss": 0.6702, "step": 20393 }, { "epoch": 0.5954278707191031, "grad_norm": 0.5249590652852343, "learning_rate": 2.247688564476886e-05, "loss": 0.6377, "step": 20394 }, { "epoch": 0.5954570669469504, "grad_norm": 0.5577805394100821, "learning_rate": 2.2475263584752638e-05, "loss": 0.6349, "step": 20395 }, { "epoch": 0.5954862631747978, "grad_norm": 0.5345322289139167, "learning_rate": 2.2473641524736416e-05, "loss": 0.6349, "step": 20396 }, { "epoch": 0.5955154594026452, "grad_norm": 0.47750897033061546, "learning_rate": 2.2472019464720194e-05, "loss": 0.524, "step": 20397 }, { "epoch": 0.5955446556304925, "grad_norm": 0.5379136382378154, "learning_rate": 2.2470397404703973e-05, "loss": 0.6058, "step": 20398 }, { "epoch": 0.5955738518583399, "grad_norm": 0.5090939749076219, "learning_rate": 2.2468775344687755e-05, "loss": 0.5942, "step": 20399 }, { "epoch": 0.5956030480861872, "grad_norm": 0.5810584669120502, "learning_rate": 2.2467153284671533e-05, "loss": 0.7382, "step": 20400 }, { "epoch": 0.5956322443140346, "grad_norm": 0.5229939340516043, "learning_rate": 2.2465531224655315e-05, "loss": 0.5762, "step": 20401 }, { "epoch": 0.595661440541882, "grad_norm": 0.5676916402620247, "learning_rate": 2.2463909164639093e-05, "loss": 0.711, "step": 20402 }, { "epoch": 0.5956906367697293, "grad_norm": 0.4820219747047877, "learning_rate": 2.246228710462287e-05, "loss": 0.5151, "step": 20403 }, { "epoch": 0.5957198329975767, "grad_norm": 0.5857665614939106, "learning_rate": 2.2460665044606653e-05, "loss": 0.6388, "step": 20404 }, { "epoch": 0.595749029225424, "grad_norm": 0.548219929000656, "learning_rate": 2.245904298459043e-05, "loss": 0.6763, "step": 20405 }, { "epoch": 0.5957782254532714, "grad_norm": 0.5682783598748168, "learning_rate": 2.245742092457421e-05, "loss": 0.6894, "step": 20406 }, { "epoch": 0.5958074216811188, "grad_norm": 0.5522528645393456, "learning_rate": 2.245579886455799e-05, "loss": 0.6424, "step": 20407 }, { "epoch": 0.5958366179089661, "grad_norm": 0.5370440016347084, "learning_rate": 2.2454176804541767e-05, "loss": 0.5927, "step": 20408 }, { "epoch": 0.5958658141368135, "grad_norm": 0.5130200581298531, "learning_rate": 2.245255474452555e-05, "loss": 0.5569, "step": 20409 }, { "epoch": 0.5958950103646609, "grad_norm": 0.5619156177656817, "learning_rate": 2.245093268450933e-05, "loss": 0.737, "step": 20410 }, { "epoch": 0.5959242065925082, "grad_norm": 0.5098325778060513, "learning_rate": 2.244931062449311e-05, "loss": 0.5848, "step": 20411 }, { "epoch": 0.5959534028203556, "grad_norm": 0.5356829352490807, "learning_rate": 2.2447688564476887e-05, "loss": 0.5948, "step": 20412 }, { "epoch": 0.5959825990482029, "grad_norm": 0.5010657687735116, "learning_rate": 2.2446066504460665e-05, "loss": 0.5024, "step": 20413 }, { "epoch": 0.5960117952760503, "grad_norm": 0.6154993049463757, "learning_rate": 2.2444444444444447e-05, "loss": 0.716, "step": 20414 }, { "epoch": 0.5960409915038977, "grad_norm": 0.5258410830459065, "learning_rate": 2.2442822384428226e-05, "loss": 0.6146, "step": 20415 }, { "epoch": 0.596070187731745, "grad_norm": 0.4850631900469161, "learning_rate": 2.2441200324412004e-05, "loss": 0.5294, "step": 20416 }, { "epoch": 0.5960993839595924, "grad_norm": 0.5257082099010006, "learning_rate": 2.2439578264395782e-05, "loss": 0.6134, "step": 20417 }, { "epoch": 0.5961285801874397, "grad_norm": 0.5463868874276107, "learning_rate": 2.243795620437956e-05, "loss": 0.5527, "step": 20418 }, { "epoch": 0.5961577764152871, "grad_norm": 0.5356552238545416, "learning_rate": 2.2436334144363342e-05, "loss": 0.6748, "step": 20419 }, { "epoch": 0.5961869726431345, "grad_norm": 0.5548165612997127, "learning_rate": 2.2434712084347124e-05, "loss": 0.6465, "step": 20420 }, { "epoch": 0.5962161688709818, "grad_norm": 0.5590451748413426, "learning_rate": 2.2433090024330903e-05, "loss": 0.6639, "step": 20421 }, { "epoch": 0.5962453650988292, "grad_norm": 0.5201559699039068, "learning_rate": 2.243146796431468e-05, "loss": 0.6052, "step": 20422 }, { "epoch": 0.5962745613266766, "grad_norm": 0.48721749440872814, "learning_rate": 2.242984590429846e-05, "loss": 0.5228, "step": 20423 }, { "epoch": 0.5963037575545239, "grad_norm": 0.5070104292174157, "learning_rate": 2.242822384428224e-05, "loss": 0.6072, "step": 20424 }, { "epoch": 0.5963329537823713, "grad_norm": 0.5419527170078935, "learning_rate": 2.242660178426602e-05, "loss": 0.6257, "step": 20425 }, { "epoch": 0.5963621500102186, "grad_norm": 0.5328564500641324, "learning_rate": 2.2424979724249798e-05, "loss": 0.6362, "step": 20426 }, { "epoch": 0.596391346238066, "grad_norm": 0.5552376421895431, "learning_rate": 2.2423357664233576e-05, "loss": 0.6041, "step": 20427 }, { "epoch": 0.5964205424659134, "grad_norm": 0.5261344244689047, "learning_rate": 2.2421735604217355e-05, "loss": 0.6265, "step": 20428 }, { "epoch": 0.5964497386937607, "grad_norm": 0.5019847785162472, "learning_rate": 2.2420113544201136e-05, "loss": 0.5879, "step": 20429 }, { "epoch": 0.5964789349216081, "grad_norm": 0.539462728321786, "learning_rate": 2.2418491484184918e-05, "loss": 0.6695, "step": 20430 }, { "epoch": 0.5965081311494554, "grad_norm": 0.5035767550125151, "learning_rate": 2.2416869424168696e-05, "loss": 0.5817, "step": 20431 }, { "epoch": 0.5965373273773028, "grad_norm": 0.5183316794464025, "learning_rate": 2.2415247364152475e-05, "loss": 0.611, "step": 20432 }, { "epoch": 0.5965665236051502, "grad_norm": 0.49019231308904987, "learning_rate": 2.2413625304136253e-05, "loss": 0.5527, "step": 20433 }, { "epoch": 0.5965957198329975, "grad_norm": 0.5234463653935613, "learning_rate": 2.2412003244120035e-05, "loss": 0.6493, "step": 20434 }, { "epoch": 0.5966249160608449, "grad_norm": 0.5164493042745129, "learning_rate": 2.2410381184103813e-05, "loss": 0.6119, "step": 20435 }, { "epoch": 0.5966541122886923, "grad_norm": 0.5543336883818581, "learning_rate": 2.2408759124087592e-05, "loss": 0.6788, "step": 20436 }, { "epoch": 0.5966833085165396, "grad_norm": 0.5213139086488499, "learning_rate": 2.240713706407137e-05, "loss": 0.6289, "step": 20437 }, { "epoch": 0.596712504744387, "grad_norm": 0.5128559616323166, "learning_rate": 2.240551500405515e-05, "loss": 0.6004, "step": 20438 }, { "epoch": 0.5967417009722343, "grad_norm": 0.5159413955645894, "learning_rate": 2.240389294403893e-05, "loss": 0.6226, "step": 20439 }, { "epoch": 0.5967708972000817, "grad_norm": 0.5316802410316344, "learning_rate": 2.2402270884022712e-05, "loss": 0.6641, "step": 20440 }, { "epoch": 0.5968000934279291, "grad_norm": 0.4741902991658043, "learning_rate": 2.240064882400649e-05, "loss": 0.5176, "step": 20441 }, { "epoch": 0.5968292896557764, "grad_norm": 0.509503159045152, "learning_rate": 2.239902676399027e-05, "loss": 0.607, "step": 20442 }, { "epoch": 0.5968584858836239, "grad_norm": 0.5213515580507243, "learning_rate": 2.2397404703974047e-05, "loss": 0.6255, "step": 20443 }, { "epoch": 0.5968876821114713, "grad_norm": 0.555056517698247, "learning_rate": 2.239578264395783e-05, "loss": 0.685, "step": 20444 }, { "epoch": 0.5969168783393186, "grad_norm": 0.5322451180607671, "learning_rate": 2.2394160583941607e-05, "loss": 0.6368, "step": 20445 }, { "epoch": 0.596946074567166, "grad_norm": 0.5489970932749292, "learning_rate": 2.2392538523925386e-05, "loss": 0.6666, "step": 20446 }, { "epoch": 0.5969752707950133, "grad_norm": 0.48138687931571844, "learning_rate": 2.2390916463909164e-05, "loss": 0.5682, "step": 20447 }, { "epoch": 0.5970044670228607, "grad_norm": 0.5198539123357585, "learning_rate": 2.2389294403892946e-05, "loss": 0.6572, "step": 20448 }, { "epoch": 0.5970336632507081, "grad_norm": 0.48409587871603377, "learning_rate": 2.2387672343876724e-05, "loss": 0.5318, "step": 20449 }, { "epoch": 0.5970628594785554, "grad_norm": 0.5306044439817358, "learning_rate": 2.2386050283860506e-05, "loss": 0.6211, "step": 20450 }, { "epoch": 0.5970920557064028, "grad_norm": 0.5375773807196537, "learning_rate": 2.2384428223844284e-05, "loss": 0.661, "step": 20451 }, { "epoch": 0.5971212519342501, "grad_norm": 0.4982633927514836, "learning_rate": 2.2382806163828063e-05, "loss": 0.5815, "step": 20452 }, { "epoch": 0.5971504481620975, "grad_norm": 0.47654090234949675, "learning_rate": 2.238118410381184e-05, "loss": 0.5278, "step": 20453 }, { "epoch": 0.5971796443899449, "grad_norm": 0.4603749090073328, "learning_rate": 2.237956204379562e-05, "loss": 0.4763, "step": 20454 }, { "epoch": 0.5972088406177922, "grad_norm": 0.5273938506437942, "learning_rate": 2.23779399837794e-05, "loss": 0.5876, "step": 20455 }, { "epoch": 0.5972380368456396, "grad_norm": 0.5482588966901027, "learning_rate": 2.237631792376318e-05, "loss": 0.6517, "step": 20456 }, { "epoch": 0.597267233073487, "grad_norm": 0.5239580718772195, "learning_rate": 2.2374695863746958e-05, "loss": 0.5868, "step": 20457 }, { "epoch": 0.5972964293013343, "grad_norm": 0.5534750282648463, "learning_rate": 2.237307380373074e-05, "loss": 0.6246, "step": 20458 }, { "epoch": 0.5973256255291817, "grad_norm": 0.5169176718248151, "learning_rate": 2.2371451743714518e-05, "loss": 0.5997, "step": 20459 }, { "epoch": 0.597354821757029, "grad_norm": 0.5190342279914282, "learning_rate": 2.23698296836983e-05, "loss": 0.6043, "step": 20460 }, { "epoch": 0.5973840179848764, "grad_norm": 0.5447500138008776, "learning_rate": 2.2368207623682078e-05, "loss": 0.6663, "step": 20461 }, { "epoch": 0.5974132142127238, "grad_norm": 0.5430942786798407, "learning_rate": 2.2366585563665857e-05, "loss": 0.6395, "step": 20462 }, { "epoch": 0.5974424104405711, "grad_norm": 0.5428944135624854, "learning_rate": 2.2364963503649635e-05, "loss": 0.6333, "step": 20463 }, { "epoch": 0.5974716066684185, "grad_norm": 0.5523259450209248, "learning_rate": 2.2363341443633413e-05, "loss": 0.612, "step": 20464 }, { "epoch": 0.5975008028962658, "grad_norm": 0.5093277634533556, "learning_rate": 2.2361719383617195e-05, "loss": 0.5629, "step": 20465 }, { "epoch": 0.5975299991241132, "grad_norm": 0.5007718478549367, "learning_rate": 2.2360097323600973e-05, "loss": 0.5867, "step": 20466 }, { "epoch": 0.5975591953519606, "grad_norm": 0.4974191628814442, "learning_rate": 2.2358475263584755e-05, "loss": 0.5379, "step": 20467 }, { "epoch": 0.5975883915798079, "grad_norm": 0.526757075698298, "learning_rate": 2.2356853203568534e-05, "loss": 0.5941, "step": 20468 }, { "epoch": 0.5976175878076553, "grad_norm": 0.49972986823697485, "learning_rate": 2.2355231143552312e-05, "loss": 0.5793, "step": 20469 }, { "epoch": 0.5976467840355026, "grad_norm": 0.5570252683476501, "learning_rate": 2.2353609083536094e-05, "loss": 0.6475, "step": 20470 }, { "epoch": 0.59767598026335, "grad_norm": 0.5563170556116663, "learning_rate": 2.2351987023519872e-05, "loss": 0.649, "step": 20471 }, { "epoch": 0.5977051764911974, "grad_norm": 0.5197536019847298, "learning_rate": 2.235036496350365e-05, "loss": 0.5608, "step": 20472 }, { "epoch": 0.5977343727190447, "grad_norm": 0.5253354696324397, "learning_rate": 2.234874290348743e-05, "loss": 0.6152, "step": 20473 }, { "epoch": 0.5977635689468921, "grad_norm": 0.5647344282262445, "learning_rate": 2.2347120843471207e-05, "loss": 0.6324, "step": 20474 }, { "epoch": 0.5977927651747394, "grad_norm": 0.5406404240939375, "learning_rate": 2.234549878345499e-05, "loss": 0.6347, "step": 20475 }, { "epoch": 0.5978219614025868, "grad_norm": 0.5368659584139248, "learning_rate": 2.2343876723438767e-05, "loss": 0.6494, "step": 20476 }, { "epoch": 0.5978511576304342, "grad_norm": 0.47478029325386617, "learning_rate": 2.234225466342255e-05, "loss": 0.522, "step": 20477 }, { "epoch": 0.5978803538582815, "grad_norm": 0.6612521417970024, "learning_rate": 2.2340632603406327e-05, "loss": 0.6454, "step": 20478 }, { "epoch": 0.5979095500861289, "grad_norm": 0.5297096948238289, "learning_rate": 2.2339010543390106e-05, "loss": 0.6163, "step": 20479 }, { "epoch": 0.5979387463139763, "grad_norm": 0.4621830332575783, "learning_rate": 2.2337388483373888e-05, "loss": 0.5395, "step": 20480 }, { "epoch": 0.5979679425418236, "grad_norm": 0.6027485812523865, "learning_rate": 2.2335766423357666e-05, "loss": 0.6311, "step": 20481 }, { "epoch": 0.597997138769671, "grad_norm": 0.5294126481373186, "learning_rate": 2.2334144363341444e-05, "loss": 0.591, "step": 20482 }, { "epoch": 0.5980263349975183, "grad_norm": 0.524854056601559, "learning_rate": 2.2332522303325223e-05, "loss": 0.6209, "step": 20483 }, { "epoch": 0.5980555312253657, "grad_norm": 0.5102247208709385, "learning_rate": 2.2330900243309e-05, "loss": 0.5552, "step": 20484 }, { "epoch": 0.5980847274532131, "grad_norm": 0.49715764748283575, "learning_rate": 2.2329278183292783e-05, "loss": 0.5756, "step": 20485 }, { "epoch": 0.5981139236810604, "grad_norm": 0.534517812450916, "learning_rate": 2.2327656123276565e-05, "loss": 0.6425, "step": 20486 }, { "epoch": 0.5981431199089078, "grad_norm": 0.49701995960159084, "learning_rate": 2.2326034063260343e-05, "loss": 0.5219, "step": 20487 }, { "epoch": 0.5981723161367551, "grad_norm": 0.5197779422091213, "learning_rate": 2.232441200324412e-05, "loss": 0.6076, "step": 20488 }, { "epoch": 0.5982015123646025, "grad_norm": 0.548382790724794, "learning_rate": 2.23227899432279e-05, "loss": 0.6374, "step": 20489 }, { "epoch": 0.5982307085924499, "grad_norm": 0.5225707253637804, "learning_rate": 2.232116788321168e-05, "loss": 0.6269, "step": 20490 }, { "epoch": 0.5982599048202972, "grad_norm": 0.5298831106411223, "learning_rate": 2.231954582319546e-05, "loss": 0.6566, "step": 20491 }, { "epoch": 0.5982891010481446, "grad_norm": 0.5402344713574478, "learning_rate": 2.2317923763179238e-05, "loss": 0.6205, "step": 20492 }, { "epoch": 0.598318297275992, "grad_norm": 0.49342374569129727, "learning_rate": 2.2316301703163017e-05, "loss": 0.5211, "step": 20493 }, { "epoch": 0.5983474935038393, "grad_norm": 0.512142805536988, "learning_rate": 2.2314679643146795e-05, "loss": 0.559, "step": 20494 }, { "epoch": 0.5983766897316867, "grad_norm": 0.5643530199626346, "learning_rate": 2.2313057583130577e-05, "loss": 0.6852, "step": 20495 }, { "epoch": 0.598405885959534, "grad_norm": 0.568558803404427, "learning_rate": 2.231143552311436e-05, "loss": 0.6551, "step": 20496 }, { "epoch": 0.5984350821873814, "grad_norm": 0.5134127103337458, "learning_rate": 2.2309813463098137e-05, "loss": 0.6058, "step": 20497 }, { "epoch": 0.5984642784152288, "grad_norm": 0.5346965253736559, "learning_rate": 2.2308191403081915e-05, "loss": 0.6732, "step": 20498 }, { "epoch": 0.5984934746430761, "grad_norm": 0.5128655044343973, "learning_rate": 2.2306569343065694e-05, "loss": 0.6128, "step": 20499 }, { "epoch": 0.5985226708709235, "grad_norm": 0.4891306051252345, "learning_rate": 2.2304947283049475e-05, "loss": 0.5519, "step": 20500 }, { "epoch": 0.5985518670987708, "grad_norm": 0.5458942593663928, "learning_rate": 2.2303325223033254e-05, "loss": 0.6801, "step": 20501 }, { "epoch": 0.5985810633266182, "grad_norm": 0.5819658860991956, "learning_rate": 2.2301703163017032e-05, "loss": 0.6337, "step": 20502 }, { "epoch": 0.5986102595544656, "grad_norm": 0.5026224129877477, "learning_rate": 2.230008110300081e-05, "loss": 0.6043, "step": 20503 }, { "epoch": 0.5986394557823129, "grad_norm": 0.5061121582600085, "learning_rate": 2.229845904298459e-05, "loss": 0.5902, "step": 20504 }, { "epoch": 0.5986686520101603, "grad_norm": 0.5208179784378192, "learning_rate": 2.229683698296837e-05, "loss": 0.5638, "step": 20505 }, { "epoch": 0.5986978482380076, "grad_norm": 0.5499097854104772, "learning_rate": 2.2295214922952152e-05, "loss": 0.6351, "step": 20506 }, { "epoch": 0.598727044465855, "grad_norm": 0.5350718999048083, "learning_rate": 2.229359286293593e-05, "loss": 0.6481, "step": 20507 }, { "epoch": 0.5987562406937024, "grad_norm": 0.5114974571461425, "learning_rate": 2.229197080291971e-05, "loss": 0.5983, "step": 20508 }, { "epoch": 0.5987854369215497, "grad_norm": 0.5202431053386375, "learning_rate": 2.2290348742903488e-05, "loss": 0.591, "step": 20509 }, { "epoch": 0.5988146331493971, "grad_norm": 0.5061380438989386, "learning_rate": 2.228872668288727e-05, "loss": 0.5814, "step": 20510 }, { "epoch": 0.5988438293772445, "grad_norm": 0.5446674190992515, "learning_rate": 2.2287104622871048e-05, "loss": 0.6106, "step": 20511 }, { "epoch": 0.5988730256050918, "grad_norm": 0.5350008051786909, "learning_rate": 2.2285482562854826e-05, "loss": 0.6424, "step": 20512 }, { "epoch": 0.5989022218329392, "grad_norm": 0.5822188861164262, "learning_rate": 2.2283860502838604e-05, "loss": 0.711, "step": 20513 }, { "epoch": 0.5989314180607865, "grad_norm": 0.504369866566443, "learning_rate": 2.2282238442822386e-05, "loss": 0.5584, "step": 20514 }, { "epoch": 0.5989606142886339, "grad_norm": 0.5383630293382844, "learning_rate": 2.2280616382806165e-05, "loss": 0.6764, "step": 20515 }, { "epoch": 0.5989898105164813, "grad_norm": 0.5394601301861821, "learning_rate": 2.2278994322789946e-05, "loss": 0.6531, "step": 20516 }, { "epoch": 0.5990190067443286, "grad_norm": 0.5570145022108846, "learning_rate": 2.2277372262773725e-05, "loss": 0.6718, "step": 20517 }, { "epoch": 0.599048202972176, "grad_norm": 0.54871416509083, "learning_rate": 2.2275750202757503e-05, "loss": 0.6824, "step": 20518 }, { "epoch": 0.5990773992000233, "grad_norm": 0.5267890822291983, "learning_rate": 2.227412814274128e-05, "loss": 0.617, "step": 20519 }, { "epoch": 0.5991065954278707, "grad_norm": 0.5295138127004445, "learning_rate": 2.227250608272506e-05, "loss": 0.619, "step": 20520 }, { "epoch": 0.5991357916557181, "grad_norm": 0.5197576388576453, "learning_rate": 2.227088402270884e-05, "loss": 0.583, "step": 20521 }, { "epoch": 0.5991649878835654, "grad_norm": 0.5433077012994922, "learning_rate": 2.226926196269262e-05, "loss": 0.604, "step": 20522 }, { "epoch": 0.5991941841114128, "grad_norm": 0.4785927329697919, "learning_rate": 2.22676399026764e-05, "loss": 0.5098, "step": 20523 }, { "epoch": 0.5992233803392601, "grad_norm": 0.516565564956142, "learning_rate": 2.226601784266018e-05, "loss": 0.6148, "step": 20524 }, { "epoch": 0.5992525765671075, "grad_norm": 0.5391690901345613, "learning_rate": 2.226439578264396e-05, "loss": 0.6182, "step": 20525 }, { "epoch": 0.5992817727949549, "grad_norm": 0.545742380301579, "learning_rate": 2.226277372262774e-05, "loss": 0.6478, "step": 20526 }, { "epoch": 0.5993109690228022, "grad_norm": 0.5571762415669529, "learning_rate": 2.226115166261152e-05, "loss": 0.6596, "step": 20527 }, { "epoch": 0.5993401652506496, "grad_norm": 0.5026706542608735, "learning_rate": 2.2259529602595297e-05, "loss": 0.5283, "step": 20528 }, { "epoch": 0.599369361478497, "grad_norm": 0.5262001815813511, "learning_rate": 2.2257907542579075e-05, "loss": 0.6382, "step": 20529 }, { "epoch": 0.5993985577063443, "grad_norm": 0.5016796436993302, "learning_rate": 2.2256285482562854e-05, "loss": 0.5803, "step": 20530 }, { "epoch": 0.5994277539341917, "grad_norm": 0.4987609037012509, "learning_rate": 2.2254663422546635e-05, "loss": 0.5683, "step": 20531 }, { "epoch": 0.599456950162039, "grad_norm": 0.5403528456336723, "learning_rate": 2.2253041362530414e-05, "loss": 0.6141, "step": 20532 }, { "epoch": 0.5994861463898864, "grad_norm": 0.5386989437869215, "learning_rate": 2.2251419302514196e-05, "loss": 0.6589, "step": 20533 }, { "epoch": 0.5995153426177338, "grad_norm": 0.47208564314306933, "learning_rate": 2.2249797242497974e-05, "loss": 0.5099, "step": 20534 }, { "epoch": 0.5995445388455811, "grad_norm": 0.5242150710500614, "learning_rate": 2.2248175182481752e-05, "loss": 0.6068, "step": 20535 }, { "epoch": 0.5995737350734285, "grad_norm": 0.5255340613718826, "learning_rate": 2.2246553122465534e-05, "loss": 0.6456, "step": 20536 }, { "epoch": 0.5996029313012758, "grad_norm": 0.5322693111395256, "learning_rate": 2.2244931062449312e-05, "loss": 0.6245, "step": 20537 }, { "epoch": 0.5996321275291232, "grad_norm": 0.5027176513541953, "learning_rate": 2.224330900243309e-05, "loss": 0.5761, "step": 20538 }, { "epoch": 0.5996613237569706, "grad_norm": 0.5085142647565075, "learning_rate": 2.224168694241687e-05, "loss": 0.5628, "step": 20539 }, { "epoch": 0.5996905199848179, "grad_norm": 0.4977852919298976, "learning_rate": 2.2240064882400648e-05, "loss": 0.6088, "step": 20540 }, { "epoch": 0.5997197162126653, "grad_norm": 0.5375590959573733, "learning_rate": 2.223844282238443e-05, "loss": 0.6087, "step": 20541 }, { "epoch": 0.5997489124405126, "grad_norm": 0.5126145556664582, "learning_rate": 2.2236820762368208e-05, "loss": 0.604, "step": 20542 }, { "epoch": 0.59977810866836, "grad_norm": 0.5472381236286852, "learning_rate": 2.223519870235199e-05, "loss": 0.6322, "step": 20543 }, { "epoch": 0.5998073048962074, "grad_norm": 0.5618917107460945, "learning_rate": 2.2233576642335768e-05, "loss": 0.6516, "step": 20544 }, { "epoch": 0.5998365011240547, "grad_norm": 0.4833553100319435, "learning_rate": 2.2231954582319546e-05, "loss": 0.535, "step": 20545 }, { "epoch": 0.5998656973519021, "grad_norm": 0.5451753002818718, "learning_rate": 2.2230332522303328e-05, "loss": 0.6282, "step": 20546 }, { "epoch": 0.5998948935797495, "grad_norm": 0.48461992134416193, "learning_rate": 2.2228710462287106e-05, "loss": 0.5374, "step": 20547 }, { "epoch": 0.5999240898075968, "grad_norm": 0.5300836821657923, "learning_rate": 2.2227088402270885e-05, "loss": 0.6402, "step": 20548 }, { "epoch": 0.5999532860354442, "grad_norm": 0.550954659504089, "learning_rate": 2.2225466342254663e-05, "loss": 0.6598, "step": 20549 }, { "epoch": 0.5999824822632915, "grad_norm": 0.5334022862035394, "learning_rate": 2.222384428223844e-05, "loss": 0.645, "step": 20550 }, { "epoch": 0.6000116784911389, "grad_norm": 0.49062359804527117, "learning_rate": 2.2222222222222223e-05, "loss": 0.5293, "step": 20551 }, { "epoch": 0.6000408747189863, "grad_norm": 0.5152043322641664, "learning_rate": 2.2220600162206005e-05, "loss": 0.5692, "step": 20552 }, { "epoch": 0.6000700709468336, "grad_norm": 0.5105602765129864, "learning_rate": 2.2218978102189783e-05, "loss": 0.5811, "step": 20553 }, { "epoch": 0.600099267174681, "grad_norm": 0.5073583277335387, "learning_rate": 2.2217356042173562e-05, "loss": 0.6204, "step": 20554 }, { "epoch": 0.6001284634025283, "grad_norm": 0.5324915488892239, "learning_rate": 2.221573398215734e-05, "loss": 0.6082, "step": 20555 }, { "epoch": 0.6001576596303757, "grad_norm": 0.5253639138353051, "learning_rate": 2.2214111922141122e-05, "loss": 0.6059, "step": 20556 }, { "epoch": 0.6001868558582231, "grad_norm": 0.5151319019052937, "learning_rate": 2.22124898621249e-05, "loss": 0.5905, "step": 20557 }, { "epoch": 0.6002160520860704, "grad_norm": 0.5271388144632094, "learning_rate": 2.221086780210868e-05, "loss": 0.6109, "step": 20558 }, { "epoch": 0.6002452483139178, "grad_norm": 0.5088595323878128, "learning_rate": 2.2209245742092457e-05, "loss": 0.5973, "step": 20559 }, { "epoch": 0.6002744445417652, "grad_norm": 0.4688813746841555, "learning_rate": 2.2207623682076235e-05, "loss": 0.5033, "step": 20560 }, { "epoch": 0.6003036407696125, "grad_norm": 0.5275053613910687, "learning_rate": 2.2206001622060017e-05, "loss": 0.6066, "step": 20561 }, { "epoch": 0.6003328369974599, "grad_norm": 0.5119968936903657, "learning_rate": 2.22043795620438e-05, "loss": 0.601, "step": 20562 }, { "epoch": 0.6003620332253073, "grad_norm": 0.5241832385582054, "learning_rate": 2.2202757502027577e-05, "loss": 0.6274, "step": 20563 }, { "epoch": 0.6003912294531547, "grad_norm": 0.569535726887896, "learning_rate": 2.2201135442011356e-05, "loss": 0.6503, "step": 20564 }, { "epoch": 0.6004204256810021, "grad_norm": 0.5193251824722316, "learning_rate": 2.2199513381995134e-05, "loss": 0.621, "step": 20565 }, { "epoch": 0.6004496219088494, "grad_norm": 0.5159350608202296, "learning_rate": 2.2197891321978916e-05, "loss": 0.5762, "step": 20566 }, { "epoch": 0.6004788181366968, "grad_norm": 0.5219553629844725, "learning_rate": 2.2196269261962694e-05, "loss": 0.6461, "step": 20567 }, { "epoch": 0.6005080143645442, "grad_norm": 0.5516931000404336, "learning_rate": 2.2194647201946473e-05, "loss": 0.5827, "step": 20568 }, { "epoch": 0.6005372105923915, "grad_norm": 0.5233880414533834, "learning_rate": 2.219302514193025e-05, "loss": 0.5767, "step": 20569 }, { "epoch": 0.6005664068202389, "grad_norm": 0.534018435413966, "learning_rate": 2.219140308191403e-05, "loss": 0.6354, "step": 20570 }, { "epoch": 0.6005956030480862, "grad_norm": 0.47513294497969205, "learning_rate": 2.218978102189781e-05, "loss": 0.524, "step": 20571 }, { "epoch": 0.6006247992759336, "grad_norm": 0.5710101940037687, "learning_rate": 2.2188158961881593e-05, "loss": 0.6068, "step": 20572 }, { "epoch": 0.600653995503781, "grad_norm": 0.5844489907160136, "learning_rate": 2.218653690186537e-05, "loss": 0.6003, "step": 20573 }, { "epoch": 0.6006831917316283, "grad_norm": 0.5308331378266721, "learning_rate": 2.218491484184915e-05, "loss": 0.6034, "step": 20574 }, { "epoch": 0.6007123879594757, "grad_norm": 0.5279416494626752, "learning_rate": 2.2183292781832928e-05, "loss": 0.6352, "step": 20575 }, { "epoch": 0.600741584187323, "grad_norm": 0.683684870754559, "learning_rate": 2.2181670721816706e-05, "loss": 0.621, "step": 20576 }, { "epoch": 0.6007707804151704, "grad_norm": 0.5349995136887574, "learning_rate": 2.2180048661800488e-05, "loss": 0.6081, "step": 20577 }, { "epoch": 0.6007999766430178, "grad_norm": 0.5560837431605692, "learning_rate": 2.2178426601784266e-05, "loss": 0.6798, "step": 20578 }, { "epoch": 0.6008291728708651, "grad_norm": 0.5518925378302769, "learning_rate": 2.2176804541768045e-05, "loss": 0.6068, "step": 20579 }, { "epoch": 0.6008583690987125, "grad_norm": 0.5598678963036311, "learning_rate": 2.2175182481751827e-05, "loss": 0.713, "step": 20580 }, { "epoch": 0.6008875653265598, "grad_norm": 0.46650893888744605, "learning_rate": 2.2173560421735605e-05, "loss": 0.4956, "step": 20581 }, { "epoch": 0.6009167615544072, "grad_norm": 0.5426880150311187, "learning_rate": 2.2171938361719387e-05, "loss": 0.5989, "step": 20582 }, { "epoch": 0.6009459577822546, "grad_norm": 0.5061264390580753, "learning_rate": 2.2170316301703165e-05, "loss": 0.6055, "step": 20583 }, { "epoch": 0.6009751540101019, "grad_norm": 0.5520096039689489, "learning_rate": 2.2168694241686944e-05, "loss": 0.6789, "step": 20584 }, { "epoch": 0.6010043502379493, "grad_norm": 0.5292545398929023, "learning_rate": 2.2167072181670722e-05, "loss": 0.6227, "step": 20585 }, { "epoch": 0.6010335464657967, "grad_norm": 0.5578263631668388, "learning_rate": 2.21654501216545e-05, "loss": 0.646, "step": 20586 }, { "epoch": 0.601062742693644, "grad_norm": 0.5188751006645299, "learning_rate": 2.2163828061638282e-05, "loss": 0.6219, "step": 20587 }, { "epoch": 0.6010919389214914, "grad_norm": 0.4932333743109188, "learning_rate": 2.216220600162206e-05, "loss": 0.5396, "step": 20588 }, { "epoch": 0.6011211351493387, "grad_norm": 0.5114034441553792, "learning_rate": 2.216058394160584e-05, "loss": 0.585, "step": 20589 }, { "epoch": 0.6011503313771861, "grad_norm": 0.49906516527941164, "learning_rate": 2.215896188158962e-05, "loss": 0.5634, "step": 20590 }, { "epoch": 0.6011795276050335, "grad_norm": 0.552647199995812, "learning_rate": 2.21573398215734e-05, "loss": 0.6457, "step": 20591 }, { "epoch": 0.6012087238328808, "grad_norm": 0.5088100440743086, "learning_rate": 2.215571776155718e-05, "loss": 0.5629, "step": 20592 }, { "epoch": 0.6012379200607282, "grad_norm": 0.4991004649919219, "learning_rate": 2.215409570154096e-05, "loss": 0.5618, "step": 20593 }, { "epoch": 0.6012671162885755, "grad_norm": 0.5034221236715076, "learning_rate": 2.2152473641524737e-05, "loss": 0.5625, "step": 20594 }, { "epoch": 0.6012963125164229, "grad_norm": 0.5045129142895262, "learning_rate": 2.2150851581508516e-05, "loss": 0.5801, "step": 20595 }, { "epoch": 0.6013255087442703, "grad_norm": 0.5401277277543945, "learning_rate": 2.2149229521492294e-05, "loss": 0.5926, "step": 20596 }, { "epoch": 0.6013547049721176, "grad_norm": 0.5017591222507479, "learning_rate": 2.2147607461476076e-05, "loss": 0.558, "step": 20597 }, { "epoch": 0.601383901199965, "grad_norm": 0.5291532218384387, "learning_rate": 2.2145985401459854e-05, "loss": 0.6098, "step": 20598 }, { "epoch": 0.6014130974278123, "grad_norm": 0.5154584238088765, "learning_rate": 2.2144363341443636e-05, "loss": 0.5808, "step": 20599 }, { "epoch": 0.6014422936556597, "grad_norm": 0.5537298966797507, "learning_rate": 2.2142741281427414e-05, "loss": 0.6513, "step": 20600 }, { "epoch": 0.6014714898835071, "grad_norm": 0.5279332386044046, "learning_rate": 2.2141119221411193e-05, "loss": 0.6328, "step": 20601 }, { "epoch": 0.6015006861113544, "grad_norm": 0.5031504447246395, "learning_rate": 2.2139497161394975e-05, "loss": 0.5746, "step": 20602 }, { "epoch": 0.6015298823392018, "grad_norm": 0.5440964909817221, "learning_rate": 2.2137875101378753e-05, "loss": 0.6665, "step": 20603 }, { "epoch": 0.6015590785670492, "grad_norm": 0.4823704438175518, "learning_rate": 2.213625304136253e-05, "loss": 0.5256, "step": 20604 }, { "epoch": 0.6015882747948965, "grad_norm": 0.5560151543186047, "learning_rate": 2.213463098134631e-05, "loss": 0.4993, "step": 20605 }, { "epoch": 0.6016174710227439, "grad_norm": 0.5075882735410522, "learning_rate": 2.2133008921330088e-05, "loss": 0.5564, "step": 20606 }, { "epoch": 0.6016466672505912, "grad_norm": 0.5437078863239818, "learning_rate": 2.213138686131387e-05, "loss": 0.6429, "step": 20607 }, { "epoch": 0.6016758634784386, "grad_norm": 0.49180575372835367, "learning_rate": 2.2129764801297648e-05, "loss": 0.5713, "step": 20608 }, { "epoch": 0.601705059706286, "grad_norm": 0.5548046873461724, "learning_rate": 2.212814274128143e-05, "loss": 0.6705, "step": 20609 }, { "epoch": 0.6017342559341333, "grad_norm": 0.5193349844515538, "learning_rate": 2.212652068126521e-05, "loss": 0.6199, "step": 20610 }, { "epoch": 0.6017634521619807, "grad_norm": 0.4964239673432038, "learning_rate": 2.2124898621248987e-05, "loss": 0.5959, "step": 20611 }, { "epoch": 0.601792648389828, "grad_norm": 0.5347441408897159, "learning_rate": 2.212327656123277e-05, "loss": 0.6251, "step": 20612 }, { "epoch": 0.6018218446176754, "grad_norm": 0.5415819396010872, "learning_rate": 2.2121654501216547e-05, "loss": 0.671, "step": 20613 }, { "epoch": 0.6018510408455228, "grad_norm": 0.5025034830081255, "learning_rate": 2.2120032441200325e-05, "loss": 0.5586, "step": 20614 }, { "epoch": 0.6018802370733701, "grad_norm": 0.5208962433892294, "learning_rate": 2.2118410381184104e-05, "loss": 0.5858, "step": 20615 }, { "epoch": 0.6019094333012175, "grad_norm": 0.5361588827431243, "learning_rate": 2.2116788321167882e-05, "loss": 0.6431, "step": 20616 }, { "epoch": 0.6019386295290649, "grad_norm": 0.5298083930666846, "learning_rate": 2.2115166261151664e-05, "loss": 0.6253, "step": 20617 }, { "epoch": 0.6019678257569122, "grad_norm": 0.5373569465742234, "learning_rate": 2.2113544201135445e-05, "loss": 0.6157, "step": 20618 }, { "epoch": 0.6019970219847596, "grad_norm": 0.5406184368822917, "learning_rate": 2.2111922141119224e-05, "loss": 0.6232, "step": 20619 }, { "epoch": 0.6020262182126069, "grad_norm": 0.49723941445787045, "learning_rate": 2.2110300081103002e-05, "loss": 0.561, "step": 20620 }, { "epoch": 0.6020554144404543, "grad_norm": 0.5471907394623583, "learning_rate": 2.210867802108678e-05, "loss": 0.674, "step": 20621 }, { "epoch": 0.6020846106683017, "grad_norm": 0.5224679766470165, "learning_rate": 2.2107055961070562e-05, "loss": 0.6039, "step": 20622 }, { "epoch": 0.602113806896149, "grad_norm": 0.5215931344274214, "learning_rate": 2.210543390105434e-05, "loss": 0.6129, "step": 20623 }, { "epoch": 0.6021430031239964, "grad_norm": 0.5264185132777452, "learning_rate": 2.210381184103812e-05, "loss": 0.6156, "step": 20624 }, { "epoch": 0.6021721993518437, "grad_norm": 0.5285691782546809, "learning_rate": 2.2102189781021897e-05, "loss": 0.6233, "step": 20625 }, { "epoch": 0.6022013955796911, "grad_norm": 0.5561902810523524, "learning_rate": 2.2100567721005676e-05, "loss": 0.6305, "step": 20626 }, { "epoch": 0.6022305918075385, "grad_norm": 0.6222776628756772, "learning_rate": 2.2098945660989458e-05, "loss": 0.6613, "step": 20627 }, { "epoch": 0.6022597880353858, "grad_norm": 0.5303005793147966, "learning_rate": 2.209732360097324e-05, "loss": 0.6211, "step": 20628 }, { "epoch": 0.6022889842632332, "grad_norm": 0.5460558325547619, "learning_rate": 2.2095701540957018e-05, "loss": 0.5762, "step": 20629 }, { "epoch": 0.6023181804910805, "grad_norm": 0.5545175263364676, "learning_rate": 2.2094079480940796e-05, "loss": 0.6213, "step": 20630 }, { "epoch": 0.6023473767189279, "grad_norm": 0.5107551233830749, "learning_rate": 2.2092457420924575e-05, "loss": 0.592, "step": 20631 }, { "epoch": 0.6023765729467753, "grad_norm": 0.536992778148413, "learning_rate": 2.2090835360908356e-05, "loss": 0.6579, "step": 20632 }, { "epoch": 0.6024057691746226, "grad_norm": 0.549657652053377, "learning_rate": 2.2089213300892135e-05, "loss": 0.6028, "step": 20633 }, { "epoch": 0.60243496540247, "grad_norm": 0.532826607235719, "learning_rate": 2.2087591240875913e-05, "loss": 0.6559, "step": 20634 }, { "epoch": 0.6024641616303174, "grad_norm": 0.5255348974185514, "learning_rate": 2.208596918085969e-05, "loss": 0.6042, "step": 20635 }, { "epoch": 0.6024933578581647, "grad_norm": 0.5337799475476244, "learning_rate": 2.208434712084347e-05, "loss": 0.6713, "step": 20636 }, { "epoch": 0.6025225540860121, "grad_norm": 0.5174425114231568, "learning_rate": 2.208272506082725e-05, "loss": 0.5805, "step": 20637 }, { "epoch": 0.6025517503138594, "grad_norm": 0.47523517088490336, "learning_rate": 2.2081103000811033e-05, "loss": 0.4829, "step": 20638 }, { "epoch": 0.6025809465417068, "grad_norm": 0.5392038464764792, "learning_rate": 2.207948094079481e-05, "loss": 0.6253, "step": 20639 }, { "epoch": 0.6026101427695542, "grad_norm": 0.533034064781189, "learning_rate": 2.207785888077859e-05, "loss": 0.6232, "step": 20640 }, { "epoch": 0.6026393389974015, "grad_norm": 0.5058043113561062, "learning_rate": 2.207623682076237e-05, "loss": 0.6, "step": 20641 }, { "epoch": 0.6026685352252489, "grad_norm": 0.5419012908251031, "learning_rate": 2.2074614760746147e-05, "loss": 0.59, "step": 20642 }, { "epoch": 0.6026977314530962, "grad_norm": 0.5775695920057096, "learning_rate": 2.207299270072993e-05, "loss": 0.6923, "step": 20643 }, { "epoch": 0.6027269276809436, "grad_norm": 0.4671798080189083, "learning_rate": 2.2071370640713707e-05, "loss": 0.4859, "step": 20644 }, { "epoch": 0.602756123908791, "grad_norm": 0.4966419968633107, "learning_rate": 2.2069748580697485e-05, "loss": 0.5503, "step": 20645 }, { "epoch": 0.6027853201366383, "grad_norm": 0.5148942528871876, "learning_rate": 2.2068126520681267e-05, "loss": 0.5983, "step": 20646 }, { "epoch": 0.6028145163644857, "grad_norm": 0.5018947471023208, "learning_rate": 2.2066504460665045e-05, "loss": 0.6091, "step": 20647 }, { "epoch": 0.602843712592333, "grad_norm": 0.5009159796753163, "learning_rate": 2.2064882400648827e-05, "loss": 0.5605, "step": 20648 }, { "epoch": 0.6028729088201804, "grad_norm": 0.5102097020491397, "learning_rate": 2.2063260340632606e-05, "loss": 0.6016, "step": 20649 }, { "epoch": 0.6029021050480278, "grad_norm": 0.5475210427596512, "learning_rate": 2.2061638280616384e-05, "loss": 0.6608, "step": 20650 }, { "epoch": 0.6029313012758751, "grad_norm": 0.5321533288860961, "learning_rate": 2.2060016220600162e-05, "loss": 0.6187, "step": 20651 }, { "epoch": 0.6029604975037225, "grad_norm": 0.5418044074652826, "learning_rate": 2.205839416058394e-05, "loss": 0.5836, "step": 20652 }, { "epoch": 0.6029896937315699, "grad_norm": 0.5093178008591907, "learning_rate": 2.2056772100567722e-05, "loss": 0.5661, "step": 20653 }, { "epoch": 0.6030188899594172, "grad_norm": 0.5422395112391345, "learning_rate": 2.20551500405515e-05, "loss": 0.6499, "step": 20654 }, { "epoch": 0.6030480861872646, "grad_norm": 0.5267308415341152, "learning_rate": 2.205352798053528e-05, "loss": 0.5764, "step": 20655 }, { "epoch": 0.6030772824151119, "grad_norm": 0.5518741772813178, "learning_rate": 2.205190592051906e-05, "loss": 0.6742, "step": 20656 }, { "epoch": 0.6031064786429593, "grad_norm": 0.5961137267460562, "learning_rate": 2.205028386050284e-05, "loss": 0.7154, "step": 20657 }, { "epoch": 0.6031356748708067, "grad_norm": 0.5459067611222874, "learning_rate": 2.204866180048662e-05, "loss": 0.6646, "step": 20658 }, { "epoch": 0.603164871098654, "grad_norm": 0.5584413471994492, "learning_rate": 2.20470397404704e-05, "loss": 0.6613, "step": 20659 }, { "epoch": 0.6031940673265014, "grad_norm": 0.5122156188460734, "learning_rate": 2.2045417680454178e-05, "loss": 0.5908, "step": 20660 }, { "epoch": 0.6032232635543487, "grad_norm": 0.533618226959078, "learning_rate": 2.2043795620437956e-05, "loss": 0.6267, "step": 20661 }, { "epoch": 0.6032524597821961, "grad_norm": 0.5360748611624654, "learning_rate": 2.2042173560421735e-05, "loss": 0.6169, "step": 20662 }, { "epoch": 0.6032816560100435, "grad_norm": 0.47466394541881507, "learning_rate": 2.2040551500405516e-05, "loss": 0.5424, "step": 20663 }, { "epoch": 0.6033108522378908, "grad_norm": 0.5175448069556424, "learning_rate": 2.2038929440389295e-05, "loss": 0.5882, "step": 20664 }, { "epoch": 0.6033400484657382, "grad_norm": 0.528108391096167, "learning_rate": 2.2037307380373076e-05, "loss": 0.6359, "step": 20665 }, { "epoch": 0.6033692446935855, "grad_norm": 0.5183482780823343, "learning_rate": 2.2035685320356855e-05, "loss": 0.6415, "step": 20666 }, { "epoch": 0.6033984409214329, "grad_norm": 0.5106066315687641, "learning_rate": 2.2034063260340633e-05, "loss": 0.6062, "step": 20667 }, { "epoch": 0.6034276371492803, "grad_norm": 0.46556272562194906, "learning_rate": 2.2032441200324415e-05, "loss": 0.5042, "step": 20668 }, { "epoch": 0.6034568333771276, "grad_norm": 0.4958471739339863, "learning_rate": 2.2030819140308193e-05, "loss": 0.5776, "step": 20669 }, { "epoch": 0.603486029604975, "grad_norm": 0.5118632590583023, "learning_rate": 2.2029197080291972e-05, "loss": 0.6059, "step": 20670 }, { "epoch": 0.6035152258328224, "grad_norm": 0.542330660453841, "learning_rate": 2.202757502027575e-05, "loss": 0.5804, "step": 20671 }, { "epoch": 0.6035444220606697, "grad_norm": 0.5757123539148937, "learning_rate": 2.202595296025953e-05, "loss": 0.6995, "step": 20672 }, { "epoch": 0.6035736182885171, "grad_norm": 0.5086034833128334, "learning_rate": 2.202433090024331e-05, "loss": 0.5797, "step": 20673 }, { "epoch": 0.6036028145163644, "grad_norm": 0.5152238147060193, "learning_rate": 2.202270884022709e-05, "loss": 0.5645, "step": 20674 }, { "epoch": 0.6036320107442118, "grad_norm": 0.536701237554562, "learning_rate": 2.202108678021087e-05, "loss": 0.6224, "step": 20675 }, { "epoch": 0.6036612069720592, "grad_norm": 0.5350000570509599, "learning_rate": 2.201946472019465e-05, "loss": 0.6499, "step": 20676 }, { "epoch": 0.6036904031999065, "grad_norm": 0.48813786811975574, "learning_rate": 2.2017842660178427e-05, "loss": 0.5558, "step": 20677 }, { "epoch": 0.6037195994277539, "grad_norm": 0.5311743880630712, "learning_rate": 2.201622060016221e-05, "loss": 0.6365, "step": 20678 }, { "epoch": 0.6037487956556012, "grad_norm": 0.49942983406496483, "learning_rate": 2.2014598540145987e-05, "loss": 0.5909, "step": 20679 }, { "epoch": 0.6037779918834486, "grad_norm": 0.5286409980512156, "learning_rate": 2.2012976480129766e-05, "loss": 0.6675, "step": 20680 }, { "epoch": 0.603807188111296, "grad_norm": 0.5195505619711003, "learning_rate": 2.2011354420113544e-05, "loss": 0.5525, "step": 20681 }, { "epoch": 0.6038363843391433, "grad_norm": 0.6040335729462774, "learning_rate": 2.2009732360097322e-05, "loss": 0.6378, "step": 20682 }, { "epoch": 0.6038655805669907, "grad_norm": 0.5065960050102909, "learning_rate": 2.2008110300081104e-05, "loss": 0.5715, "step": 20683 }, { "epoch": 0.6038947767948382, "grad_norm": 0.5349715815634599, "learning_rate": 2.2006488240064886e-05, "loss": 0.6029, "step": 20684 }, { "epoch": 0.6039239730226855, "grad_norm": 0.5239334635370622, "learning_rate": 2.2004866180048664e-05, "loss": 0.6294, "step": 20685 }, { "epoch": 0.6039531692505329, "grad_norm": 0.5188090170150738, "learning_rate": 2.2003244120032443e-05, "loss": 0.5863, "step": 20686 }, { "epoch": 0.6039823654783802, "grad_norm": 0.5122127377381124, "learning_rate": 2.200162206001622e-05, "loss": 0.5479, "step": 20687 }, { "epoch": 0.6040115617062276, "grad_norm": 0.5022715133491348, "learning_rate": 2.2000000000000003e-05, "loss": 0.5723, "step": 20688 }, { "epoch": 0.604040757934075, "grad_norm": 0.5224197722931193, "learning_rate": 2.199837793998378e-05, "loss": 0.601, "step": 20689 }, { "epoch": 0.6040699541619223, "grad_norm": 0.5374787174873304, "learning_rate": 2.199675587996756e-05, "loss": 0.5934, "step": 20690 }, { "epoch": 0.6040991503897697, "grad_norm": 0.5290608588265514, "learning_rate": 2.1995133819951338e-05, "loss": 0.6185, "step": 20691 }, { "epoch": 0.604128346617617, "grad_norm": 0.5256830017516034, "learning_rate": 2.1993511759935116e-05, "loss": 0.5897, "step": 20692 }, { "epoch": 0.6041575428454644, "grad_norm": 0.50817754824102, "learning_rate": 2.1991889699918898e-05, "loss": 0.5787, "step": 20693 }, { "epoch": 0.6041867390733118, "grad_norm": 0.49779361987102716, "learning_rate": 2.199026763990268e-05, "loss": 0.5638, "step": 20694 }, { "epoch": 0.6042159353011591, "grad_norm": 0.5232717212402306, "learning_rate": 2.1988645579886458e-05, "loss": 0.6214, "step": 20695 }, { "epoch": 0.6042451315290065, "grad_norm": 0.5295860203604682, "learning_rate": 2.1987023519870237e-05, "loss": 0.6613, "step": 20696 }, { "epoch": 0.6042743277568539, "grad_norm": 0.5032239852260597, "learning_rate": 2.1985401459854015e-05, "loss": 0.6012, "step": 20697 }, { "epoch": 0.6043035239847012, "grad_norm": 0.4944022087791941, "learning_rate": 2.1983779399837793e-05, "loss": 0.5491, "step": 20698 }, { "epoch": 0.6043327202125486, "grad_norm": 0.5374337659548337, "learning_rate": 2.1982157339821575e-05, "loss": 0.6276, "step": 20699 }, { "epoch": 0.6043619164403959, "grad_norm": 0.5161190788989181, "learning_rate": 2.1980535279805353e-05, "loss": 0.5682, "step": 20700 }, { "epoch": 0.6043911126682433, "grad_norm": 0.5125766926949741, "learning_rate": 2.1978913219789132e-05, "loss": 0.6027, "step": 20701 }, { "epoch": 0.6044203088960907, "grad_norm": 0.5436306695369645, "learning_rate": 2.197729115977291e-05, "loss": 0.6585, "step": 20702 }, { "epoch": 0.604449505123938, "grad_norm": 0.5027379790530072, "learning_rate": 2.1975669099756692e-05, "loss": 0.5188, "step": 20703 }, { "epoch": 0.6044787013517854, "grad_norm": 0.5230385912291053, "learning_rate": 2.1974047039740474e-05, "loss": 0.5959, "step": 20704 }, { "epoch": 0.6045078975796327, "grad_norm": 0.49271597377953286, "learning_rate": 2.1972424979724252e-05, "loss": 0.5867, "step": 20705 }, { "epoch": 0.6045370938074801, "grad_norm": 0.5355191735194323, "learning_rate": 2.197080291970803e-05, "loss": 0.6127, "step": 20706 }, { "epoch": 0.6045662900353275, "grad_norm": 0.5120808711496996, "learning_rate": 2.196918085969181e-05, "loss": 0.5858, "step": 20707 }, { "epoch": 0.6045954862631748, "grad_norm": 0.4748701828513498, "learning_rate": 2.1967558799675587e-05, "loss": 0.5323, "step": 20708 }, { "epoch": 0.6046246824910222, "grad_norm": 0.6080700957153271, "learning_rate": 2.196593673965937e-05, "loss": 0.7852, "step": 20709 }, { "epoch": 0.6046538787188696, "grad_norm": 0.5208504442317159, "learning_rate": 2.1964314679643147e-05, "loss": 0.5874, "step": 20710 }, { "epoch": 0.6046830749467169, "grad_norm": 0.4892047886093569, "learning_rate": 2.1962692619626926e-05, "loss": 0.5927, "step": 20711 }, { "epoch": 0.6047122711745643, "grad_norm": 0.5294554454524509, "learning_rate": 2.1961070559610708e-05, "loss": 0.6404, "step": 20712 }, { "epoch": 0.6047414674024116, "grad_norm": 0.5007636466963493, "learning_rate": 2.1959448499594486e-05, "loss": 0.562, "step": 20713 }, { "epoch": 0.604770663630259, "grad_norm": 0.5232528755769638, "learning_rate": 2.1957826439578268e-05, "loss": 0.5885, "step": 20714 }, { "epoch": 0.6047998598581064, "grad_norm": 0.49792699612675134, "learning_rate": 2.1956204379562046e-05, "loss": 0.5555, "step": 20715 }, { "epoch": 0.6048290560859537, "grad_norm": 0.5771533361263866, "learning_rate": 2.1954582319545824e-05, "loss": 0.6418, "step": 20716 }, { "epoch": 0.6048582523138011, "grad_norm": 0.5495613308871095, "learning_rate": 2.1952960259529603e-05, "loss": 0.6424, "step": 20717 }, { "epoch": 0.6048874485416484, "grad_norm": 0.5550534391508831, "learning_rate": 2.195133819951338e-05, "loss": 0.6608, "step": 20718 }, { "epoch": 0.6049166447694958, "grad_norm": 0.5228914754222036, "learning_rate": 2.1949716139497163e-05, "loss": 0.5927, "step": 20719 }, { "epoch": 0.6049458409973432, "grad_norm": 0.5060954210610906, "learning_rate": 2.194809407948094e-05, "loss": 0.5641, "step": 20720 }, { "epoch": 0.6049750372251905, "grad_norm": 0.5193466772311905, "learning_rate": 2.194647201946472e-05, "loss": 0.5758, "step": 20721 }, { "epoch": 0.6050042334530379, "grad_norm": 0.5335272866082105, "learning_rate": 2.19448499594485e-05, "loss": 0.6327, "step": 20722 }, { "epoch": 0.6050334296808852, "grad_norm": 0.4984342961380529, "learning_rate": 2.194322789943228e-05, "loss": 0.5736, "step": 20723 }, { "epoch": 0.6050626259087326, "grad_norm": 0.5440233163024074, "learning_rate": 2.194160583941606e-05, "loss": 0.6373, "step": 20724 }, { "epoch": 0.60509182213658, "grad_norm": 0.5910760316979338, "learning_rate": 2.193998377939984e-05, "loss": 0.6717, "step": 20725 }, { "epoch": 0.6051210183644273, "grad_norm": 0.4757032431889236, "learning_rate": 2.1938361719383618e-05, "loss": 0.5067, "step": 20726 }, { "epoch": 0.6051502145922747, "grad_norm": 0.52349469288361, "learning_rate": 2.1936739659367397e-05, "loss": 0.6006, "step": 20727 }, { "epoch": 0.605179410820122, "grad_norm": 0.5244245738924874, "learning_rate": 2.1935117599351175e-05, "loss": 0.6011, "step": 20728 }, { "epoch": 0.6052086070479694, "grad_norm": 0.499374999553057, "learning_rate": 2.1933495539334957e-05, "loss": 0.57, "step": 20729 }, { "epoch": 0.6052378032758168, "grad_norm": 0.5083850331966484, "learning_rate": 2.1931873479318735e-05, "loss": 0.5973, "step": 20730 }, { "epoch": 0.6052669995036641, "grad_norm": 0.5520115876707183, "learning_rate": 2.1930251419302517e-05, "loss": 0.6014, "step": 20731 }, { "epoch": 0.6052961957315115, "grad_norm": 0.5021303942773844, "learning_rate": 2.1928629359286295e-05, "loss": 0.5791, "step": 20732 }, { "epoch": 0.6053253919593589, "grad_norm": 0.525480721524379, "learning_rate": 2.1927007299270074e-05, "loss": 0.5883, "step": 20733 }, { "epoch": 0.6053545881872062, "grad_norm": 0.5437048259108545, "learning_rate": 2.1925385239253855e-05, "loss": 0.6317, "step": 20734 }, { "epoch": 0.6053837844150536, "grad_norm": 0.5135216147599876, "learning_rate": 2.1923763179237634e-05, "loss": 0.586, "step": 20735 }, { "epoch": 0.6054129806429009, "grad_norm": 0.5143256599811717, "learning_rate": 2.1922141119221412e-05, "loss": 0.5806, "step": 20736 }, { "epoch": 0.6054421768707483, "grad_norm": 0.5140739328797415, "learning_rate": 2.192051905920519e-05, "loss": 0.596, "step": 20737 }, { "epoch": 0.6054713730985957, "grad_norm": 0.4984409679383449, "learning_rate": 2.191889699918897e-05, "loss": 0.5665, "step": 20738 }, { "epoch": 0.605500569326443, "grad_norm": 0.5337354168950753, "learning_rate": 2.191727493917275e-05, "loss": 0.5911, "step": 20739 }, { "epoch": 0.6055297655542904, "grad_norm": 0.5420928971998679, "learning_rate": 2.191565287915653e-05, "loss": 0.6092, "step": 20740 }, { "epoch": 0.6055589617821378, "grad_norm": 0.5498188555709373, "learning_rate": 2.191403081914031e-05, "loss": 0.6284, "step": 20741 }, { "epoch": 0.6055881580099851, "grad_norm": 0.5482856944732437, "learning_rate": 2.191240875912409e-05, "loss": 0.6166, "step": 20742 }, { "epoch": 0.6056173542378325, "grad_norm": 0.5583986333975864, "learning_rate": 2.1910786699107868e-05, "loss": 0.6597, "step": 20743 }, { "epoch": 0.6056465504656798, "grad_norm": 0.5166999747564268, "learning_rate": 2.190916463909165e-05, "loss": 0.5702, "step": 20744 }, { "epoch": 0.6056757466935272, "grad_norm": 0.4974130618211558, "learning_rate": 2.1907542579075428e-05, "loss": 0.5514, "step": 20745 }, { "epoch": 0.6057049429213746, "grad_norm": 0.49733676992406944, "learning_rate": 2.1905920519059206e-05, "loss": 0.5849, "step": 20746 }, { "epoch": 0.6057341391492219, "grad_norm": 0.5709251888720567, "learning_rate": 2.1904298459042984e-05, "loss": 0.6795, "step": 20747 }, { "epoch": 0.6057633353770693, "grad_norm": 0.5116290280191657, "learning_rate": 2.1902676399026763e-05, "loss": 0.5961, "step": 20748 }, { "epoch": 0.6057925316049166, "grad_norm": 0.5390074587169056, "learning_rate": 2.1901054339010545e-05, "loss": 0.6882, "step": 20749 }, { "epoch": 0.605821727832764, "grad_norm": 0.5353089082260827, "learning_rate": 2.1899432278994326e-05, "loss": 0.6418, "step": 20750 }, { "epoch": 0.6058509240606114, "grad_norm": 0.5275165185418811, "learning_rate": 2.1897810218978105e-05, "loss": 0.5902, "step": 20751 }, { "epoch": 0.6058801202884587, "grad_norm": 0.5205934340465294, "learning_rate": 2.1896188158961883e-05, "loss": 0.5955, "step": 20752 }, { "epoch": 0.6059093165163061, "grad_norm": 0.5362530236593142, "learning_rate": 2.189456609894566e-05, "loss": 0.6057, "step": 20753 }, { "epoch": 0.6059385127441534, "grad_norm": 0.5021896260714066, "learning_rate": 2.1892944038929443e-05, "loss": 0.5933, "step": 20754 }, { "epoch": 0.6059677089720008, "grad_norm": 0.544415965451138, "learning_rate": 2.189132197891322e-05, "loss": 0.643, "step": 20755 }, { "epoch": 0.6059969051998482, "grad_norm": 0.5398753266040563, "learning_rate": 2.1889699918897e-05, "loss": 0.6212, "step": 20756 }, { "epoch": 0.6060261014276955, "grad_norm": 0.46392831575186394, "learning_rate": 2.188807785888078e-05, "loss": 0.4777, "step": 20757 }, { "epoch": 0.6060552976555429, "grad_norm": 0.5259950576876229, "learning_rate": 2.1886455798864557e-05, "loss": 0.6146, "step": 20758 }, { "epoch": 0.6060844938833903, "grad_norm": 0.5025759213913136, "learning_rate": 2.188483373884834e-05, "loss": 0.6021, "step": 20759 }, { "epoch": 0.6061136901112376, "grad_norm": 0.552258089691994, "learning_rate": 2.188321167883212e-05, "loss": 0.6029, "step": 20760 }, { "epoch": 0.606142886339085, "grad_norm": 0.4960000768542335, "learning_rate": 2.18815896188159e-05, "loss": 0.5834, "step": 20761 }, { "epoch": 0.6061720825669323, "grad_norm": 0.5129796185007363, "learning_rate": 2.1879967558799677e-05, "loss": 0.5841, "step": 20762 }, { "epoch": 0.6062012787947797, "grad_norm": 0.5367350536371746, "learning_rate": 2.1878345498783455e-05, "loss": 0.6609, "step": 20763 }, { "epoch": 0.6062304750226271, "grad_norm": 0.5274475134610725, "learning_rate": 2.1876723438767234e-05, "loss": 0.5394, "step": 20764 }, { "epoch": 0.6062596712504744, "grad_norm": 0.5133612295607084, "learning_rate": 2.1875101378751016e-05, "loss": 0.5775, "step": 20765 }, { "epoch": 0.6062888674783218, "grad_norm": 0.537926592407239, "learning_rate": 2.1873479318734794e-05, "loss": 0.6593, "step": 20766 }, { "epoch": 0.6063180637061691, "grad_norm": 0.5479534188870789, "learning_rate": 2.1871857258718572e-05, "loss": 0.671, "step": 20767 }, { "epoch": 0.6063472599340165, "grad_norm": 0.501391286948089, "learning_rate": 2.187023519870235e-05, "loss": 0.5665, "step": 20768 }, { "epoch": 0.6063764561618639, "grad_norm": 0.5003704008710003, "learning_rate": 2.1868613138686132e-05, "loss": 0.5761, "step": 20769 }, { "epoch": 0.6064056523897112, "grad_norm": 0.5318231982901609, "learning_rate": 2.1866991078669914e-05, "loss": 0.6415, "step": 20770 }, { "epoch": 0.6064348486175586, "grad_norm": 0.5354403191653261, "learning_rate": 2.1865369018653693e-05, "loss": 0.6239, "step": 20771 }, { "epoch": 0.606464044845406, "grad_norm": 0.5326837181707578, "learning_rate": 2.186374695863747e-05, "loss": 0.6171, "step": 20772 }, { "epoch": 0.6064932410732533, "grad_norm": 0.5067511650520982, "learning_rate": 2.186212489862125e-05, "loss": 0.5937, "step": 20773 }, { "epoch": 0.6065224373011007, "grad_norm": 0.5186030155233573, "learning_rate": 2.1860502838605028e-05, "loss": 0.5981, "step": 20774 }, { "epoch": 0.606551633528948, "grad_norm": 0.5611928299196564, "learning_rate": 2.185888077858881e-05, "loss": 0.6162, "step": 20775 }, { "epoch": 0.6065808297567954, "grad_norm": 0.47551747626557783, "learning_rate": 2.1857258718572588e-05, "loss": 0.5458, "step": 20776 }, { "epoch": 0.6066100259846428, "grad_norm": 0.5385745022310183, "learning_rate": 2.1855636658556366e-05, "loss": 0.596, "step": 20777 }, { "epoch": 0.6066392222124901, "grad_norm": 0.4926377050638555, "learning_rate": 2.1854014598540145e-05, "loss": 0.5773, "step": 20778 }, { "epoch": 0.6066684184403375, "grad_norm": 0.5073444591482753, "learning_rate": 2.1852392538523926e-05, "loss": 0.5754, "step": 20779 }, { "epoch": 0.6066976146681848, "grad_norm": 0.5569446159238503, "learning_rate": 2.1850770478507708e-05, "loss": 0.6716, "step": 20780 }, { "epoch": 0.6067268108960322, "grad_norm": 0.49695945211140224, "learning_rate": 2.1849148418491486e-05, "loss": 0.59, "step": 20781 }, { "epoch": 0.6067560071238796, "grad_norm": 0.49467658783759755, "learning_rate": 2.1847526358475265e-05, "loss": 0.5862, "step": 20782 }, { "epoch": 0.6067852033517269, "grad_norm": 0.5047256456787234, "learning_rate": 2.1845904298459043e-05, "loss": 0.5859, "step": 20783 }, { "epoch": 0.6068143995795743, "grad_norm": 0.5002406501397095, "learning_rate": 2.184428223844282e-05, "loss": 0.5332, "step": 20784 }, { "epoch": 0.6068435958074216, "grad_norm": 0.4724880277442897, "learning_rate": 2.1842660178426603e-05, "loss": 0.4896, "step": 20785 }, { "epoch": 0.606872792035269, "grad_norm": 0.5352002893932462, "learning_rate": 2.1841038118410382e-05, "loss": 0.6541, "step": 20786 }, { "epoch": 0.6069019882631164, "grad_norm": 0.5102327738374312, "learning_rate": 2.183941605839416e-05, "loss": 0.6023, "step": 20787 }, { "epoch": 0.6069311844909637, "grad_norm": 0.5081199401677226, "learning_rate": 2.1837793998377942e-05, "loss": 0.6173, "step": 20788 }, { "epoch": 0.6069603807188111, "grad_norm": 0.6898533062584277, "learning_rate": 2.183617193836172e-05, "loss": 0.5915, "step": 20789 }, { "epoch": 0.6069895769466584, "grad_norm": 0.5453619175692296, "learning_rate": 2.1834549878345502e-05, "loss": 0.6589, "step": 20790 }, { "epoch": 0.6070187731745058, "grad_norm": 0.5255161589494093, "learning_rate": 2.183292781832928e-05, "loss": 0.6211, "step": 20791 }, { "epoch": 0.6070479694023532, "grad_norm": 0.5478797781421543, "learning_rate": 2.183130575831306e-05, "loss": 0.6209, "step": 20792 }, { "epoch": 0.6070771656302005, "grad_norm": 0.5288098822650816, "learning_rate": 2.1829683698296837e-05, "loss": 0.6417, "step": 20793 }, { "epoch": 0.6071063618580479, "grad_norm": 0.5363179785135181, "learning_rate": 2.1828061638280615e-05, "loss": 0.6487, "step": 20794 }, { "epoch": 0.6071355580858953, "grad_norm": 0.500672134127963, "learning_rate": 2.1826439578264397e-05, "loss": 0.5539, "step": 20795 }, { "epoch": 0.6071647543137426, "grad_norm": 0.5466595045099428, "learning_rate": 2.1824817518248176e-05, "loss": 0.683, "step": 20796 }, { "epoch": 0.60719395054159, "grad_norm": 0.5147433521232423, "learning_rate": 2.1823195458231957e-05, "loss": 0.5499, "step": 20797 }, { "epoch": 0.6072231467694373, "grad_norm": 0.4547064751142765, "learning_rate": 2.1821573398215736e-05, "loss": 0.4906, "step": 20798 }, { "epoch": 0.6072523429972847, "grad_norm": 0.5049479363936877, "learning_rate": 2.1819951338199514e-05, "loss": 0.5515, "step": 20799 }, { "epoch": 0.6072815392251321, "grad_norm": 0.5937617330598668, "learning_rate": 2.1818329278183296e-05, "loss": 0.6126, "step": 20800 }, { "epoch": 0.6073107354529794, "grad_norm": 0.5057684106897056, "learning_rate": 2.1816707218167074e-05, "loss": 0.5768, "step": 20801 }, { "epoch": 0.6073399316808268, "grad_norm": 0.5219112378758074, "learning_rate": 2.1815085158150853e-05, "loss": 0.5741, "step": 20802 }, { "epoch": 0.6073691279086741, "grad_norm": 0.5196887213681295, "learning_rate": 2.181346309813463e-05, "loss": 0.5983, "step": 20803 }, { "epoch": 0.6073983241365216, "grad_norm": 0.5091577829596446, "learning_rate": 2.181184103811841e-05, "loss": 0.6055, "step": 20804 }, { "epoch": 0.607427520364369, "grad_norm": 0.5375432035253577, "learning_rate": 2.181021897810219e-05, "loss": 0.6129, "step": 20805 }, { "epoch": 0.6074567165922163, "grad_norm": 0.5048620616649311, "learning_rate": 2.180859691808597e-05, "loss": 0.558, "step": 20806 }, { "epoch": 0.6074859128200637, "grad_norm": 0.5391305649153542, "learning_rate": 2.180697485806975e-05, "loss": 0.6326, "step": 20807 }, { "epoch": 0.6075151090479111, "grad_norm": 0.5203302151819466, "learning_rate": 2.180535279805353e-05, "loss": 0.6027, "step": 20808 }, { "epoch": 0.6075443052757584, "grad_norm": 0.5516791437534031, "learning_rate": 2.1803730738037308e-05, "loss": 0.6747, "step": 20809 }, { "epoch": 0.6075735015036058, "grad_norm": 0.5580434267349905, "learning_rate": 2.180210867802109e-05, "loss": 0.6371, "step": 20810 }, { "epoch": 0.6076026977314531, "grad_norm": 0.49659882190478916, "learning_rate": 2.1800486618004868e-05, "loss": 0.5882, "step": 20811 }, { "epoch": 0.6076318939593005, "grad_norm": 0.5402290474445188, "learning_rate": 2.1798864557988647e-05, "loss": 0.6254, "step": 20812 }, { "epoch": 0.6076610901871479, "grad_norm": 0.5870403822894258, "learning_rate": 2.1797242497972425e-05, "loss": 0.6915, "step": 20813 }, { "epoch": 0.6076902864149952, "grad_norm": 0.5418243980435601, "learning_rate": 2.1795620437956203e-05, "loss": 0.6264, "step": 20814 }, { "epoch": 0.6077194826428426, "grad_norm": 0.5296305181753801, "learning_rate": 2.1793998377939985e-05, "loss": 0.6443, "step": 20815 }, { "epoch": 0.60774867887069, "grad_norm": 0.5339502138040942, "learning_rate": 2.1792376317923767e-05, "loss": 0.6209, "step": 20816 }, { "epoch": 0.6077778750985373, "grad_norm": 0.5106808262511956, "learning_rate": 2.1790754257907545e-05, "loss": 0.5763, "step": 20817 }, { "epoch": 0.6078070713263847, "grad_norm": 0.4636098400204813, "learning_rate": 2.1789132197891324e-05, "loss": 0.4933, "step": 20818 }, { "epoch": 0.607836267554232, "grad_norm": 0.5147026838440374, "learning_rate": 2.1787510137875102e-05, "loss": 0.5962, "step": 20819 }, { "epoch": 0.6078654637820794, "grad_norm": 0.5016319304713026, "learning_rate": 2.178588807785888e-05, "loss": 0.5701, "step": 20820 }, { "epoch": 0.6078946600099268, "grad_norm": 0.4772846150794329, "learning_rate": 2.1784266017842662e-05, "loss": 0.5373, "step": 20821 }, { "epoch": 0.6079238562377741, "grad_norm": 0.5267544619133838, "learning_rate": 2.178264395782644e-05, "loss": 0.5912, "step": 20822 }, { "epoch": 0.6079530524656215, "grad_norm": 0.5088782163718655, "learning_rate": 2.178102189781022e-05, "loss": 0.6005, "step": 20823 }, { "epoch": 0.6079822486934688, "grad_norm": 0.49015063530853453, "learning_rate": 2.1779399837793997e-05, "loss": 0.563, "step": 20824 }, { "epoch": 0.6080114449213162, "grad_norm": 0.5228334149032713, "learning_rate": 2.177777777777778e-05, "loss": 0.6437, "step": 20825 }, { "epoch": 0.6080406411491636, "grad_norm": 0.5414304996686978, "learning_rate": 2.177615571776156e-05, "loss": 0.6306, "step": 20826 }, { "epoch": 0.6080698373770109, "grad_norm": 0.534839081368637, "learning_rate": 2.177453365774534e-05, "loss": 0.5699, "step": 20827 }, { "epoch": 0.6080990336048583, "grad_norm": 0.5713378425597574, "learning_rate": 2.1772911597729117e-05, "loss": 0.6654, "step": 20828 }, { "epoch": 0.6081282298327056, "grad_norm": 0.5130632184911124, "learning_rate": 2.1771289537712896e-05, "loss": 0.5996, "step": 20829 }, { "epoch": 0.608157426060553, "grad_norm": 0.5055832640253738, "learning_rate": 2.1769667477696674e-05, "loss": 0.6184, "step": 20830 }, { "epoch": 0.6081866222884004, "grad_norm": 0.5318351841298724, "learning_rate": 2.1768045417680456e-05, "loss": 0.6731, "step": 20831 }, { "epoch": 0.6082158185162477, "grad_norm": 0.5475297497154548, "learning_rate": 2.1766423357664234e-05, "loss": 0.6587, "step": 20832 }, { "epoch": 0.6082450147440951, "grad_norm": 0.5453082564654318, "learning_rate": 2.1764801297648013e-05, "loss": 0.6673, "step": 20833 }, { "epoch": 0.6082742109719425, "grad_norm": 0.548482704324053, "learning_rate": 2.176317923763179e-05, "loss": 0.6659, "step": 20834 }, { "epoch": 0.6083034071997898, "grad_norm": 0.5043274226141166, "learning_rate": 2.1761557177615573e-05, "loss": 0.4989, "step": 20835 }, { "epoch": 0.6083326034276372, "grad_norm": 0.549727839410997, "learning_rate": 2.1759935117599355e-05, "loss": 0.6916, "step": 20836 }, { "epoch": 0.6083617996554845, "grad_norm": 0.5307255926769114, "learning_rate": 2.1758313057583133e-05, "loss": 0.6395, "step": 20837 }, { "epoch": 0.6083909958833319, "grad_norm": 0.540104359302164, "learning_rate": 2.175669099756691e-05, "loss": 0.6964, "step": 20838 }, { "epoch": 0.6084201921111793, "grad_norm": 0.553415330247414, "learning_rate": 2.175506893755069e-05, "loss": 0.6429, "step": 20839 }, { "epoch": 0.6084493883390266, "grad_norm": 0.50712722833895, "learning_rate": 2.1753446877534468e-05, "loss": 0.5686, "step": 20840 }, { "epoch": 0.608478584566874, "grad_norm": 0.5178883586545154, "learning_rate": 2.175182481751825e-05, "loss": 0.6518, "step": 20841 }, { "epoch": 0.6085077807947213, "grad_norm": 0.4912255255210308, "learning_rate": 2.1750202757502028e-05, "loss": 0.5483, "step": 20842 }, { "epoch": 0.6085369770225687, "grad_norm": 0.5058549851369284, "learning_rate": 2.1748580697485807e-05, "loss": 0.5559, "step": 20843 }, { "epoch": 0.6085661732504161, "grad_norm": 0.5416313488108433, "learning_rate": 2.1746958637469585e-05, "loss": 0.6241, "step": 20844 }, { "epoch": 0.6085953694782634, "grad_norm": 0.5720322704911648, "learning_rate": 2.1745336577453367e-05, "loss": 0.7366, "step": 20845 }, { "epoch": 0.6086245657061108, "grad_norm": 0.5400752936691438, "learning_rate": 2.174371451743715e-05, "loss": 0.6247, "step": 20846 }, { "epoch": 0.6086537619339581, "grad_norm": 0.5186416904093291, "learning_rate": 2.1742092457420927e-05, "loss": 0.5537, "step": 20847 }, { "epoch": 0.6086829581618055, "grad_norm": 0.5604654166875553, "learning_rate": 2.1740470397404705e-05, "loss": 0.7163, "step": 20848 }, { "epoch": 0.6087121543896529, "grad_norm": 0.5762960321417121, "learning_rate": 2.1738848337388484e-05, "loss": 0.6762, "step": 20849 }, { "epoch": 0.6087413506175002, "grad_norm": 0.5217648566023385, "learning_rate": 2.1737226277372262e-05, "loss": 0.6231, "step": 20850 }, { "epoch": 0.6087705468453476, "grad_norm": 0.5085058173767424, "learning_rate": 2.1735604217356044e-05, "loss": 0.5756, "step": 20851 }, { "epoch": 0.608799743073195, "grad_norm": 0.5009352464617416, "learning_rate": 2.1733982157339822e-05, "loss": 0.5737, "step": 20852 }, { "epoch": 0.6088289393010423, "grad_norm": 0.5135497491851032, "learning_rate": 2.17323600973236e-05, "loss": 0.5778, "step": 20853 }, { "epoch": 0.6088581355288897, "grad_norm": 0.5235601048708406, "learning_rate": 2.1730738037307382e-05, "loss": 0.5956, "step": 20854 }, { "epoch": 0.608887331756737, "grad_norm": 0.4950760871330473, "learning_rate": 2.172911597729116e-05, "loss": 0.5423, "step": 20855 }, { "epoch": 0.6089165279845844, "grad_norm": 0.5209454169177632, "learning_rate": 2.1727493917274942e-05, "loss": 0.5775, "step": 20856 }, { "epoch": 0.6089457242124318, "grad_norm": 0.5187248340615079, "learning_rate": 2.172587185725872e-05, "loss": 0.6192, "step": 20857 }, { "epoch": 0.6089749204402791, "grad_norm": 0.5591166109338617, "learning_rate": 2.17242497972425e-05, "loss": 0.6637, "step": 20858 }, { "epoch": 0.6090041166681265, "grad_norm": 0.5459471245508412, "learning_rate": 2.1722627737226278e-05, "loss": 0.6766, "step": 20859 }, { "epoch": 0.6090333128959738, "grad_norm": 0.46437369187957095, "learning_rate": 2.1721005677210056e-05, "loss": 0.4765, "step": 20860 }, { "epoch": 0.6090625091238212, "grad_norm": 0.505594099881041, "learning_rate": 2.1719383617193838e-05, "loss": 0.5404, "step": 20861 }, { "epoch": 0.6090917053516686, "grad_norm": 0.5417521242067966, "learning_rate": 2.1717761557177616e-05, "loss": 0.6422, "step": 20862 }, { "epoch": 0.6091209015795159, "grad_norm": 0.5528981331184465, "learning_rate": 2.1716139497161398e-05, "loss": 0.6101, "step": 20863 }, { "epoch": 0.6091500978073633, "grad_norm": 0.5200915140730475, "learning_rate": 2.1714517437145176e-05, "loss": 0.6009, "step": 20864 }, { "epoch": 0.6091792940352107, "grad_norm": 0.5550494241528597, "learning_rate": 2.1712895377128955e-05, "loss": 0.6727, "step": 20865 }, { "epoch": 0.609208490263058, "grad_norm": 0.49705776494982856, "learning_rate": 2.1711273317112736e-05, "loss": 0.569, "step": 20866 }, { "epoch": 0.6092376864909054, "grad_norm": 0.46975568514877214, "learning_rate": 2.1709651257096515e-05, "loss": 0.5106, "step": 20867 }, { "epoch": 0.6092668827187527, "grad_norm": 0.5587758958388328, "learning_rate": 2.1708029197080293e-05, "loss": 0.6529, "step": 20868 }, { "epoch": 0.6092960789466001, "grad_norm": 0.5182230398507435, "learning_rate": 2.170640713706407e-05, "loss": 0.5939, "step": 20869 }, { "epoch": 0.6093252751744475, "grad_norm": 0.5308918597691222, "learning_rate": 2.170478507704785e-05, "loss": 0.6074, "step": 20870 }, { "epoch": 0.6093544714022948, "grad_norm": 0.50866091277341, "learning_rate": 2.170316301703163e-05, "loss": 0.5769, "step": 20871 }, { "epoch": 0.6093836676301422, "grad_norm": 0.5056977446317948, "learning_rate": 2.170154095701541e-05, "loss": 0.5761, "step": 20872 }, { "epoch": 0.6094128638579895, "grad_norm": 0.5367534659618857, "learning_rate": 2.1699918896999192e-05, "loss": 0.6326, "step": 20873 }, { "epoch": 0.6094420600858369, "grad_norm": 0.5012526392182244, "learning_rate": 2.169829683698297e-05, "loss": 0.5753, "step": 20874 }, { "epoch": 0.6094712563136843, "grad_norm": 0.4759977007601913, "learning_rate": 2.169667477696675e-05, "loss": 0.5662, "step": 20875 }, { "epoch": 0.6095004525415316, "grad_norm": 0.540847098131963, "learning_rate": 2.169505271695053e-05, "loss": 0.6622, "step": 20876 }, { "epoch": 0.609529648769379, "grad_norm": 0.4964164679090471, "learning_rate": 2.169343065693431e-05, "loss": 0.5788, "step": 20877 }, { "epoch": 0.6095588449972263, "grad_norm": 0.5167105100790825, "learning_rate": 2.1691808596918087e-05, "loss": 0.5838, "step": 20878 }, { "epoch": 0.6095880412250737, "grad_norm": 0.4952716282282872, "learning_rate": 2.1690186536901865e-05, "loss": 0.5742, "step": 20879 }, { "epoch": 0.6096172374529211, "grad_norm": 0.5076594806976584, "learning_rate": 2.1688564476885644e-05, "loss": 0.5556, "step": 20880 }, { "epoch": 0.6096464336807684, "grad_norm": 0.5246110388335343, "learning_rate": 2.1686942416869425e-05, "loss": 0.6135, "step": 20881 }, { "epoch": 0.6096756299086158, "grad_norm": 0.5459928640729103, "learning_rate": 2.1685320356853207e-05, "loss": 0.6772, "step": 20882 }, { "epoch": 0.6097048261364632, "grad_norm": 0.5273331004602798, "learning_rate": 2.1683698296836986e-05, "loss": 0.5844, "step": 20883 }, { "epoch": 0.6097340223643105, "grad_norm": 0.503378918658205, "learning_rate": 2.1682076236820764e-05, "loss": 0.5564, "step": 20884 }, { "epoch": 0.6097632185921579, "grad_norm": 0.5010320178734907, "learning_rate": 2.1680454176804542e-05, "loss": 0.5283, "step": 20885 }, { "epoch": 0.6097924148200052, "grad_norm": 0.5036208896323935, "learning_rate": 2.167883211678832e-05, "loss": 0.5639, "step": 20886 }, { "epoch": 0.6098216110478526, "grad_norm": 0.5484422975004414, "learning_rate": 2.1677210056772103e-05, "loss": 0.6654, "step": 20887 }, { "epoch": 0.6098508072757, "grad_norm": 0.569087170480209, "learning_rate": 2.167558799675588e-05, "loss": 0.6782, "step": 20888 }, { "epoch": 0.6098800035035473, "grad_norm": 0.5532388693339133, "learning_rate": 2.167396593673966e-05, "loss": 0.6773, "step": 20889 }, { "epoch": 0.6099091997313947, "grad_norm": 0.5323512894625937, "learning_rate": 2.1672343876723438e-05, "loss": 0.6335, "step": 20890 }, { "epoch": 0.609938395959242, "grad_norm": 0.5144371234612983, "learning_rate": 2.167072181670722e-05, "loss": 0.5921, "step": 20891 }, { "epoch": 0.6099675921870894, "grad_norm": 0.49984660944029785, "learning_rate": 2.1669099756691e-05, "loss": 0.5431, "step": 20892 }, { "epoch": 0.6099967884149368, "grad_norm": 0.5278531850842574, "learning_rate": 2.166747769667478e-05, "loss": 0.6355, "step": 20893 }, { "epoch": 0.6100259846427841, "grad_norm": 0.523715562678818, "learning_rate": 2.1665855636658558e-05, "loss": 0.6099, "step": 20894 }, { "epoch": 0.6100551808706315, "grad_norm": 0.5683188551515291, "learning_rate": 2.1664233576642336e-05, "loss": 0.6866, "step": 20895 }, { "epoch": 0.6100843770984788, "grad_norm": 0.5268997008793324, "learning_rate": 2.1662611516626115e-05, "loss": 0.5524, "step": 20896 }, { "epoch": 0.6101135733263262, "grad_norm": 0.5154900952892382, "learning_rate": 2.1660989456609896e-05, "loss": 0.5914, "step": 20897 }, { "epoch": 0.6101427695541736, "grad_norm": 0.5134273144454128, "learning_rate": 2.1659367396593675e-05, "loss": 0.5754, "step": 20898 }, { "epoch": 0.6101719657820209, "grad_norm": 0.5126998438042454, "learning_rate": 2.1657745336577453e-05, "loss": 0.6009, "step": 20899 }, { "epoch": 0.6102011620098683, "grad_norm": 0.5036461454843983, "learning_rate": 2.165612327656123e-05, "loss": 0.6022, "step": 20900 }, { "epoch": 0.6102303582377157, "grad_norm": 0.527427950838443, "learning_rate": 2.1654501216545013e-05, "loss": 0.6265, "step": 20901 }, { "epoch": 0.610259554465563, "grad_norm": 0.544534501138116, "learning_rate": 2.1652879156528795e-05, "loss": 0.6489, "step": 20902 }, { "epoch": 0.6102887506934104, "grad_norm": 0.5327452487049477, "learning_rate": 2.1651257096512573e-05, "loss": 0.6681, "step": 20903 }, { "epoch": 0.6103179469212577, "grad_norm": 0.5226253079291265, "learning_rate": 2.1649635036496352e-05, "loss": 0.5891, "step": 20904 }, { "epoch": 0.6103471431491051, "grad_norm": 0.5096105456479845, "learning_rate": 2.164801297648013e-05, "loss": 0.6206, "step": 20905 }, { "epoch": 0.6103763393769525, "grad_norm": 0.5138602543859124, "learning_rate": 2.164639091646391e-05, "loss": 0.5698, "step": 20906 }, { "epoch": 0.6104055356047998, "grad_norm": 0.5447701462922191, "learning_rate": 2.164476885644769e-05, "loss": 0.6161, "step": 20907 }, { "epoch": 0.6104347318326472, "grad_norm": 0.5345857579097413, "learning_rate": 2.164314679643147e-05, "loss": 0.6241, "step": 20908 }, { "epoch": 0.6104639280604945, "grad_norm": 0.5514191877206096, "learning_rate": 2.1641524736415247e-05, "loss": 0.667, "step": 20909 }, { "epoch": 0.6104931242883419, "grad_norm": 0.575609448837302, "learning_rate": 2.1639902676399025e-05, "loss": 0.6778, "step": 20910 }, { "epoch": 0.6105223205161893, "grad_norm": 0.5466918406947723, "learning_rate": 2.1638280616382807e-05, "loss": 0.6354, "step": 20911 }, { "epoch": 0.6105515167440366, "grad_norm": 0.5377674854925641, "learning_rate": 2.163665855636659e-05, "loss": 0.6507, "step": 20912 }, { "epoch": 0.610580712971884, "grad_norm": 0.5155993183435604, "learning_rate": 2.1635036496350367e-05, "loss": 0.5991, "step": 20913 }, { "epoch": 0.6106099091997313, "grad_norm": 0.5321169096273586, "learning_rate": 2.1633414436334146e-05, "loss": 0.6123, "step": 20914 }, { "epoch": 0.6106391054275787, "grad_norm": 0.5311329195540759, "learning_rate": 2.1631792376317924e-05, "loss": 0.6211, "step": 20915 }, { "epoch": 0.6106683016554261, "grad_norm": 0.5487657323872964, "learning_rate": 2.1630170316301702e-05, "loss": 0.6474, "step": 20916 }, { "epoch": 0.6106974978832734, "grad_norm": 0.5126499161451724, "learning_rate": 2.1628548256285484e-05, "loss": 0.5751, "step": 20917 }, { "epoch": 0.6107266941111208, "grad_norm": 0.5786700573503183, "learning_rate": 2.1626926196269263e-05, "loss": 0.7264, "step": 20918 }, { "epoch": 0.6107558903389682, "grad_norm": 0.512610760930007, "learning_rate": 2.162530413625304e-05, "loss": 0.5703, "step": 20919 }, { "epoch": 0.6107850865668155, "grad_norm": 0.49728075753124057, "learning_rate": 2.1623682076236823e-05, "loss": 0.567, "step": 20920 }, { "epoch": 0.6108142827946629, "grad_norm": 0.52921138138824, "learning_rate": 2.16220600162206e-05, "loss": 0.6305, "step": 20921 }, { "epoch": 0.6108434790225102, "grad_norm": 0.5681257071938461, "learning_rate": 2.1620437956204383e-05, "loss": 0.7195, "step": 20922 }, { "epoch": 0.6108726752503576, "grad_norm": 0.5436970987408991, "learning_rate": 2.161881589618816e-05, "loss": 0.6355, "step": 20923 }, { "epoch": 0.610901871478205, "grad_norm": 0.5354714422426251, "learning_rate": 2.161719383617194e-05, "loss": 0.6375, "step": 20924 }, { "epoch": 0.6109310677060524, "grad_norm": 0.5373161053955693, "learning_rate": 2.1615571776155718e-05, "loss": 0.6222, "step": 20925 }, { "epoch": 0.6109602639338998, "grad_norm": 0.4979000894745661, "learning_rate": 2.1613949716139496e-05, "loss": 0.5536, "step": 20926 }, { "epoch": 0.6109894601617472, "grad_norm": 0.47741020576532955, "learning_rate": 2.1612327656123278e-05, "loss": 0.553, "step": 20927 }, { "epoch": 0.6110186563895945, "grad_norm": 0.5337418265783989, "learning_rate": 2.1610705596107057e-05, "loss": 0.6427, "step": 20928 }, { "epoch": 0.6110478526174419, "grad_norm": 0.5218115229712059, "learning_rate": 2.1609083536090835e-05, "loss": 0.6155, "step": 20929 }, { "epoch": 0.6110770488452892, "grad_norm": 0.5345405896986507, "learning_rate": 2.1607461476074617e-05, "loss": 0.6235, "step": 20930 }, { "epoch": 0.6111062450731366, "grad_norm": 0.5072170690289838, "learning_rate": 2.1605839416058395e-05, "loss": 0.5746, "step": 20931 }, { "epoch": 0.611135441300984, "grad_norm": 0.46404299402485005, "learning_rate": 2.1604217356042177e-05, "loss": 0.4618, "step": 20932 }, { "epoch": 0.6111646375288313, "grad_norm": 0.5329092567343319, "learning_rate": 2.1602595296025955e-05, "loss": 0.6175, "step": 20933 }, { "epoch": 0.6111938337566787, "grad_norm": 0.5315449281336471, "learning_rate": 2.1600973236009734e-05, "loss": 0.6292, "step": 20934 }, { "epoch": 0.611223029984526, "grad_norm": 0.5013589537204451, "learning_rate": 2.1599351175993512e-05, "loss": 0.5898, "step": 20935 }, { "epoch": 0.6112522262123734, "grad_norm": 0.5387529944990119, "learning_rate": 2.159772911597729e-05, "loss": 0.548, "step": 20936 }, { "epoch": 0.6112814224402208, "grad_norm": 0.5013761452463831, "learning_rate": 2.1596107055961072e-05, "loss": 0.5531, "step": 20937 }, { "epoch": 0.6113106186680681, "grad_norm": 0.5539683388902431, "learning_rate": 2.159448499594485e-05, "loss": 0.6565, "step": 20938 }, { "epoch": 0.6113398148959155, "grad_norm": 0.5275746258957814, "learning_rate": 2.1592862935928632e-05, "loss": 0.5784, "step": 20939 }, { "epoch": 0.6113690111237629, "grad_norm": 0.5185511106089621, "learning_rate": 2.159124087591241e-05, "loss": 0.6057, "step": 20940 }, { "epoch": 0.6113982073516102, "grad_norm": 0.5214408538989304, "learning_rate": 2.158961881589619e-05, "loss": 0.59, "step": 20941 }, { "epoch": 0.6114274035794576, "grad_norm": 0.4822666634654532, "learning_rate": 2.1587996755879967e-05, "loss": 0.5223, "step": 20942 }, { "epoch": 0.6114565998073049, "grad_norm": 0.46852976701022114, "learning_rate": 2.158637469586375e-05, "loss": 0.489, "step": 20943 }, { "epoch": 0.6114857960351523, "grad_norm": 0.5094107960545248, "learning_rate": 2.1584752635847527e-05, "loss": 0.5845, "step": 20944 }, { "epoch": 0.6115149922629997, "grad_norm": 0.49448835195898294, "learning_rate": 2.1583130575831306e-05, "loss": 0.5668, "step": 20945 }, { "epoch": 0.611544188490847, "grad_norm": 0.5004699175008663, "learning_rate": 2.1581508515815084e-05, "loss": 0.5582, "step": 20946 }, { "epoch": 0.6115733847186944, "grad_norm": 0.4981279246081383, "learning_rate": 2.1579886455798866e-05, "loss": 0.5417, "step": 20947 }, { "epoch": 0.6116025809465417, "grad_norm": 0.5186741314753225, "learning_rate": 2.1578264395782648e-05, "loss": 0.606, "step": 20948 }, { "epoch": 0.6116317771743891, "grad_norm": 0.5460346107409321, "learning_rate": 2.1576642335766426e-05, "loss": 0.6525, "step": 20949 }, { "epoch": 0.6116609734022365, "grad_norm": 0.5628920615999343, "learning_rate": 2.1575020275750204e-05, "loss": 0.6785, "step": 20950 }, { "epoch": 0.6116901696300838, "grad_norm": 0.5103497301027032, "learning_rate": 2.1573398215733983e-05, "loss": 0.5642, "step": 20951 }, { "epoch": 0.6117193658579312, "grad_norm": 0.5022198152937586, "learning_rate": 2.157177615571776e-05, "loss": 0.5586, "step": 20952 }, { "epoch": 0.6117485620857785, "grad_norm": 0.5399617476490659, "learning_rate": 2.1570154095701543e-05, "loss": 0.6403, "step": 20953 }, { "epoch": 0.6117777583136259, "grad_norm": 0.51624811870439, "learning_rate": 2.156853203568532e-05, "loss": 0.6075, "step": 20954 }, { "epoch": 0.6118069545414733, "grad_norm": 0.5487690410716605, "learning_rate": 2.15669099756691e-05, "loss": 0.6437, "step": 20955 }, { "epoch": 0.6118361507693206, "grad_norm": 0.5257529173766825, "learning_rate": 2.1565287915652878e-05, "loss": 0.6529, "step": 20956 }, { "epoch": 0.611865346997168, "grad_norm": 0.5362698718353779, "learning_rate": 2.156366585563666e-05, "loss": 0.6376, "step": 20957 }, { "epoch": 0.6118945432250154, "grad_norm": 0.49397309545461654, "learning_rate": 2.156204379562044e-05, "loss": 0.5164, "step": 20958 }, { "epoch": 0.6119237394528627, "grad_norm": 0.5314449384117867, "learning_rate": 2.156042173560422e-05, "loss": 0.6023, "step": 20959 }, { "epoch": 0.6119529356807101, "grad_norm": 0.5005147807237456, "learning_rate": 2.1558799675588e-05, "loss": 0.5716, "step": 20960 }, { "epoch": 0.6119821319085574, "grad_norm": 0.51340133344184, "learning_rate": 2.1557177615571777e-05, "loss": 0.5771, "step": 20961 }, { "epoch": 0.6120113281364048, "grad_norm": 0.5300329789187073, "learning_rate": 2.1555555555555555e-05, "loss": 0.611, "step": 20962 }, { "epoch": 0.6120405243642522, "grad_norm": 0.5045447866575722, "learning_rate": 2.1553933495539337e-05, "loss": 0.576, "step": 20963 }, { "epoch": 0.6120697205920995, "grad_norm": 0.5074487757473334, "learning_rate": 2.1552311435523115e-05, "loss": 0.5804, "step": 20964 }, { "epoch": 0.6120989168199469, "grad_norm": 0.5071163762649008, "learning_rate": 2.1550689375506894e-05, "loss": 0.5651, "step": 20965 }, { "epoch": 0.6121281130477942, "grad_norm": 0.4970842722630466, "learning_rate": 2.1549067315490672e-05, "loss": 0.5802, "step": 20966 }, { "epoch": 0.6121573092756416, "grad_norm": 0.5310816115082352, "learning_rate": 2.1547445255474454e-05, "loss": 0.6323, "step": 20967 }, { "epoch": 0.612186505503489, "grad_norm": 0.5428059662621441, "learning_rate": 2.1545823195458236e-05, "loss": 0.6, "step": 20968 }, { "epoch": 0.6122157017313363, "grad_norm": 0.5363416185585825, "learning_rate": 2.1544201135442014e-05, "loss": 0.6286, "step": 20969 }, { "epoch": 0.6122448979591837, "grad_norm": 0.4910682572423027, "learning_rate": 2.1542579075425792e-05, "loss": 0.5791, "step": 20970 }, { "epoch": 0.612274094187031, "grad_norm": 0.54001361052242, "learning_rate": 2.154095701540957e-05, "loss": 0.6956, "step": 20971 }, { "epoch": 0.6123032904148784, "grad_norm": 0.5292126234184958, "learning_rate": 2.153933495539335e-05, "loss": 0.5873, "step": 20972 }, { "epoch": 0.6123324866427258, "grad_norm": 0.5207268833775861, "learning_rate": 2.153771289537713e-05, "loss": 0.6149, "step": 20973 }, { "epoch": 0.6123616828705731, "grad_norm": 0.5385948379666852, "learning_rate": 2.153609083536091e-05, "loss": 0.6054, "step": 20974 }, { "epoch": 0.6123908790984205, "grad_norm": 0.5103956014532984, "learning_rate": 2.1534468775344688e-05, "loss": 0.6239, "step": 20975 }, { "epoch": 0.6124200753262679, "grad_norm": 0.537190605202964, "learning_rate": 2.1532846715328466e-05, "loss": 0.6756, "step": 20976 }, { "epoch": 0.6124492715541152, "grad_norm": 0.5033997099929822, "learning_rate": 2.1531224655312248e-05, "loss": 0.6082, "step": 20977 }, { "epoch": 0.6124784677819626, "grad_norm": 0.5549341464072528, "learning_rate": 2.152960259529603e-05, "loss": 0.676, "step": 20978 }, { "epoch": 0.6125076640098099, "grad_norm": 0.5144825582872201, "learning_rate": 2.1527980535279808e-05, "loss": 0.5644, "step": 20979 }, { "epoch": 0.6125368602376573, "grad_norm": 0.49161283452347315, "learning_rate": 2.1526358475263586e-05, "loss": 0.5896, "step": 20980 }, { "epoch": 0.6125660564655047, "grad_norm": 0.495328070195822, "learning_rate": 2.1524736415247365e-05, "loss": 0.5365, "step": 20981 }, { "epoch": 0.612595252693352, "grad_norm": 0.5676000769053589, "learning_rate": 2.1523114355231143e-05, "loss": 0.7004, "step": 20982 }, { "epoch": 0.6126244489211994, "grad_norm": 0.5399251665446002, "learning_rate": 2.1521492295214925e-05, "loss": 0.6457, "step": 20983 }, { "epoch": 0.6126536451490467, "grad_norm": 0.5192111630297338, "learning_rate": 2.1519870235198703e-05, "loss": 0.6061, "step": 20984 }, { "epoch": 0.6126828413768941, "grad_norm": 0.55266577737453, "learning_rate": 2.151824817518248e-05, "loss": 0.6874, "step": 20985 }, { "epoch": 0.6127120376047415, "grad_norm": 0.5295761043935326, "learning_rate": 2.1516626115166263e-05, "loss": 0.6425, "step": 20986 }, { "epoch": 0.6127412338325888, "grad_norm": 0.5277601150243865, "learning_rate": 2.151500405515004e-05, "loss": 0.6326, "step": 20987 }, { "epoch": 0.6127704300604362, "grad_norm": 0.4996747653667977, "learning_rate": 2.1513381995133823e-05, "loss": 0.5804, "step": 20988 }, { "epoch": 0.6127996262882835, "grad_norm": 0.5266699926622965, "learning_rate": 2.15117599351176e-05, "loss": 0.6153, "step": 20989 }, { "epoch": 0.6128288225161309, "grad_norm": 0.5281733650285108, "learning_rate": 2.151013787510138e-05, "loss": 0.5697, "step": 20990 }, { "epoch": 0.6128580187439783, "grad_norm": 0.4939585373108027, "learning_rate": 2.150851581508516e-05, "loss": 0.5256, "step": 20991 }, { "epoch": 0.6128872149718256, "grad_norm": 0.5929726151482188, "learning_rate": 2.1506893755068937e-05, "loss": 0.719, "step": 20992 }, { "epoch": 0.612916411199673, "grad_norm": 0.5812148915022575, "learning_rate": 2.150527169505272e-05, "loss": 0.6407, "step": 20993 }, { "epoch": 0.6129456074275204, "grad_norm": 0.5010631149837863, "learning_rate": 2.1503649635036497e-05, "loss": 0.5623, "step": 20994 }, { "epoch": 0.6129748036553677, "grad_norm": 0.5097369275014799, "learning_rate": 2.1502027575020275e-05, "loss": 0.5724, "step": 20995 }, { "epoch": 0.6130039998832151, "grad_norm": 0.5349511283843869, "learning_rate": 2.1500405515004057e-05, "loss": 0.5978, "step": 20996 }, { "epoch": 0.6130331961110624, "grad_norm": 0.5171850551858692, "learning_rate": 2.1498783454987835e-05, "loss": 0.5625, "step": 20997 }, { "epoch": 0.6130623923389098, "grad_norm": 0.5262674433647327, "learning_rate": 2.1497161394971614e-05, "loss": 0.6027, "step": 20998 }, { "epoch": 0.6130915885667572, "grad_norm": 0.5117886795245761, "learning_rate": 2.1495539334955396e-05, "loss": 0.6227, "step": 20999 }, { "epoch": 0.6131207847946045, "grad_norm": 0.5269162495623255, "learning_rate": 2.1493917274939174e-05, "loss": 0.5922, "step": 21000 }, { "epoch": 0.6131499810224519, "grad_norm": 0.5078399902346782, "learning_rate": 2.1492295214922952e-05, "loss": 0.5609, "step": 21001 }, { "epoch": 0.6131791772502992, "grad_norm": 0.5306838554349809, "learning_rate": 2.149067315490673e-05, "loss": 0.6574, "step": 21002 }, { "epoch": 0.6132083734781466, "grad_norm": 0.5314942692274488, "learning_rate": 2.1489051094890512e-05, "loss": 0.6302, "step": 21003 }, { "epoch": 0.613237569705994, "grad_norm": 0.5371309375653632, "learning_rate": 2.148742903487429e-05, "loss": 0.6109, "step": 21004 }, { "epoch": 0.6132667659338413, "grad_norm": 0.4792096495652128, "learning_rate": 2.1485806974858073e-05, "loss": 0.5255, "step": 21005 }, { "epoch": 0.6132959621616887, "grad_norm": 0.535000553038276, "learning_rate": 2.148418491484185e-05, "loss": 0.6393, "step": 21006 }, { "epoch": 0.613325158389536, "grad_norm": 0.48997047426784496, "learning_rate": 2.148256285482563e-05, "loss": 0.5181, "step": 21007 }, { "epoch": 0.6133543546173834, "grad_norm": 0.5383167424783919, "learning_rate": 2.1480940794809408e-05, "loss": 0.5995, "step": 21008 }, { "epoch": 0.6133835508452308, "grad_norm": 0.551622953387041, "learning_rate": 2.147931873479319e-05, "loss": 0.6186, "step": 21009 }, { "epoch": 0.6134127470730781, "grad_norm": 0.5137366121665273, "learning_rate": 2.1477696674776968e-05, "loss": 0.5816, "step": 21010 }, { "epoch": 0.6134419433009255, "grad_norm": 0.538241320940237, "learning_rate": 2.1476074614760746e-05, "loss": 0.5764, "step": 21011 }, { "epoch": 0.6134711395287729, "grad_norm": 0.5395973545583613, "learning_rate": 2.1474452554744525e-05, "loss": 0.6052, "step": 21012 }, { "epoch": 0.6135003357566202, "grad_norm": 0.5628519373162573, "learning_rate": 2.1472830494728306e-05, "loss": 0.6756, "step": 21013 }, { "epoch": 0.6135295319844676, "grad_norm": 0.5424290427568712, "learning_rate": 2.1471208434712088e-05, "loss": 0.5961, "step": 21014 }, { "epoch": 0.6135587282123149, "grad_norm": 0.5237763417154678, "learning_rate": 2.1469586374695867e-05, "loss": 0.5976, "step": 21015 }, { "epoch": 0.6135879244401623, "grad_norm": 0.524335031668363, "learning_rate": 2.1467964314679645e-05, "loss": 0.5854, "step": 21016 }, { "epoch": 0.6136171206680097, "grad_norm": 0.5232534868832706, "learning_rate": 2.1466342254663423e-05, "loss": 0.6041, "step": 21017 }, { "epoch": 0.613646316895857, "grad_norm": 0.5020222816756886, "learning_rate": 2.14647201946472e-05, "loss": 0.5947, "step": 21018 }, { "epoch": 0.6136755131237044, "grad_norm": 0.49737231006123966, "learning_rate": 2.1463098134630983e-05, "loss": 0.5444, "step": 21019 }, { "epoch": 0.6137047093515517, "grad_norm": 0.5430702492755425, "learning_rate": 2.1461476074614762e-05, "loss": 0.6536, "step": 21020 }, { "epoch": 0.6137339055793991, "grad_norm": 0.5355061604415735, "learning_rate": 2.145985401459854e-05, "loss": 0.6324, "step": 21021 }, { "epoch": 0.6137631018072465, "grad_norm": 0.5514800724775698, "learning_rate": 2.145823195458232e-05, "loss": 0.6483, "step": 21022 }, { "epoch": 0.6137922980350938, "grad_norm": 0.4952466234220285, "learning_rate": 2.14566098945661e-05, "loss": 0.582, "step": 21023 }, { "epoch": 0.6138214942629412, "grad_norm": 0.5408812218939586, "learning_rate": 2.1454987834549882e-05, "loss": 0.6503, "step": 21024 }, { "epoch": 0.6138506904907886, "grad_norm": 0.5299398167577289, "learning_rate": 2.145336577453366e-05, "loss": 0.5769, "step": 21025 }, { "epoch": 0.6138798867186359, "grad_norm": 0.5438194115353068, "learning_rate": 2.145174371451744e-05, "loss": 0.6586, "step": 21026 }, { "epoch": 0.6139090829464833, "grad_norm": 0.5234309229957778, "learning_rate": 2.1450121654501217e-05, "loss": 0.626, "step": 21027 }, { "epoch": 0.6139382791743306, "grad_norm": 0.5284616823132681, "learning_rate": 2.1448499594484996e-05, "loss": 0.5996, "step": 21028 }, { "epoch": 0.613967475402178, "grad_norm": 0.5265413057579477, "learning_rate": 2.1446877534468777e-05, "loss": 0.6067, "step": 21029 }, { "epoch": 0.6139966716300254, "grad_norm": 0.5052530642269013, "learning_rate": 2.1445255474452556e-05, "loss": 0.5998, "step": 21030 }, { "epoch": 0.6140258678578727, "grad_norm": 0.5521925377230285, "learning_rate": 2.1443633414436334e-05, "loss": 0.6908, "step": 21031 }, { "epoch": 0.6140550640857201, "grad_norm": 0.4996832132847174, "learning_rate": 2.1442011354420112e-05, "loss": 0.6062, "step": 21032 }, { "epoch": 0.6140842603135674, "grad_norm": 0.5307417129673411, "learning_rate": 2.1440389294403894e-05, "loss": 0.6186, "step": 21033 }, { "epoch": 0.6141134565414148, "grad_norm": 0.4941446075069792, "learning_rate": 2.1438767234387676e-05, "loss": 0.5469, "step": 21034 }, { "epoch": 0.6141426527692622, "grad_norm": 0.6183895533602717, "learning_rate": 2.1437145174371454e-05, "loss": 0.6714, "step": 21035 }, { "epoch": 0.6141718489971095, "grad_norm": 0.534645607625916, "learning_rate": 2.1435523114355233e-05, "loss": 0.6546, "step": 21036 }, { "epoch": 0.6142010452249569, "grad_norm": 0.5335386018808272, "learning_rate": 2.143390105433901e-05, "loss": 0.6408, "step": 21037 }, { "epoch": 0.6142302414528042, "grad_norm": 0.5015468765172987, "learning_rate": 2.143227899432279e-05, "loss": 0.5801, "step": 21038 }, { "epoch": 0.6142594376806516, "grad_norm": 0.5240339137253981, "learning_rate": 2.143065693430657e-05, "loss": 0.5587, "step": 21039 }, { "epoch": 0.614288633908499, "grad_norm": 0.5180682076581912, "learning_rate": 2.142903487429035e-05, "loss": 0.6006, "step": 21040 }, { "epoch": 0.6143178301363463, "grad_norm": 0.5116551928450795, "learning_rate": 2.1427412814274128e-05, "loss": 0.5629, "step": 21041 }, { "epoch": 0.6143470263641937, "grad_norm": 0.5114906627581042, "learning_rate": 2.1425790754257906e-05, "loss": 0.5748, "step": 21042 }, { "epoch": 0.614376222592041, "grad_norm": 0.5176916736949807, "learning_rate": 2.1424168694241688e-05, "loss": 0.6004, "step": 21043 }, { "epoch": 0.6144054188198884, "grad_norm": 0.5390041657385656, "learning_rate": 2.142254663422547e-05, "loss": 0.6435, "step": 21044 }, { "epoch": 0.6144346150477358, "grad_norm": 0.5056248868015794, "learning_rate": 2.1420924574209248e-05, "loss": 0.5884, "step": 21045 }, { "epoch": 0.6144638112755832, "grad_norm": 0.5142201185173414, "learning_rate": 2.1419302514193027e-05, "loss": 0.6168, "step": 21046 }, { "epoch": 0.6144930075034306, "grad_norm": 0.5266803601710571, "learning_rate": 2.1417680454176805e-05, "loss": 0.6279, "step": 21047 }, { "epoch": 0.614522203731278, "grad_norm": 0.5083185567563149, "learning_rate": 2.1416058394160583e-05, "loss": 0.5372, "step": 21048 }, { "epoch": 0.6145513999591253, "grad_norm": 0.5404417767953971, "learning_rate": 2.1414436334144365e-05, "loss": 0.5125, "step": 21049 }, { "epoch": 0.6145805961869727, "grad_norm": 0.5448449109170848, "learning_rate": 2.1412814274128143e-05, "loss": 0.6847, "step": 21050 }, { "epoch": 0.61460979241482, "grad_norm": 0.566961050887407, "learning_rate": 2.1411192214111922e-05, "loss": 0.6555, "step": 21051 }, { "epoch": 0.6146389886426674, "grad_norm": 0.494804848928366, "learning_rate": 2.1409570154095704e-05, "loss": 0.5513, "step": 21052 }, { "epoch": 0.6146681848705148, "grad_norm": 0.5225928027173458, "learning_rate": 2.1407948094079482e-05, "loss": 0.6107, "step": 21053 }, { "epoch": 0.6146973810983621, "grad_norm": 0.5389001750941654, "learning_rate": 2.1406326034063264e-05, "loss": 0.6352, "step": 21054 }, { "epoch": 0.6147265773262095, "grad_norm": 0.5091030811030003, "learning_rate": 2.1404703974047042e-05, "loss": 0.5884, "step": 21055 }, { "epoch": 0.6147557735540569, "grad_norm": 0.5453700095363744, "learning_rate": 2.140308191403082e-05, "loss": 0.6157, "step": 21056 }, { "epoch": 0.6147849697819042, "grad_norm": 0.4865643691729383, "learning_rate": 2.14014598540146e-05, "loss": 0.5603, "step": 21057 }, { "epoch": 0.6148141660097516, "grad_norm": 0.5203649084693365, "learning_rate": 2.1399837793998377e-05, "loss": 0.6125, "step": 21058 }, { "epoch": 0.614843362237599, "grad_norm": 0.48280440458611623, "learning_rate": 2.139821573398216e-05, "loss": 0.5483, "step": 21059 }, { "epoch": 0.6148725584654463, "grad_norm": 0.5917741745204667, "learning_rate": 2.1396593673965937e-05, "loss": 0.701, "step": 21060 }, { "epoch": 0.6149017546932937, "grad_norm": 0.48516375998598704, "learning_rate": 2.1394971613949716e-05, "loss": 0.5592, "step": 21061 }, { "epoch": 0.614930950921141, "grad_norm": 0.5261305933558311, "learning_rate": 2.1393349553933498e-05, "loss": 0.6613, "step": 21062 }, { "epoch": 0.6149601471489884, "grad_norm": 0.49638924912868204, "learning_rate": 2.1391727493917276e-05, "loss": 0.5995, "step": 21063 }, { "epoch": 0.6149893433768358, "grad_norm": 0.5550895206109104, "learning_rate": 2.1390105433901054e-05, "loss": 0.64, "step": 21064 }, { "epoch": 0.6150185396046831, "grad_norm": 0.5237936766743144, "learning_rate": 2.1388483373884836e-05, "loss": 0.6018, "step": 21065 }, { "epoch": 0.6150477358325305, "grad_norm": 0.48534578317104154, "learning_rate": 2.1386861313868614e-05, "loss": 0.578, "step": 21066 }, { "epoch": 0.6150769320603778, "grad_norm": 0.47591196931436225, "learning_rate": 2.1385239253852393e-05, "loss": 0.5024, "step": 21067 }, { "epoch": 0.6151061282882252, "grad_norm": 0.5136804201596162, "learning_rate": 2.138361719383617e-05, "loss": 0.5896, "step": 21068 }, { "epoch": 0.6151353245160726, "grad_norm": 0.4878904023183946, "learning_rate": 2.1381995133819953e-05, "loss": 0.5858, "step": 21069 }, { "epoch": 0.6151645207439199, "grad_norm": 0.5287314726361934, "learning_rate": 2.138037307380373e-05, "loss": 0.561, "step": 21070 }, { "epoch": 0.6151937169717673, "grad_norm": 0.5133997237620397, "learning_rate": 2.1378751013787513e-05, "loss": 0.5785, "step": 21071 }, { "epoch": 0.6152229131996146, "grad_norm": 0.5517599419585248, "learning_rate": 2.137712895377129e-05, "loss": 0.6365, "step": 21072 }, { "epoch": 0.615252109427462, "grad_norm": 0.5042114972360667, "learning_rate": 2.137550689375507e-05, "loss": 0.6192, "step": 21073 }, { "epoch": 0.6152813056553094, "grad_norm": 0.5457691244516095, "learning_rate": 2.1373884833738848e-05, "loss": 0.6375, "step": 21074 }, { "epoch": 0.6153105018831567, "grad_norm": 0.5400469303005193, "learning_rate": 2.137226277372263e-05, "loss": 0.628, "step": 21075 }, { "epoch": 0.6153396981110041, "grad_norm": 0.5319880340206459, "learning_rate": 2.137064071370641e-05, "loss": 0.5667, "step": 21076 }, { "epoch": 0.6153688943388514, "grad_norm": 0.5207813365209915, "learning_rate": 2.1369018653690187e-05, "loss": 0.6181, "step": 21077 }, { "epoch": 0.6153980905666988, "grad_norm": 0.49698024409343805, "learning_rate": 2.1367396593673965e-05, "loss": 0.5818, "step": 21078 }, { "epoch": 0.6154272867945462, "grad_norm": 0.5247532783483569, "learning_rate": 2.1365774533657747e-05, "loss": 0.6074, "step": 21079 }, { "epoch": 0.6154564830223935, "grad_norm": 0.516937042342933, "learning_rate": 2.1364152473641525e-05, "loss": 0.5879, "step": 21080 }, { "epoch": 0.6154856792502409, "grad_norm": 0.5743481562784828, "learning_rate": 2.1362530413625307e-05, "loss": 0.6998, "step": 21081 }, { "epoch": 0.6155148754780883, "grad_norm": 0.5262174157192991, "learning_rate": 2.1360908353609085e-05, "loss": 0.6506, "step": 21082 }, { "epoch": 0.6155440717059356, "grad_norm": 0.5411619585479348, "learning_rate": 2.1359286293592864e-05, "loss": 0.6419, "step": 21083 }, { "epoch": 0.615573267933783, "grad_norm": 0.5405309980260615, "learning_rate": 2.1357664233576642e-05, "loss": 0.6618, "step": 21084 }, { "epoch": 0.6156024641616303, "grad_norm": 0.500513831801919, "learning_rate": 2.1356042173560424e-05, "loss": 0.5882, "step": 21085 }, { "epoch": 0.6156316603894777, "grad_norm": 0.5853986776531703, "learning_rate": 2.1354420113544202e-05, "loss": 0.7203, "step": 21086 }, { "epoch": 0.6156608566173251, "grad_norm": 0.5261089656142827, "learning_rate": 2.135279805352798e-05, "loss": 0.5989, "step": 21087 }, { "epoch": 0.6156900528451724, "grad_norm": 0.5233322330503346, "learning_rate": 2.135117599351176e-05, "loss": 0.6155, "step": 21088 }, { "epoch": 0.6157192490730198, "grad_norm": 0.4986624582690539, "learning_rate": 2.134955393349554e-05, "loss": 0.5582, "step": 21089 }, { "epoch": 0.6157484453008671, "grad_norm": 0.49698254388237756, "learning_rate": 2.1347931873479322e-05, "loss": 0.5583, "step": 21090 }, { "epoch": 0.6157776415287145, "grad_norm": 0.5104247391464012, "learning_rate": 2.13463098134631e-05, "loss": 0.5657, "step": 21091 }, { "epoch": 0.6158068377565619, "grad_norm": 0.5042283896513532, "learning_rate": 2.134468775344688e-05, "loss": 0.567, "step": 21092 }, { "epoch": 0.6158360339844092, "grad_norm": 0.5384679160962765, "learning_rate": 2.1343065693430658e-05, "loss": 0.6253, "step": 21093 }, { "epoch": 0.6158652302122566, "grad_norm": 0.5445924694934793, "learning_rate": 2.1341443633414436e-05, "loss": 0.5667, "step": 21094 }, { "epoch": 0.615894426440104, "grad_norm": 0.5691382035899066, "learning_rate": 2.1339821573398218e-05, "loss": 0.6821, "step": 21095 }, { "epoch": 0.6159236226679513, "grad_norm": 0.5175203090608683, "learning_rate": 2.1338199513381996e-05, "loss": 0.5633, "step": 21096 }, { "epoch": 0.6159528188957987, "grad_norm": 0.540109348271549, "learning_rate": 2.1336577453365774e-05, "loss": 0.6142, "step": 21097 }, { "epoch": 0.615982015123646, "grad_norm": 0.5585721532943397, "learning_rate": 2.1334955393349553e-05, "loss": 0.6921, "step": 21098 }, { "epoch": 0.6160112113514934, "grad_norm": 0.526600251665631, "learning_rate": 2.1333333333333335e-05, "loss": 0.5837, "step": 21099 }, { "epoch": 0.6160404075793408, "grad_norm": 0.4815767559094484, "learning_rate": 2.1331711273317116e-05, "loss": 0.5036, "step": 21100 }, { "epoch": 0.6160696038071881, "grad_norm": 0.5280491364179503, "learning_rate": 2.1330089213300895e-05, "loss": 0.6782, "step": 21101 }, { "epoch": 0.6160988000350355, "grad_norm": 0.5267588695377602, "learning_rate": 2.1328467153284673e-05, "loss": 0.6234, "step": 21102 }, { "epoch": 0.6161279962628828, "grad_norm": 0.6073193988920962, "learning_rate": 2.132684509326845e-05, "loss": 0.6563, "step": 21103 }, { "epoch": 0.6161571924907302, "grad_norm": 0.46403424537405064, "learning_rate": 2.132522303325223e-05, "loss": 0.4997, "step": 21104 }, { "epoch": 0.6161863887185776, "grad_norm": 0.5543772879980325, "learning_rate": 2.132360097323601e-05, "loss": 0.6825, "step": 21105 }, { "epoch": 0.6162155849464249, "grad_norm": 0.5867630084665477, "learning_rate": 2.132197891321979e-05, "loss": 0.6374, "step": 21106 }, { "epoch": 0.6162447811742723, "grad_norm": 0.4897466496863292, "learning_rate": 2.132035685320357e-05, "loss": 0.5695, "step": 21107 }, { "epoch": 0.6162739774021196, "grad_norm": 0.5191146580508896, "learning_rate": 2.1318734793187347e-05, "loss": 0.5787, "step": 21108 }, { "epoch": 0.616303173629967, "grad_norm": 0.5465714089022844, "learning_rate": 2.131711273317113e-05, "loss": 0.6117, "step": 21109 }, { "epoch": 0.6163323698578144, "grad_norm": 0.5547003746437232, "learning_rate": 2.131549067315491e-05, "loss": 0.6601, "step": 21110 }, { "epoch": 0.6163615660856617, "grad_norm": 0.5095797647202184, "learning_rate": 2.131386861313869e-05, "loss": 0.5672, "step": 21111 }, { "epoch": 0.6163907623135091, "grad_norm": 0.5040770316045672, "learning_rate": 2.1312246553122467e-05, "loss": 0.5902, "step": 21112 }, { "epoch": 0.6164199585413564, "grad_norm": 0.5078181640269603, "learning_rate": 2.1310624493106245e-05, "loss": 0.6003, "step": 21113 }, { "epoch": 0.6164491547692038, "grad_norm": 0.5281734853037885, "learning_rate": 2.1309002433090024e-05, "loss": 0.6519, "step": 21114 }, { "epoch": 0.6164783509970512, "grad_norm": 0.537174128217506, "learning_rate": 2.1307380373073806e-05, "loss": 0.6267, "step": 21115 }, { "epoch": 0.6165075472248985, "grad_norm": 0.5278155370632348, "learning_rate": 2.1305758313057584e-05, "loss": 0.628, "step": 21116 }, { "epoch": 0.6165367434527459, "grad_norm": 0.5245561259417314, "learning_rate": 2.1304136253041362e-05, "loss": 0.6498, "step": 21117 }, { "epoch": 0.6165659396805933, "grad_norm": 0.49559683191991466, "learning_rate": 2.1302514193025144e-05, "loss": 0.5518, "step": 21118 }, { "epoch": 0.6165951359084406, "grad_norm": 0.5123207770668703, "learning_rate": 2.1300892133008922e-05, "loss": 0.5723, "step": 21119 }, { "epoch": 0.616624332136288, "grad_norm": 0.5332527773047607, "learning_rate": 2.12992700729927e-05, "loss": 0.6067, "step": 21120 }, { "epoch": 0.6166535283641353, "grad_norm": 0.481957437035484, "learning_rate": 2.1297648012976483e-05, "loss": 0.5401, "step": 21121 }, { "epoch": 0.6166827245919827, "grad_norm": 0.5219137519206948, "learning_rate": 2.129602595296026e-05, "loss": 0.5696, "step": 21122 }, { "epoch": 0.6167119208198301, "grad_norm": 0.517585127523878, "learning_rate": 2.129440389294404e-05, "loss": 0.6048, "step": 21123 }, { "epoch": 0.6167411170476774, "grad_norm": 0.5120746820651203, "learning_rate": 2.1292781832927818e-05, "loss": 0.5705, "step": 21124 }, { "epoch": 0.6167703132755248, "grad_norm": 0.5028640828167716, "learning_rate": 2.12911597729116e-05, "loss": 0.5629, "step": 21125 }, { "epoch": 0.6167995095033721, "grad_norm": 0.5282344181425357, "learning_rate": 2.1289537712895378e-05, "loss": 0.6172, "step": 21126 }, { "epoch": 0.6168287057312195, "grad_norm": 0.5627283231966008, "learning_rate": 2.1287915652879156e-05, "loss": 0.6269, "step": 21127 }, { "epoch": 0.6168579019590669, "grad_norm": 0.5415554530466818, "learning_rate": 2.1286293592862938e-05, "loss": 0.6058, "step": 21128 }, { "epoch": 0.6168870981869142, "grad_norm": 0.49383130698783584, "learning_rate": 2.1284671532846716e-05, "loss": 0.5619, "step": 21129 }, { "epoch": 0.6169162944147616, "grad_norm": 0.4934911506854212, "learning_rate": 2.1283049472830495e-05, "loss": 0.55, "step": 21130 }, { "epoch": 0.616945490642609, "grad_norm": 0.5155651918218189, "learning_rate": 2.1281427412814276e-05, "loss": 0.6058, "step": 21131 }, { "epoch": 0.6169746868704563, "grad_norm": 0.5416476786637957, "learning_rate": 2.1279805352798055e-05, "loss": 0.6232, "step": 21132 }, { "epoch": 0.6170038830983037, "grad_norm": 0.4825048105704614, "learning_rate": 2.1278183292781833e-05, "loss": 0.5425, "step": 21133 }, { "epoch": 0.617033079326151, "grad_norm": 0.531955746900939, "learning_rate": 2.127656123276561e-05, "loss": 0.6609, "step": 21134 }, { "epoch": 0.6170622755539984, "grad_norm": 0.492872556041426, "learning_rate": 2.1274939172749393e-05, "loss": 0.5521, "step": 21135 }, { "epoch": 0.6170914717818458, "grad_norm": 0.49022378660775223, "learning_rate": 2.1273317112733172e-05, "loss": 0.56, "step": 21136 }, { "epoch": 0.6171206680096931, "grad_norm": 0.5205155045613944, "learning_rate": 2.1271695052716953e-05, "loss": 0.6442, "step": 21137 }, { "epoch": 0.6171498642375405, "grad_norm": 0.5196731254350171, "learning_rate": 2.1270072992700732e-05, "loss": 0.5896, "step": 21138 }, { "epoch": 0.6171790604653878, "grad_norm": 0.5420051582909976, "learning_rate": 2.126845093268451e-05, "loss": 0.6297, "step": 21139 }, { "epoch": 0.6172082566932352, "grad_norm": 0.5152610784101823, "learning_rate": 2.126682887266829e-05, "loss": 0.5946, "step": 21140 }, { "epoch": 0.6172374529210826, "grad_norm": 0.5483232031651283, "learning_rate": 2.126520681265207e-05, "loss": 0.6362, "step": 21141 }, { "epoch": 0.6172666491489299, "grad_norm": 0.5050605239520055, "learning_rate": 2.126358475263585e-05, "loss": 0.5966, "step": 21142 }, { "epoch": 0.6172958453767773, "grad_norm": 0.5438657632053202, "learning_rate": 2.1261962692619627e-05, "loss": 0.6812, "step": 21143 }, { "epoch": 0.6173250416046246, "grad_norm": 0.5053826825758002, "learning_rate": 2.1260340632603405e-05, "loss": 0.5555, "step": 21144 }, { "epoch": 0.617354237832472, "grad_norm": 0.4695390854369261, "learning_rate": 2.1258718572587187e-05, "loss": 0.5324, "step": 21145 }, { "epoch": 0.6173834340603194, "grad_norm": 0.5356678314590245, "learning_rate": 2.1257096512570966e-05, "loss": 0.6489, "step": 21146 }, { "epoch": 0.6174126302881667, "grad_norm": 0.5212444324592966, "learning_rate": 2.1255474452554747e-05, "loss": 0.6352, "step": 21147 }, { "epoch": 0.6174418265160141, "grad_norm": 0.5035089053515247, "learning_rate": 2.1253852392538526e-05, "loss": 0.5863, "step": 21148 }, { "epoch": 0.6174710227438615, "grad_norm": 0.5065761532125285, "learning_rate": 2.1252230332522304e-05, "loss": 0.6052, "step": 21149 }, { "epoch": 0.6175002189717088, "grad_norm": 0.5311245676808038, "learning_rate": 2.1250608272506083e-05, "loss": 0.6162, "step": 21150 }, { "epoch": 0.6175294151995562, "grad_norm": 0.5639516733958578, "learning_rate": 2.1248986212489864e-05, "loss": 0.6625, "step": 21151 }, { "epoch": 0.6175586114274035, "grad_norm": 0.5134371978724677, "learning_rate": 2.1247364152473643e-05, "loss": 0.5912, "step": 21152 }, { "epoch": 0.6175878076552509, "grad_norm": 0.5218456438586409, "learning_rate": 2.124574209245742e-05, "loss": 0.63, "step": 21153 }, { "epoch": 0.6176170038830983, "grad_norm": 0.485204346009625, "learning_rate": 2.12441200324412e-05, "loss": 0.5658, "step": 21154 }, { "epoch": 0.6176462001109456, "grad_norm": 0.5319639078595211, "learning_rate": 2.124249797242498e-05, "loss": 0.6167, "step": 21155 }, { "epoch": 0.617675396338793, "grad_norm": 0.49848171599475244, "learning_rate": 2.1240875912408763e-05, "loss": 0.5539, "step": 21156 }, { "epoch": 0.6177045925666403, "grad_norm": 0.5340865448343589, "learning_rate": 2.123925385239254e-05, "loss": 0.578, "step": 21157 }, { "epoch": 0.6177337887944877, "grad_norm": 0.503150674426847, "learning_rate": 2.123763179237632e-05, "loss": 0.5585, "step": 21158 }, { "epoch": 0.6177629850223351, "grad_norm": 0.5230963190332, "learning_rate": 2.1236009732360098e-05, "loss": 0.6032, "step": 21159 }, { "epoch": 0.6177921812501824, "grad_norm": 0.5161185884061235, "learning_rate": 2.1234387672343876e-05, "loss": 0.6147, "step": 21160 }, { "epoch": 0.6178213774780298, "grad_norm": 0.46600547067856174, "learning_rate": 2.1232765612327658e-05, "loss": 0.5166, "step": 21161 }, { "epoch": 0.6178505737058771, "grad_norm": 0.5517295723596422, "learning_rate": 2.1231143552311437e-05, "loss": 0.6561, "step": 21162 }, { "epoch": 0.6178797699337245, "grad_norm": 0.5230653169769469, "learning_rate": 2.1229521492295215e-05, "loss": 0.5529, "step": 21163 }, { "epoch": 0.6179089661615719, "grad_norm": 0.49651982179569126, "learning_rate": 2.1227899432278993e-05, "loss": 0.5704, "step": 21164 }, { "epoch": 0.6179381623894192, "grad_norm": 0.48550317897522915, "learning_rate": 2.122627737226277e-05, "loss": 0.5301, "step": 21165 }, { "epoch": 0.6179673586172667, "grad_norm": 0.564774167432797, "learning_rate": 2.1224655312246557e-05, "loss": 0.6866, "step": 21166 }, { "epoch": 0.6179965548451141, "grad_norm": 0.5159765728321705, "learning_rate": 2.1223033252230335e-05, "loss": 0.5753, "step": 21167 }, { "epoch": 0.6180257510729614, "grad_norm": 0.5537165382451823, "learning_rate": 2.1221411192214114e-05, "loss": 0.6611, "step": 21168 }, { "epoch": 0.6180549473008088, "grad_norm": 0.5173052244578065, "learning_rate": 2.1219789132197892e-05, "loss": 0.5859, "step": 21169 }, { "epoch": 0.6180841435286561, "grad_norm": 0.5010748126367625, "learning_rate": 2.121816707218167e-05, "loss": 0.5875, "step": 21170 }, { "epoch": 0.6181133397565035, "grad_norm": 0.5276216126098919, "learning_rate": 2.1216545012165452e-05, "loss": 0.6293, "step": 21171 }, { "epoch": 0.6181425359843509, "grad_norm": 0.5464932725044372, "learning_rate": 2.121492295214923e-05, "loss": 0.5564, "step": 21172 }, { "epoch": 0.6181717322121982, "grad_norm": 0.5599840786639102, "learning_rate": 2.121330089213301e-05, "loss": 0.6165, "step": 21173 }, { "epoch": 0.6182009284400456, "grad_norm": 0.48983075984563823, "learning_rate": 2.1211678832116787e-05, "loss": 0.5272, "step": 21174 }, { "epoch": 0.618230124667893, "grad_norm": 0.5119450829644501, "learning_rate": 2.121005677210057e-05, "loss": 0.5945, "step": 21175 }, { "epoch": 0.6182593208957403, "grad_norm": 0.5080206877501119, "learning_rate": 2.120843471208435e-05, "loss": 0.5314, "step": 21176 }, { "epoch": 0.6182885171235877, "grad_norm": 0.5336128177428657, "learning_rate": 2.120681265206813e-05, "loss": 0.6062, "step": 21177 }, { "epoch": 0.618317713351435, "grad_norm": 0.5232703287037236, "learning_rate": 2.1205190592051907e-05, "loss": 0.591, "step": 21178 }, { "epoch": 0.6183469095792824, "grad_norm": 0.48551311459545055, "learning_rate": 2.1203568532035686e-05, "loss": 0.5368, "step": 21179 }, { "epoch": 0.6183761058071298, "grad_norm": 0.5552236969085705, "learning_rate": 2.1201946472019464e-05, "loss": 0.6514, "step": 21180 }, { "epoch": 0.6184053020349771, "grad_norm": 0.5164079750992873, "learning_rate": 2.1200324412003246e-05, "loss": 0.6093, "step": 21181 }, { "epoch": 0.6184344982628245, "grad_norm": 0.5033410790191725, "learning_rate": 2.1198702351987024e-05, "loss": 0.545, "step": 21182 }, { "epoch": 0.6184636944906718, "grad_norm": 0.5036289040179371, "learning_rate": 2.1197080291970803e-05, "loss": 0.5731, "step": 21183 }, { "epoch": 0.6184928907185192, "grad_norm": 0.5422105558486272, "learning_rate": 2.1195458231954585e-05, "loss": 0.6094, "step": 21184 }, { "epoch": 0.6185220869463666, "grad_norm": 0.48973584030497913, "learning_rate": 2.1193836171938363e-05, "loss": 0.5516, "step": 21185 }, { "epoch": 0.6185512831742139, "grad_norm": 0.5660532403355596, "learning_rate": 2.119221411192214e-05, "loss": 0.6967, "step": 21186 }, { "epoch": 0.6185804794020613, "grad_norm": 0.5248315001044039, "learning_rate": 2.1190592051905923e-05, "loss": 0.5585, "step": 21187 }, { "epoch": 0.6186096756299087, "grad_norm": 0.582554928641834, "learning_rate": 2.11889699918897e-05, "loss": 0.672, "step": 21188 }, { "epoch": 0.618638871857756, "grad_norm": 0.5108771458202992, "learning_rate": 2.118734793187348e-05, "loss": 0.5482, "step": 21189 }, { "epoch": 0.6186680680856034, "grad_norm": 0.510805956940933, "learning_rate": 2.1185725871857258e-05, "loss": 0.5585, "step": 21190 }, { "epoch": 0.6186972643134507, "grad_norm": 0.5064452667640146, "learning_rate": 2.118410381184104e-05, "loss": 0.5696, "step": 21191 }, { "epoch": 0.6187264605412981, "grad_norm": 0.5327532471276006, "learning_rate": 2.1182481751824818e-05, "loss": 0.5773, "step": 21192 }, { "epoch": 0.6187556567691455, "grad_norm": 0.5227073936592104, "learning_rate": 2.1180859691808597e-05, "loss": 0.6001, "step": 21193 }, { "epoch": 0.6187848529969928, "grad_norm": 0.5174178134721159, "learning_rate": 2.117923763179238e-05, "loss": 0.5945, "step": 21194 }, { "epoch": 0.6188140492248402, "grad_norm": 0.5321537295179513, "learning_rate": 2.1177615571776157e-05, "loss": 0.6481, "step": 21195 }, { "epoch": 0.6188432454526875, "grad_norm": 0.5415939988167572, "learning_rate": 2.1175993511759935e-05, "loss": 0.6622, "step": 21196 }, { "epoch": 0.6188724416805349, "grad_norm": 0.5310532783866566, "learning_rate": 2.1174371451743717e-05, "loss": 0.6324, "step": 21197 }, { "epoch": 0.6189016379083823, "grad_norm": 0.492193611174194, "learning_rate": 2.1172749391727495e-05, "loss": 0.5339, "step": 21198 }, { "epoch": 0.6189308341362296, "grad_norm": 0.5107128650643199, "learning_rate": 2.1171127331711274e-05, "loss": 0.5833, "step": 21199 }, { "epoch": 0.618960030364077, "grad_norm": 0.5534455116910937, "learning_rate": 2.1169505271695052e-05, "loss": 0.643, "step": 21200 }, { "epoch": 0.6189892265919243, "grad_norm": 0.5510479086036072, "learning_rate": 2.1167883211678834e-05, "loss": 0.6587, "step": 21201 }, { "epoch": 0.6190184228197717, "grad_norm": 0.5220226080464457, "learning_rate": 2.1166261151662612e-05, "loss": 0.5487, "step": 21202 }, { "epoch": 0.6190476190476191, "grad_norm": 0.5256844702034632, "learning_rate": 2.1164639091646394e-05, "loss": 0.5993, "step": 21203 }, { "epoch": 0.6190768152754664, "grad_norm": 0.5538481857789198, "learning_rate": 2.1163017031630172e-05, "loss": 0.7088, "step": 21204 }, { "epoch": 0.6191060115033138, "grad_norm": 0.49241604560560426, "learning_rate": 2.116139497161395e-05, "loss": 0.5067, "step": 21205 }, { "epoch": 0.6191352077311612, "grad_norm": 0.4898271125959642, "learning_rate": 2.115977291159773e-05, "loss": 0.5605, "step": 21206 }, { "epoch": 0.6191644039590085, "grad_norm": 0.5319923560239769, "learning_rate": 2.115815085158151e-05, "loss": 0.5886, "step": 21207 }, { "epoch": 0.6191936001868559, "grad_norm": 0.5373087139192337, "learning_rate": 2.115652879156529e-05, "loss": 0.6374, "step": 21208 }, { "epoch": 0.6192227964147032, "grad_norm": 0.5009426922355151, "learning_rate": 2.1154906731549068e-05, "loss": 0.5664, "step": 21209 }, { "epoch": 0.6192519926425506, "grad_norm": 0.494716995747779, "learning_rate": 2.1153284671532846e-05, "loss": 0.5712, "step": 21210 }, { "epoch": 0.619281188870398, "grad_norm": 0.5116009987082818, "learning_rate": 2.1151662611516628e-05, "loss": 0.5803, "step": 21211 }, { "epoch": 0.6193103850982453, "grad_norm": 0.4898679479147623, "learning_rate": 2.1150040551500406e-05, "loss": 0.5356, "step": 21212 }, { "epoch": 0.6193395813260927, "grad_norm": 0.5205553354233722, "learning_rate": 2.1148418491484188e-05, "loss": 0.6037, "step": 21213 }, { "epoch": 0.61936877755394, "grad_norm": 0.5083550595157538, "learning_rate": 2.1146796431467966e-05, "loss": 0.5795, "step": 21214 }, { "epoch": 0.6193979737817874, "grad_norm": 0.49258142663945353, "learning_rate": 2.1145174371451745e-05, "loss": 0.5509, "step": 21215 }, { "epoch": 0.6194271700096348, "grad_norm": 0.5008780517915916, "learning_rate": 2.1143552311435523e-05, "loss": 0.5756, "step": 21216 }, { "epoch": 0.6194563662374821, "grad_norm": 0.5289702908835903, "learning_rate": 2.1141930251419305e-05, "loss": 0.6127, "step": 21217 }, { "epoch": 0.6194855624653295, "grad_norm": 0.5187978960257452, "learning_rate": 2.1140308191403083e-05, "loss": 0.6275, "step": 21218 }, { "epoch": 0.6195147586931768, "grad_norm": 0.5347817568130278, "learning_rate": 2.113868613138686e-05, "loss": 0.6165, "step": 21219 }, { "epoch": 0.6195439549210242, "grad_norm": 0.5197521673283952, "learning_rate": 2.113706407137064e-05, "loss": 0.6005, "step": 21220 }, { "epoch": 0.6195731511488716, "grad_norm": 0.511712401751197, "learning_rate": 2.113544201135442e-05, "loss": 0.5511, "step": 21221 }, { "epoch": 0.6196023473767189, "grad_norm": 0.5190914220618128, "learning_rate": 2.1133819951338203e-05, "loss": 0.6199, "step": 21222 }, { "epoch": 0.6196315436045663, "grad_norm": 0.5527891625516292, "learning_rate": 2.1132197891321982e-05, "loss": 0.6668, "step": 21223 }, { "epoch": 0.6196607398324137, "grad_norm": 0.501257998024753, "learning_rate": 2.113057583130576e-05, "loss": 0.5771, "step": 21224 }, { "epoch": 0.619689936060261, "grad_norm": 0.5070493638701387, "learning_rate": 2.112895377128954e-05, "loss": 0.5947, "step": 21225 }, { "epoch": 0.6197191322881084, "grad_norm": 0.5340852874155632, "learning_rate": 2.1127331711273317e-05, "loss": 0.5977, "step": 21226 }, { "epoch": 0.6197483285159557, "grad_norm": 0.5540928268522641, "learning_rate": 2.11257096512571e-05, "loss": 0.6299, "step": 21227 }, { "epoch": 0.6197775247438031, "grad_norm": 0.49280532716678616, "learning_rate": 2.1124087591240877e-05, "loss": 0.5951, "step": 21228 }, { "epoch": 0.6198067209716505, "grad_norm": 0.5247618857892545, "learning_rate": 2.1122465531224655e-05, "loss": 0.6167, "step": 21229 }, { "epoch": 0.6198359171994978, "grad_norm": 0.49868133330241154, "learning_rate": 2.1120843471208434e-05, "loss": 0.5773, "step": 21230 }, { "epoch": 0.6198651134273452, "grad_norm": 0.5036629230349835, "learning_rate": 2.1119221411192212e-05, "loss": 0.6037, "step": 21231 }, { "epoch": 0.6198943096551925, "grad_norm": 0.5360329186870935, "learning_rate": 2.1117599351175997e-05, "loss": 0.6166, "step": 21232 }, { "epoch": 0.6199235058830399, "grad_norm": 0.5003317156917844, "learning_rate": 2.1115977291159776e-05, "loss": 0.5385, "step": 21233 }, { "epoch": 0.6199527021108873, "grad_norm": 0.5174784550715812, "learning_rate": 2.1114355231143554e-05, "loss": 0.5644, "step": 21234 }, { "epoch": 0.6199818983387346, "grad_norm": 0.5247594084762931, "learning_rate": 2.1112733171127332e-05, "loss": 0.5993, "step": 21235 }, { "epoch": 0.620011094566582, "grad_norm": 0.51540208366036, "learning_rate": 2.111111111111111e-05, "loss": 0.5765, "step": 21236 }, { "epoch": 0.6200402907944293, "grad_norm": 0.4825867469601576, "learning_rate": 2.1109489051094893e-05, "loss": 0.5514, "step": 21237 }, { "epoch": 0.6200694870222767, "grad_norm": 0.5222558767331918, "learning_rate": 2.110786699107867e-05, "loss": 0.6281, "step": 21238 }, { "epoch": 0.6200986832501241, "grad_norm": 0.5012977487077502, "learning_rate": 2.110624493106245e-05, "loss": 0.597, "step": 21239 }, { "epoch": 0.6201278794779714, "grad_norm": 0.5204596930475458, "learning_rate": 2.1104622871046228e-05, "loss": 0.6047, "step": 21240 }, { "epoch": 0.6201570757058188, "grad_norm": 0.513654266156874, "learning_rate": 2.110300081103001e-05, "loss": 0.5821, "step": 21241 }, { "epoch": 0.6201862719336662, "grad_norm": 0.503981360747585, "learning_rate": 2.1101378751013788e-05, "loss": 0.5938, "step": 21242 }, { "epoch": 0.6202154681615135, "grad_norm": 0.5173527638917197, "learning_rate": 2.109975669099757e-05, "loss": 0.5954, "step": 21243 }, { "epoch": 0.6202446643893609, "grad_norm": 0.4931201031040371, "learning_rate": 2.1098134630981348e-05, "loss": 0.554, "step": 21244 }, { "epoch": 0.6202738606172082, "grad_norm": 0.566490106132004, "learning_rate": 2.1096512570965126e-05, "loss": 0.7129, "step": 21245 }, { "epoch": 0.6203030568450556, "grad_norm": 0.4943553669084958, "learning_rate": 2.1094890510948905e-05, "loss": 0.5364, "step": 21246 }, { "epoch": 0.620332253072903, "grad_norm": 0.4936728218543681, "learning_rate": 2.1093268450932686e-05, "loss": 0.5823, "step": 21247 }, { "epoch": 0.6203614493007503, "grad_norm": 0.6050202648890088, "learning_rate": 2.1091646390916465e-05, "loss": 0.6993, "step": 21248 }, { "epoch": 0.6203906455285977, "grad_norm": 0.5273049597796912, "learning_rate": 2.1090024330900243e-05, "loss": 0.5671, "step": 21249 }, { "epoch": 0.620419841756445, "grad_norm": 0.5268872286062065, "learning_rate": 2.1088402270884025e-05, "loss": 0.6018, "step": 21250 }, { "epoch": 0.6204490379842924, "grad_norm": 0.49849557647415976, "learning_rate": 2.1086780210867803e-05, "loss": 0.5774, "step": 21251 }, { "epoch": 0.6204782342121398, "grad_norm": 0.4861726141543019, "learning_rate": 2.108515815085158e-05, "loss": 0.5315, "step": 21252 }, { "epoch": 0.6205074304399871, "grad_norm": 0.5073425657985937, "learning_rate": 2.1083536090835363e-05, "loss": 0.5737, "step": 21253 }, { "epoch": 0.6205366266678345, "grad_norm": 0.5234237562312427, "learning_rate": 2.1081914030819142e-05, "loss": 0.5632, "step": 21254 }, { "epoch": 0.6205658228956819, "grad_norm": 0.5453398416014957, "learning_rate": 2.108029197080292e-05, "loss": 0.6333, "step": 21255 }, { "epoch": 0.6205950191235292, "grad_norm": 0.5022494451655596, "learning_rate": 2.10786699107867e-05, "loss": 0.5834, "step": 21256 }, { "epoch": 0.6206242153513766, "grad_norm": 0.5657873544721839, "learning_rate": 2.107704785077048e-05, "loss": 0.619, "step": 21257 }, { "epoch": 0.6206534115792239, "grad_norm": 0.499652995825311, "learning_rate": 2.107542579075426e-05, "loss": 0.5867, "step": 21258 }, { "epoch": 0.6206826078070713, "grad_norm": 0.5245620842630959, "learning_rate": 2.1073803730738037e-05, "loss": 0.6045, "step": 21259 }, { "epoch": 0.6207118040349187, "grad_norm": 0.5461246938199764, "learning_rate": 2.107218167072182e-05, "loss": 0.6374, "step": 21260 }, { "epoch": 0.620741000262766, "grad_norm": 0.5073198849465781, "learning_rate": 2.1070559610705597e-05, "loss": 0.5533, "step": 21261 }, { "epoch": 0.6207701964906134, "grad_norm": 0.5272170722612588, "learning_rate": 2.1068937550689376e-05, "loss": 0.5717, "step": 21262 }, { "epoch": 0.6207993927184607, "grad_norm": 0.5222882055985884, "learning_rate": 2.1067315490673157e-05, "loss": 0.6128, "step": 21263 }, { "epoch": 0.6208285889463081, "grad_norm": 0.5712576181696793, "learning_rate": 2.1065693430656936e-05, "loss": 0.7341, "step": 21264 }, { "epoch": 0.6208577851741555, "grad_norm": 0.517261403655534, "learning_rate": 2.1064071370640714e-05, "loss": 0.59, "step": 21265 }, { "epoch": 0.6208869814020028, "grad_norm": 0.5366947710337704, "learning_rate": 2.1062449310624492e-05, "loss": 0.6325, "step": 21266 }, { "epoch": 0.6209161776298502, "grad_norm": 0.5691399882690898, "learning_rate": 2.1060827250608274e-05, "loss": 0.7249, "step": 21267 }, { "epoch": 0.6209453738576975, "grad_norm": 0.5166841923723517, "learning_rate": 2.1059205190592053e-05, "loss": 0.611, "step": 21268 }, { "epoch": 0.6209745700855449, "grad_norm": 0.5003073824571382, "learning_rate": 2.1057583130575834e-05, "loss": 0.5763, "step": 21269 }, { "epoch": 0.6210037663133923, "grad_norm": 0.5523034253382566, "learning_rate": 2.1055961070559613e-05, "loss": 0.6596, "step": 21270 }, { "epoch": 0.6210329625412396, "grad_norm": 0.5288750622829282, "learning_rate": 2.105433901054339e-05, "loss": 0.5897, "step": 21271 }, { "epoch": 0.621062158769087, "grad_norm": 0.5369689651049916, "learning_rate": 2.105271695052717e-05, "loss": 0.6185, "step": 21272 }, { "epoch": 0.6210913549969344, "grad_norm": 0.5072676023657774, "learning_rate": 2.105109489051095e-05, "loss": 0.5504, "step": 21273 }, { "epoch": 0.6211205512247817, "grad_norm": 0.4968097980946634, "learning_rate": 2.104947283049473e-05, "loss": 0.5479, "step": 21274 }, { "epoch": 0.6211497474526291, "grad_norm": 0.5301893419521416, "learning_rate": 2.1047850770478508e-05, "loss": 0.6098, "step": 21275 }, { "epoch": 0.6211789436804764, "grad_norm": 0.505480166871441, "learning_rate": 2.1046228710462286e-05, "loss": 0.5455, "step": 21276 }, { "epoch": 0.6212081399083238, "grad_norm": 0.5165699914853565, "learning_rate": 2.1044606650446068e-05, "loss": 0.6008, "step": 21277 }, { "epoch": 0.6212373361361712, "grad_norm": 0.552642696069416, "learning_rate": 2.1042984590429847e-05, "loss": 0.6451, "step": 21278 }, { "epoch": 0.6212665323640185, "grad_norm": 0.5378875563627422, "learning_rate": 2.1041362530413628e-05, "loss": 0.6422, "step": 21279 }, { "epoch": 0.6212957285918659, "grad_norm": 0.5679721005942776, "learning_rate": 2.1039740470397407e-05, "loss": 0.7413, "step": 21280 }, { "epoch": 0.6213249248197132, "grad_norm": 0.5517490191930198, "learning_rate": 2.1038118410381185e-05, "loss": 0.6806, "step": 21281 }, { "epoch": 0.6213541210475606, "grad_norm": 0.5069500353962223, "learning_rate": 2.1036496350364963e-05, "loss": 0.5741, "step": 21282 }, { "epoch": 0.621383317275408, "grad_norm": 0.48853702485803696, "learning_rate": 2.1034874290348745e-05, "loss": 0.5433, "step": 21283 }, { "epoch": 0.6214125135032553, "grad_norm": 0.49602918065204954, "learning_rate": 2.1033252230332524e-05, "loss": 0.5593, "step": 21284 }, { "epoch": 0.6214417097311027, "grad_norm": 0.4956409687327514, "learning_rate": 2.1031630170316302e-05, "loss": 0.5564, "step": 21285 }, { "epoch": 0.62147090595895, "grad_norm": 0.4812663615513293, "learning_rate": 2.103000811030008e-05, "loss": 0.5217, "step": 21286 }, { "epoch": 0.6215001021867975, "grad_norm": 0.529993570777597, "learning_rate": 2.102838605028386e-05, "loss": 0.5877, "step": 21287 }, { "epoch": 0.6215292984146449, "grad_norm": 0.5185134257183412, "learning_rate": 2.1026763990267644e-05, "loss": 0.5601, "step": 21288 }, { "epoch": 0.6215584946424922, "grad_norm": 0.5142542723810382, "learning_rate": 2.1025141930251422e-05, "loss": 0.6324, "step": 21289 }, { "epoch": 0.6215876908703396, "grad_norm": 0.4947938069924916, "learning_rate": 2.10235198702352e-05, "loss": 0.5027, "step": 21290 }, { "epoch": 0.621616887098187, "grad_norm": 0.511876886904314, "learning_rate": 2.102189781021898e-05, "loss": 0.5823, "step": 21291 }, { "epoch": 0.6216460833260343, "grad_norm": 0.5290245265507585, "learning_rate": 2.1020275750202757e-05, "loss": 0.6051, "step": 21292 }, { "epoch": 0.6216752795538817, "grad_norm": 0.5244578651970871, "learning_rate": 2.101865369018654e-05, "loss": 0.5743, "step": 21293 }, { "epoch": 0.621704475781729, "grad_norm": 0.5534374455955913, "learning_rate": 2.1017031630170317e-05, "loss": 0.6144, "step": 21294 }, { "epoch": 0.6217336720095764, "grad_norm": 0.5283192967526315, "learning_rate": 2.1015409570154096e-05, "loss": 0.611, "step": 21295 }, { "epoch": 0.6217628682374238, "grad_norm": 0.48318717249744225, "learning_rate": 2.1013787510137874e-05, "loss": 0.521, "step": 21296 }, { "epoch": 0.6217920644652711, "grad_norm": 0.5733129678577781, "learning_rate": 2.1012165450121653e-05, "loss": 0.6327, "step": 21297 }, { "epoch": 0.6218212606931185, "grad_norm": 0.4946396620165828, "learning_rate": 2.1010543390105438e-05, "loss": 0.5502, "step": 21298 }, { "epoch": 0.6218504569209659, "grad_norm": 0.5836591586793176, "learning_rate": 2.1008921330089216e-05, "loss": 0.7661, "step": 21299 }, { "epoch": 0.6218796531488132, "grad_norm": 0.4793787685217821, "learning_rate": 2.1007299270072994e-05, "loss": 0.5299, "step": 21300 }, { "epoch": 0.6219088493766606, "grad_norm": 0.4854023615644657, "learning_rate": 2.1005677210056773e-05, "loss": 0.5239, "step": 21301 }, { "epoch": 0.6219380456045079, "grad_norm": 0.5190226780195215, "learning_rate": 2.100405515004055e-05, "loss": 0.6295, "step": 21302 }, { "epoch": 0.6219672418323553, "grad_norm": 0.48401258918504547, "learning_rate": 2.1002433090024333e-05, "loss": 0.5465, "step": 21303 }, { "epoch": 0.6219964380602027, "grad_norm": 0.523765510986472, "learning_rate": 2.100081103000811e-05, "loss": 0.5717, "step": 21304 }, { "epoch": 0.62202563428805, "grad_norm": 0.5304149990506503, "learning_rate": 2.099918896999189e-05, "loss": 0.5695, "step": 21305 }, { "epoch": 0.6220548305158974, "grad_norm": 0.4912791623886854, "learning_rate": 2.0997566909975668e-05, "loss": 0.5793, "step": 21306 }, { "epoch": 0.6220840267437447, "grad_norm": 0.4959879584736292, "learning_rate": 2.099594484995945e-05, "loss": 0.5556, "step": 21307 }, { "epoch": 0.6221132229715921, "grad_norm": 0.5201329345649872, "learning_rate": 2.0994322789943228e-05, "loss": 0.5984, "step": 21308 }, { "epoch": 0.6221424191994395, "grad_norm": 0.5494297259322418, "learning_rate": 2.099270072992701e-05, "loss": 0.5623, "step": 21309 }, { "epoch": 0.6221716154272868, "grad_norm": 0.6053136234112163, "learning_rate": 2.099107866991079e-05, "loss": 0.7502, "step": 21310 }, { "epoch": 0.6222008116551342, "grad_norm": 0.5314322911345638, "learning_rate": 2.0989456609894567e-05, "loss": 0.6473, "step": 21311 }, { "epoch": 0.6222300078829816, "grad_norm": 0.523196213466519, "learning_rate": 2.0987834549878345e-05, "loss": 0.6416, "step": 21312 }, { "epoch": 0.6222592041108289, "grad_norm": 0.5133778532666718, "learning_rate": 2.0986212489862127e-05, "loss": 0.5961, "step": 21313 }, { "epoch": 0.6222884003386763, "grad_norm": 0.5266269356787593, "learning_rate": 2.0984590429845905e-05, "loss": 0.5709, "step": 21314 }, { "epoch": 0.6223175965665236, "grad_norm": 0.5353618046997913, "learning_rate": 2.0982968369829684e-05, "loss": 0.6031, "step": 21315 }, { "epoch": 0.622346792794371, "grad_norm": 0.574835880195521, "learning_rate": 2.0981346309813462e-05, "loss": 0.6626, "step": 21316 }, { "epoch": 0.6223759890222184, "grad_norm": 0.4959943455812057, "learning_rate": 2.0979724249797244e-05, "loss": 0.5522, "step": 21317 }, { "epoch": 0.6224051852500657, "grad_norm": 0.5253839305015315, "learning_rate": 2.0978102189781022e-05, "loss": 0.6221, "step": 21318 }, { "epoch": 0.6224343814779131, "grad_norm": 0.5079290349205957, "learning_rate": 2.0976480129764804e-05, "loss": 0.5927, "step": 21319 }, { "epoch": 0.6224635777057604, "grad_norm": 0.5095203041441325, "learning_rate": 2.0974858069748582e-05, "loss": 0.61, "step": 21320 }, { "epoch": 0.6224927739336078, "grad_norm": 0.47574051775628096, "learning_rate": 2.097323600973236e-05, "loss": 0.509, "step": 21321 }, { "epoch": 0.6225219701614552, "grad_norm": 0.5198442080786224, "learning_rate": 2.097161394971614e-05, "loss": 0.5535, "step": 21322 }, { "epoch": 0.6225511663893025, "grad_norm": 0.5260399712603216, "learning_rate": 2.096999188969992e-05, "loss": 0.606, "step": 21323 }, { "epoch": 0.6225803626171499, "grad_norm": 0.5045841208334626, "learning_rate": 2.09683698296837e-05, "loss": 0.5504, "step": 21324 }, { "epoch": 0.6226095588449972, "grad_norm": 0.54949504990916, "learning_rate": 2.0966747769667478e-05, "loss": 0.6797, "step": 21325 }, { "epoch": 0.6226387550728446, "grad_norm": 0.49601155322097923, "learning_rate": 2.096512570965126e-05, "loss": 0.5669, "step": 21326 }, { "epoch": 0.622667951300692, "grad_norm": 0.5211613221759294, "learning_rate": 2.0963503649635038e-05, "loss": 0.6015, "step": 21327 }, { "epoch": 0.6226971475285393, "grad_norm": 0.5514670597019192, "learning_rate": 2.0961881589618816e-05, "loss": 0.6344, "step": 21328 }, { "epoch": 0.6227263437563867, "grad_norm": 0.5038827787901509, "learning_rate": 2.0960259529602598e-05, "loss": 0.5635, "step": 21329 }, { "epoch": 0.622755539984234, "grad_norm": 0.5267383404782223, "learning_rate": 2.0958637469586376e-05, "loss": 0.6559, "step": 21330 }, { "epoch": 0.6227847362120814, "grad_norm": 0.5047369577556561, "learning_rate": 2.0957015409570155e-05, "loss": 0.5828, "step": 21331 }, { "epoch": 0.6228139324399288, "grad_norm": 0.514706309596793, "learning_rate": 2.0955393349553933e-05, "loss": 0.6189, "step": 21332 }, { "epoch": 0.6228431286677761, "grad_norm": 0.5402500465494213, "learning_rate": 2.0953771289537715e-05, "loss": 0.638, "step": 21333 }, { "epoch": 0.6228723248956235, "grad_norm": 0.5132374847532474, "learning_rate": 2.0952149229521493e-05, "loss": 0.6033, "step": 21334 }, { "epoch": 0.6229015211234709, "grad_norm": 0.5695432167903033, "learning_rate": 2.0950527169505275e-05, "loss": 0.6664, "step": 21335 }, { "epoch": 0.6229307173513182, "grad_norm": 0.5390064632292902, "learning_rate": 2.0948905109489053e-05, "loss": 0.5991, "step": 21336 }, { "epoch": 0.6229599135791656, "grad_norm": 0.49038482048403315, "learning_rate": 2.094728304947283e-05, "loss": 0.5569, "step": 21337 }, { "epoch": 0.6229891098070129, "grad_norm": 0.5101970619060195, "learning_rate": 2.094566098945661e-05, "loss": 0.5879, "step": 21338 }, { "epoch": 0.6230183060348603, "grad_norm": 0.5319705266946874, "learning_rate": 2.0944038929440392e-05, "loss": 0.6038, "step": 21339 }, { "epoch": 0.6230475022627077, "grad_norm": 0.5148233690575866, "learning_rate": 2.094241686942417e-05, "loss": 0.5588, "step": 21340 }, { "epoch": 0.623076698490555, "grad_norm": 0.5262283526664442, "learning_rate": 2.094079480940795e-05, "loss": 0.6359, "step": 21341 }, { "epoch": 0.6231058947184024, "grad_norm": 0.5000347380810126, "learning_rate": 2.0939172749391727e-05, "loss": 0.581, "step": 21342 }, { "epoch": 0.6231350909462497, "grad_norm": 0.570843158253391, "learning_rate": 2.093755068937551e-05, "loss": 0.7039, "step": 21343 }, { "epoch": 0.6231642871740971, "grad_norm": 0.5179938237755308, "learning_rate": 2.0935928629359287e-05, "loss": 0.5894, "step": 21344 }, { "epoch": 0.6231934834019445, "grad_norm": 0.5121811864317715, "learning_rate": 2.093430656934307e-05, "loss": 0.5736, "step": 21345 }, { "epoch": 0.6232226796297918, "grad_norm": 0.5105573763443939, "learning_rate": 2.0932684509326847e-05, "loss": 0.5504, "step": 21346 }, { "epoch": 0.6232518758576392, "grad_norm": 0.6630590213949884, "learning_rate": 2.0931062449310625e-05, "loss": 0.6021, "step": 21347 }, { "epoch": 0.6232810720854866, "grad_norm": 0.4969117792582606, "learning_rate": 2.0929440389294404e-05, "loss": 0.5268, "step": 21348 }, { "epoch": 0.6233102683133339, "grad_norm": 0.5243346947117214, "learning_rate": 2.0927818329278186e-05, "loss": 0.5985, "step": 21349 }, { "epoch": 0.6233394645411813, "grad_norm": 0.5205165704650988, "learning_rate": 2.0926196269261964e-05, "loss": 0.6204, "step": 21350 }, { "epoch": 0.6233686607690286, "grad_norm": 0.4974053019182621, "learning_rate": 2.0924574209245742e-05, "loss": 0.5657, "step": 21351 }, { "epoch": 0.623397856996876, "grad_norm": 0.5258981006917905, "learning_rate": 2.092295214922952e-05, "loss": 0.5962, "step": 21352 }, { "epoch": 0.6234270532247234, "grad_norm": 0.5366457168329942, "learning_rate": 2.09213300892133e-05, "loss": 0.5751, "step": 21353 }, { "epoch": 0.6234562494525707, "grad_norm": 0.5401680140230753, "learning_rate": 2.0919708029197084e-05, "loss": 0.6035, "step": 21354 }, { "epoch": 0.6234854456804181, "grad_norm": 0.4966799708244259, "learning_rate": 2.0918085969180863e-05, "loss": 0.5244, "step": 21355 }, { "epoch": 0.6235146419082654, "grad_norm": 0.5010260855163768, "learning_rate": 2.091646390916464e-05, "loss": 0.5365, "step": 21356 }, { "epoch": 0.6235438381361128, "grad_norm": 0.48225887570089004, "learning_rate": 2.091484184914842e-05, "loss": 0.5567, "step": 21357 }, { "epoch": 0.6235730343639602, "grad_norm": 0.5507569452017431, "learning_rate": 2.0913219789132198e-05, "loss": 0.6769, "step": 21358 }, { "epoch": 0.6236022305918075, "grad_norm": 0.5535100953244453, "learning_rate": 2.091159772911598e-05, "loss": 0.6329, "step": 21359 }, { "epoch": 0.6236314268196549, "grad_norm": 0.5003293827422644, "learning_rate": 2.0909975669099758e-05, "loss": 0.5885, "step": 21360 }, { "epoch": 0.6236606230475022, "grad_norm": 0.49593758274809735, "learning_rate": 2.0908353609083536e-05, "loss": 0.5371, "step": 21361 }, { "epoch": 0.6236898192753496, "grad_norm": 0.5504987519090028, "learning_rate": 2.0906731549067315e-05, "loss": 0.5999, "step": 21362 }, { "epoch": 0.623719015503197, "grad_norm": 0.527660153895066, "learning_rate": 2.0905109489051093e-05, "loss": 0.6246, "step": 21363 }, { "epoch": 0.6237482117310443, "grad_norm": 0.5210209435095627, "learning_rate": 2.0903487429034875e-05, "loss": 0.5907, "step": 21364 }, { "epoch": 0.6237774079588917, "grad_norm": 0.530257012040538, "learning_rate": 2.0901865369018657e-05, "loss": 0.6285, "step": 21365 }, { "epoch": 0.623806604186739, "grad_norm": 0.5366375237370272, "learning_rate": 2.0900243309002435e-05, "loss": 0.5812, "step": 21366 }, { "epoch": 0.6238358004145864, "grad_norm": 0.5515029349366366, "learning_rate": 2.0898621248986213e-05, "loss": 0.6374, "step": 21367 }, { "epoch": 0.6238649966424338, "grad_norm": 0.5555709060442159, "learning_rate": 2.089699918896999e-05, "loss": 0.6675, "step": 21368 }, { "epoch": 0.6238941928702811, "grad_norm": 0.5325686145043037, "learning_rate": 2.0895377128953773e-05, "loss": 0.6387, "step": 21369 }, { "epoch": 0.6239233890981285, "grad_norm": 0.5694570626519894, "learning_rate": 2.0893755068937552e-05, "loss": 0.6255, "step": 21370 }, { "epoch": 0.6239525853259759, "grad_norm": 0.49846484066142266, "learning_rate": 2.089213300892133e-05, "loss": 0.5531, "step": 21371 }, { "epoch": 0.6239817815538232, "grad_norm": 0.49146477018572843, "learning_rate": 2.089051094890511e-05, "loss": 0.5212, "step": 21372 }, { "epoch": 0.6240109777816706, "grad_norm": 0.47187226017025125, "learning_rate": 2.088888888888889e-05, "loss": 0.5143, "step": 21373 }, { "epoch": 0.624040174009518, "grad_norm": 0.504000676805208, "learning_rate": 2.088726682887267e-05, "loss": 0.6026, "step": 21374 }, { "epoch": 0.6240693702373653, "grad_norm": 0.5413070591732584, "learning_rate": 2.088564476885645e-05, "loss": 0.6405, "step": 21375 }, { "epoch": 0.6240985664652127, "grad_norm": 0.5693850025524786, "learning_rate": 2.088402270884023e-05, "loss": 0.6857, "step": 21376 }, { "epoch": 0.62412776269306, "grad_norm": 0.5013814994100244, "learning_rate": 2.0882400648824007e-05, "loss": 0.5768, "step": 21377 }, { "epoch": 0.6241569589209074, "grad_norm": 0.52616926795853, "learning_rate": 2.0880778588807786e-05, "loss": 0.6473, "step": 21378 }, { "epoch": 0.6241861551487548, "grad_norm": 0.5332016381159059, "learning_rate": 2.0879156528791567e-05, "loss": 0.6227, "step": 21379 }, { "epoch": 0.6242153513766021, "grad_norm": 0.5022356019939088, "learning_rate": 2.0877534468775346e-05, "loss": 0.5673, "step": 21380 }, { "epoch": 0.6242445476044495, "grad_norm": 0.4686255871131704, "learning_rate": 2.0875912408759124e-05, "loss": 0.499, "step": 21381 }, { "epoch": 0.6242737438322968, "grad_norm": 0.5080180392603268, "learning_rate": 2.0874290348742902e-05, "loss": 0.592, "step": 21382 }, { "epoch": 0.6243029400601442, "grad_norm": 0.5202832356736036, "learning_rate": 2.0872668288726684e-05, "loss": 0.6074, "step": 21383 }, { "epoch": 0.6243321362879916, "grad_norm": 0.5556272095477972, "learning_rate": 2.0871046228710463e-05, "loss": 0.6561, "step": 21384 }, { "epoch": 0.6243613325158389, "grad_norm": 0.5514895495122257, "learning_rate": 2.0869424168694244e-05, "loss": 0.6402, "step": 21385 }, { "epoch": 0.6243905287436863, "grad_norm": 0.5733439096219988, "learning_rate": 2.0867802108678023e-05, "loss": 0.6708, "step": 21386 }, { "epoch": 0.6244197249715336, "grad_norm": 0.523404781465948, "learning_rate": 2.08661800486618e-05, "loss": 0.6195, "step": 21387 }, { "epoch": 0.624448921199381, "grad_norm": 0.5342135014256703, "learning_rate": 2.086455798864558e-05, "loss": 0.5937, "step": 21388 }, { "epoch": 0.6244781174272284, "grad_norm": 0.5798468208047013, "learning_rate": 2.086293592862936e-05, "loss": 0.7089, "step": 21389 }, { "epoch": 0.6245073136550757, "grad_norm": 0.5228076500886953, "learning_rate": 2.086131386861314e-05, "loss": 0.6016, "step": 21390 }, { "epoch": 0.6245365098829231, "grad_norm": 0.542662970121502, "learning_rate": 2.0859691808596918e-05, "loss": 0.6636, "step": 21391 }, { "epoch": 0.6245657061107704, "grad_norm": 0.5814211680600518, "learning_rate": 2.08580697485807e-05, "loss": 0.717, "step": 21392 }, { "epoch": 0.6245949023386178, "grad_norm": 0.5350075961054913, "learning_rate": 2.0856447688564478e-05, "loss": 0.6358, "step": 21393 }, { "epoch": 0.6246240985664652, "grad_norm": 0.5056380901856788, "learning_rate": 2.0854825628548256e-05, "loss": 0.6147, "step": 21394 }, { "epoch": 0.6246532947943125, "grad_norm": 0.5129149575751962, "learning_rate": 2.0853203568532038e-05, "loss": 0.6045, "step": 21395 }, { "epoch": 0.6246824910221599, "grad_norm": 0.5138323930416616, "learning_rate": 2.0851581508515817e-05, "loss": 0.5631, "step": 21396 }, { "epoch": 0.6247116872500073, "grad_norm": 0.4937981371247345, "learning_rate": 2.0849959448499595e-05, "loss": 0.5723, "step": 21397 }, { "epoch": 0.6247408834778546, "grad_norm": 0.5303653475079437, "learning_rate": 2.0848337388483373e-05, "loss": 0.6173, "step": 21398 }, { "epoch": 0.624770079705702, "grad_norm": 0.5370863717591412, "learning_rate": 2.0846715328467155e-05, "loss": 0.5625, "step": 21399 }, { "epoch": 0.6247992759335493, "grad_norm": 0.5210925741777411, "learning_rate": 2.0845093268450933e-05, "loss": 0.5729, "step": 21400 }, { "epoch": 0.6248284721613967, "grad_norm": 0.47505512838454206, "learning_rate": 2.0843471208434715e-05, "loss": 0.4902, "step": 21401 }, { "epoch": 0.6248576683892441, "grad_norm": 0.5114841721489108, "learning_rate": 2.0841849148418494e-05, "loss": 0.5864, "step": 21402 }, { "epoch": 0.6248868646170914, "grad_norm": 0.538479757189729, "learning_rate": 2.0840227088402272e-05, "loss": 0.6631, "step": 21403 }, { "epoch": 0.6249160608449388, "grad_norm": 0.5014669505739362, "learning_rate": 2.083860502838605e-05, "loss": 0.5779, "step": 21404 }, { "epoch": 0.6249452570727861, "grad_norm": 0.5033482099070372, "learning_rate": 2.0836982968369832e-05, "loss": 0.5714, "step": 21405 }, { "epoch": 0.6249744533006335, "grad_norm": 0.5380072237052157, "learning_rate": 2.083536090835361e-05, "loss": 0.6184, "step": 21406 }, { "epoch": 0.625003649528481, "grad_norm": 0.5055208587051434, "learning_rate": 2.083373884833739e-05, "loss": 0.59, "step": 21407 }, { "epoch": 0.6250328457563283, "grad_norm": 0.5445631684282758, "learning_rate": 2.0832116788321167e-05, "loss": 0.5542, "step": 21408 }, { "epoch": 0.6250620419841757, "grad_norm": 0.46584702780940035, "learning_rate": 2.0830494728304946e-05, "loss": 0.5053, "step": 21409 }, { "epoch": 0.6250912382120231, "grad_norm": 0.4692456000905244, "learning_rate": 2.0828872668288727e-05, "loss": 0.5399, "step": 21410 }, { "epoch": 0.6251204344398704, "grad_norm": 0.5678969022085316, "learning_rate": 2.082725060827251e-05, "loss": 0.6503, "step": 21411 }, { "epoch": 0.6251496306677178, "grad_norm": 0.5212787246918485, "learning_rate": 2.0825628548256288e-05, "loss": 0.5609, "step": 21412 }, { "epoch": 0.6251788268955651, "grad_norm": 0.505625829901139, "learning_rate": 2.0824006488240066e-05, "loss": 0.5582, "step": 21413 }, { "epoch": 0.6252080231234125, "grad_norm": 0.48565334518870085, "learning_rate": 2.0822384428223844e-05, "loss": 0.5007, "step": 21414 }, { "epoch": 0.6252372193512599, "grad_norm": 0.5198113481246482, "learning_rate": 2.0820762368207626e-05, "loss": 0.5661, "step": 21415 }, { "epoch": 0.6252664155791072, "grad_norm": 0.5333939132650877, "learning_rate": 2.0819140308191404e-05, "loss": 0.6454, "step": 21416 }, { "epoch": 0.6252956118069546, "grad_norm": 0.517796248761308, "learning_rate": 2.0817518248175183e-05, "loss": 0.6126, "step": 21417 }, { "epoch": 0.625324808034802, "grad_norm": 0.5207757518927295, "learning_rate": 2.081589618815896e-05, "loss": 0.6254, "step": 21418 }, { "epoch": 0.6253540042626493, "grad_norm": 0.5706451708274146, "learning_rate": 2.081427412814274e-05, "loss": 0.6786, "step": 21419 }, { "epoch": 0.6253832004904967, "grad_norm": 0.5108363748506166, "learning_rate": 2.0812652068126525e-05, "loss": 0.5699, "step": 21420 }, { "epoch": 0.625412396718344, "grad_norm": 0.5509618544083916, "learning_rate": 2.0811030008110303e-05, "loss": 0.6353, "step": 21421 }, { "epoch": 0.6254415929461914, "grad_norm": 0.5259210634056916, "learning_rate": 2.080940794809408e-05, "loss": 0.582, "step": 21422 }, { "epoch": 0.6254707891740388, "grad_norm": 0.5067619036413338, "learning_rate": 2.080778588807786e-05, "loss": 0.5858, "step": 21423 }, { "epoch": 0.6254999854018861, "grad_norm": 0.5035112181615925, "learning_rate": 2.0806163828061638e-05, "loss": 0.5647, "step": 21424 }, { "epoch": 0.6255291816297335, "grad_norm": 0.5166342486727392, "learning_rate": 2.080454176804542e-05, "loss": 0.5848, "step": 21425 }, { "epoch": 0.6255583778575808, "grad_norm": 0.5141839739487118, "learning_rate": 2.08029197080292e-05, "loss": 0.5875, "step": 21426 }, { "epoch": 0.6255875740854282, "grad_norm": 0.5212161186494306, "learning_rate": 2.0801297648012977e-05, "loss": 0.5754, "step": 21427 }, { "epoch": 0.6256167703132756, "grad_norm": 0.5161489682643849, "learning_rate": 2.0799675587996755e-05, "loss": 0.5877, "step": 21428 }, { "epoch": 0.6256459665411229, "grad_norm": 0.5209675692257153, "learning_rate": 2.0798053527980533e-05, "loss": 0.5047, "step": 21429 }, { "epoch": 0.6256751627689703, "grad_norm": 0.4984913464265449, "learning_rate": 2.0796431467964315e-05, "loss": 0.5904, "step": 21430 }, { "epoch": 0.6257043589968176, "grad_norm": 0.5767369767674204, "learning_rate": 2.0794809407948097e-05, "loss": 0.6596, "step": 21431 }, { "epoch": 0.625733555224665, "grad_norm": 0.48941615654236614, "learning_rate": 2.0793187347931875e-05, "loss": 0.523, "step": 21432 }, { "epoch": 0.6257627514525124, "grad_norm": 0.5209446202981705, "learning_rate": 2.0791565287915654e-05, "loss": 0.5879, "step": 21433 }, { "epoch": 0.6257919476803597, "grad_norm": 0.4872258041820891, "learning_rate": 2.0789943227899432e-05, "loss": 0.5495, "step": 21434 }, { "epoch": 0.6258211439082071, "grad_norm": 0.5037156352969505, "learning_rate": 2.0788321167883214e-05, "loss": 0.6106, "step": 21435 }, { "epoch": 0.6258503401360545, "grad_norm": 0.5334651109905176, "learning_rate": 2.0786699107866992e-05, "loss": 0.6574, "step": 21436 }, { "epoch": 0.6258795363639018, "grad_norm": 0.5376537737872598, "learning_rate": 2.078507704785077e-05, "loss": 0.6126, "step": 21437 }, { "epoch": 0.6259087325917492, "grad_norm": 0.5442677331510639, "learning_rate": 2.078345498783455e-05, "loss": 0.6018, "step": 21438 }, { "epoch": 0.6259379288195965, "grad_norm": 0.49345003000411, "learning_rate": 2.078183292781833e-05, "loss": 0.558, "step": 21439 }, { "epoch": 0.6259671250474439, "grad_norm": 0.5660740849608742, "learning_rate": 2.078021086780211e-05, "loss": 0.6646, "step": 21440 }, { "epoch": 0.6259963212752913, "grad_norm": 0.5224042453963189, "learning_rate": 2.077858880778589e-05, "loss": 0.587, "step": 21441 }, { "epoch": 0.6260255175031386, "grad_norm": 0.5321497487284058, "learning_rate": 2.077696674776967e-05, "loss": 0.6592, "step": 21442 }, { "epoch": 0.626054713730986, "grad_norm": 0.5305180986830034, "learning_rate": 2.0775344687753448e-05, "loss": 0.6071, "step": 21443 }, { "epoch": 0.6260839099588333, "grad_norm": 0.5493053148702722, "learning_rate": 2.0773722627737226e-05, "loss": 0.6709, "step": 21444 }, { "epoch": 0.6261131061866807, "grad_norm": 0.5105824570017422, "learning_rate": 2.0772100567721008e-05, "loss": 0.5892, "step": 21445 }, { "epoch": 0.6261423024145281, "grad_norm": 0.5253480334524747, "learning_rate": 2.0770478507704786e-05, "loss": 0.6206, "step": 21446 }, { "epoch": 0.6261714986423754, "grad_norm": 0.5345706602380025, "learning_rate": 2.0768856447688565e-05, "loss": 0.6075, "step": 21447 }, { "epoch": 0.6262006948702228, "grad_norm": 0.5274061098112391, "learning_rate": 2.0767234387672343e-05, "loss": 0.6253, "step": 21448 }, { "epoch": 0.6262298910980701, "grad_norm": 0.5385971134641466, "learning_rate": 2.0765612327656125e-05, "loss": 0.6714, "step": 21449 }, { "epoch": 0.6262590873259175, "grad_norm": 0.48821557371743113, "learning_rate": 2.0763990267639903e-05, "loss": 0.5493, "step": 21450 }, { "epoch": 0.6262882835537649, "grad_norm": 0.5340009959948433, "learning_rate": 2.0762368207623685e-05, "loss": 0.6483, "step": 21451 }, { "epoch": 0.6263174797816122, "grad_norm": 0.49130908770648185, "learning_rate": 2.0760746147607463e-05, "loss": 0.5482, "step": 21452 }, { "epoch": 0.6263466760094596, "grad_norm": 0.48307875106159737, "learning_rate": 2.075912408759124e-05, "loss": 0.5215, "step": 21453 }, { "epoch": 0.626375872237307, "grad_norm": 0.5489112506551228, "learning_rate": 2.075750202757502e-05, "loss": 0.6375, "step": 21454 }, { "epoch": 0.6264050684651543, "grad_norm": 0.5129075937225103, "learning_rate": 2.07558799675588e-05, "loss": 0.5566, "step": 21455 }, { "epoch": 0.6264342646930017, "grad_norm": 0.48996698297095637, "learning_rate": 2.075425790754258e-05, "loss": 0.5429, "step": 21456 }, { "epoch": 0.626463460920849, "grad_norm": 0.535547225265984, "learning_rate": 2.075263584752636e-05, "loss": 0.5931, "step": 21457 }, { "epoch": 0.6264926571486964, "grad_norm": 0.4871201248225596, "learning_rate": 2.075101378751014e-05, "loss": 0.5615, "step": 21458 }, { "epoch": 0.6265218533765438, "grad_norm": 0.5726759841857593, "learning_rate": 2.074939172749392e-05, "loss": 0.6243, "step": 21459 }, { "epoch": 0.6265510496043911, "grad_norm": 0.49581021228368866, "learning_rate": 2.0747769667477697e-05, "loss": 0.5727, "step": 21460 }, { "epoch": 0.6265802458322385, "grad_norm": 0.5383289241448094, "learning_rate": 2.074614760746148e-05, "loss": 0.6555, "step": 21461 }, { "epoch": 0.6266094420600858, "grad_norm": 0.5171950610876662, "learning_rate": 2.0744525547445257e-05, "loss": 0.57, "step": 21462 }, { "epoch": 0.6266386382879332, "grad_norm": 0.4997256784793767, "learning_rate": 2.0742903487429035e-05, "loss": 0.5485, "step": 21463 }, { "epoch": 0.6266678345157806, "grad_norm": 0.5138076169246926, "learning_rate": 2.0741281427412814e-05, "loss": 0.6078, "step": 21464 }, { "epoch": 0.6266970307436279, "grad_norm": 0.49439306588635706, "learning_rate": 2.0739659367396596e-05, "loss": 0.5179, "step": 21465 }, { "epoch": 0.6267262269714753, "grad_norm": 0.5630726541766329, "learning_rate": 2.0738037307380374e-05, "loss": 0.7088, "step": 21466 }, { "epoch": 0.6267554231993226, "grad_norm": 0.5600564287013113, "learning_rate": 2.0736415247364152e-05, "loss": 0.6758, "step": 21467 }, { "epoch": 0.62678461942717, "grad_norm": 0.525573496038739, "learning_rate": 2.0734793187347934e-05, "loss": 0.5922, "step": 21468 }, { "epoch": 0.6268138156550174, "grad_norm": 0.5070746624271173, "learning_rate": 2.0733171127331712e-05, "loss": 0.5942, "step": 21469 }, { "epoch": 0.6268430118828647, "grad_norm": 0.5133535107355578, "learning_rate": 2.073154906731549e-05, "loss": 0.5965, "step": 21470 }, { "epoch": 0.6268722081107121, "grad_norm": 0.5138582354012831, "learning_rate": 2.0729927007299273e-05, "loss": 0.5756, "step": 21471 }, { "epoch": 0.6269014043385595, "grad_norm": 0.5323052218356901, "learning_rate": 2.072830494728305e-05, "loss": 0.6324, "step": 21472 }, { "epoch": 0.6269306005664068, "grad_norm": 0.5619124821114531, "learning_rate": 2.072668288726683e-05, "loss": 0.7229, "step": 21473 }, { "epoch": 0.6269597967942542, "grad_norm": 0.5561030473866576, "learning_rate": 2.0725060827250608e-05, "loss": 0.6322, "step": 21474 }, { "epoch": 0.6269889930221015, "grad_norm": 0.5128373235158592, "learning_rate": 2.0723438767234386e-05, "loss": 0.6349, "step": 21475 }, { "epoch": 0.6270181892499489, "grad_norm": 0.4864600124064243, "learning_rate": 2.0721816707218168e-05, "loss": 0.5425, "step": 21476 }, { "epoch": 0.6270473854777963, "grad_norm": 0.5494614974969481, "learning_rate": 2.072019464720195e-05, "loss": 0.6419, "step": 21477 }, { "epoch": 0.6270765817056436, "grad_norm": 0.5164585567449772, "learning_rate": 2.0718572587185728e-05, "loss": 0.5834, "step": 21478 }, { "epoch": 0.627105777933491, "grad_norm": 0.49970556941715233, "learning_rate": 2.0716950527169506e-05, "loss": 0.5818, "step": 21479 }, { "epoch": 0.6271349741613383, "grad_norm": 0.531963451393251, "learning_rate": 2.0715328467153285e-05, "loss": 0.6517, "step": 21480 }, { "epoch": 0.6271641703891857, "grad_norm": 0.5203733145789579, "learning_rate": 2.0713706407137066e-05, "loss": 0.5961, "step": 21481 }, { "epoch": 0.6271933666170331, "grad_norm": 0.5133592692312755, "learning_rate": 2.0712084347120845e-05, "loss": 0.5705, "step": 21482 }, { "epoch": 0.6272225628448804, "grad_norm": 0.5902119039861868, "learning_rate": 2.0710462287104623e-05, "loss": 0.6886, "step": 21483 }, { "epoch": 0.6272517590727278, "grad_norm": 0.5795716637227006, "learning_rate": 2.07088402270884e-05, "loss": 0.6391, "step": 21484 }, { "epoch": 0.6272809553005751, "grad_norm": 0.547728963793043, "learning_rate": 2.070721816707218e-05, "loss": 0.666, "step": 21485 }, { "epoch": 0.6273101515284225, "grad_norm": 0.5336367020137295, "learning_rate": 2.0705596107055962e-05, "loss": 0.5919, "step": 21486 }, { "epoch": 0.6273393477562699, "grad_norm": 0.4912321691214126, "learning_rate": 2.0703974047039744e-05, "loss": 0.5816, "step": 21487 }, { "epoch": 0.6273685439841172, "grad_norm": 0.5166204725062877, "learning_rate": 2.0702351987023522e-05, "loss": 0.5613, "step": 21488 }, { "epoch": 0.6273977402119646, "grad_norm": 0.5296114534799578, "learning_rate": 2.07007299270073e-05, "loss": 0.5669, "step": 21489 }, { "epoch": 0.627426936439812, "grad_norm": 0.520710621461418, "learning_rate": 2.069910786699108e-05, "loss": 0.598, "step": 21490 }, { "epoch": 0.6274561326676593, "grad_norm": 0.5307726712578684, "learning_rate": 2.069748580697486e-05, "loss": 0.6322, "step": 21491 }, { "epoch": 0.6274853288955067, "grad_norm": 0.5377171135224534, "learning_rate": 2.069586374695864e-05, "loss": 0.6462, "step": 21492 }, { "epoch": 0.627514525123354, "grad_norm": 0.5379302534777141, "learning_rate": 2.0694241686942417e-05, "loss": 0.6812, "step": 21493 }, { "epoch": 0.6275437213512014, "grad_norm": 0.5203067933306041, "learning_rate": 2.0692619626926196e-05, "loss": 0.5998, "step": 21494 }, { "epoch": 0.6275729175790488, "grad_norm": 0.5149987515988536, "learning_rate": 2.0690997566909974e-05, "loss": 0.5986, "step": 21495 }, { "epoch": 0.6276021138068961, "grad_norm": 0.5234659997260022, "learning_rate": 2.0689375506893756e-05, "loss": 0.6494, "step": 21496 }, { "epoch": 0.6276313100347435, "grad_norm": 0.49884931885342754, "learning_rate": 2.0687753446877537e-05, "loss": 0.5671, "step": 21497 }, { "epoch": 0.6276605062625908, "grad_norm": 0.4821942732887756, "learning_rate": 2.0686131386861316e-05, "loss": 0.5168, "step": 21498 }, { "epoch": 0.6276897024904382, "grad_norm": 0.4766699059515936, "learning_rate": 2.0684509326845094e-05, "loss": 0.5245, "step": 21499 }, { "epoch": 0.6277188987182856, "grad_norm": 0.47599960095805866, "learning_rate": 2.0682887266828873e-05, "loss": 0.5376, "step": 21500 }, { "epoch": 0.6277480949461329, "grad_norm": 0.5251805616129964, "learning_rate": 2.0681265206812654e-05, "loss": 0.6603, "step": 21501 }, { "epoch": 0.6277772911739803, "grad_norm": 0.5014099946824705, "learning_rate": 2.0679643146796433e-05, "loss": 0.5644, "step": 21502 }, { "epoch": 0.6278064874018277, "grad_norm": 0.5213274296737301, "learning_rate": 2.067802108678021e-05, "loss": 0.5796, "step": 21503 }, { "epoch": 0.627835683629675, "grad_norm": 0.5389082455767026, "learning_rate": 2.067639902676399e-05, "loss": 0.6304, "step": 21504 }, { "epoch": 0.6278648798575224, "grad_norm": 0.4841214514609275, "learning_rate": 2.067477696674777e-05, "loss": 0.5502, "step": 21505 }, { "epoch": 0.6278940760853697, "grad_norm": 0.5142999939075573, "learning_rate": 2.067315490673155e-05, "loss": 0.599, "step": 21506 }, { "epoch": 0.6279232723132171, "grad_norm": 0.5290074741238356, "learning_rate": 2.067153284671533e-05, "loss": 0.6192, "step": 21507 }, { "epoch": 0.6279524685410645, "grad_norm": 0.4927283716646962, "learning_rate": 2.066991078669911e-05, "loss": 0.5524, "step": 21508 }, { "epoch": 0.6279816647689118, "grad_norm": 0.5287039759294659, "learning_rate": 2.0668288726682888e-05, "loss": 0.609, "step": 21509 }, { "epoch": 0.6280108609967592, "grad_norm": 0.4828073442726072, "learning_rate": 2.0666666666666666e-05, "loss": 0.5322, "step": 21510 }, { "epoch": 0.6280400572246065, "grad_norm": 0.5416825272933963, "learning_rate": 2.0665044606650448e-05, "loss": 0.6324, "step": 21511 }, { "epoch": 0.6280692534524539, "grad_norm": 0.5013851918801322, "learning_rate": 2.0663422546634227e-05, "loss": 0.5733, "step": 21512 }, { "epoch": 0.6280984496803013, "grad_norm": 0.5429834644162955, "learning_rate": 2.0661800486618005e-05, "loss": 0.6718, "step": 21513 }, { "epoch": 0.6281276459081486, "grad_norm": 0.5138800057581087, "learning_rate": 2.0660178426601783e-05, "loss": 0.5635, "step": 21514 }, { "epoch": 0.628156842135996, "grad_norm": 0.5287613049656892, "learning_rate": 2.0658556366585565e-05, "loss": 0.6071, "step": 21515 }, { "epoch": 0.6281860383638433, "grad_norm": 0.5126450410912575, "learning_rate": 2.0656934306569343e-05, "loss": 0.6107, "step": 21516 }, { "epoch": 0.6282152345916907, "grad_norm": 0.5117858075283693, "learning_rate": 2.0655312246553125e-05, "loss": 0.586, "step": 21517 }, { "epoch": 0.6282444308195381, "grad_norm": 0.5146498058351013, "learning_rate": 2.0653690186536904e-05, "loss": 0.5818, "step": 21518 }, { "epoch": 0.6282736270473854, "grad_norm": 0.5437516210140465, "learning_rate": 2.0652068126520682e-05, "loss": 0.6502, "step": 21519 }, { "epoch": 0.6283028232752328, "grad_norm": 0.4947780332497578, "learning_rate": 2.065044606650446e-05, "loss": 0.5493, "step": 21520 }, { "epoch": 0.6283320195030802, "grad_norm": 0.5328656859026134, "learning_rate": 2.0648824006488242e-05, "loss": 0.6198, "step": 21521 }, { "epoch": 0.6283612157309275, "grad_norm": 0.5531822563662322, "learning_rate": 2.064720194647202e-05, "loss": 0.7, "step": 21522 }, { "epoch": 0.6283904119587749, "grad_norm": 0.538960433094479, "learning_rate": 2.06455798864558e-05, "loss": 0.6007, "step": 21523 }, { "epoch": 0.6284196081866222, "grad_norm": 0.4772210944604401, "learning_rate": 2.064395782643958e-05, "loss": 0.5197, "step": 21524 }, { "epoch": 0.6284488044144696, "grad_norm": 0.5132776905140536, "learning_rate": 2.064233576642336e-05, "loss": 0.609, "step": 21525 }, { "epoch": 0.628478000642317, "grad_norm": 0.5421502727346479, "learning_rate": 2.0640713706407137e-05, "loss": 0.6002, "step": 21526 }, { "epoch": 0.6285071968701643, "grad_norm": 0.563744588187407, "learning_rate": 2.063909164639092e-05, "loss": 0.6697, "step": 21527 }, { "epoch": 0.6285363930980118, "grad_norm": 0.4884181039404556, "learning_rate": 2.0637469586374697e-05, "loss": 0.5472, "step": 21528 }, { "epoch": 0.6285655893258592, "grad_norm": 0.5427523334783243, "learning_rate": 2.0635847526358476e-05, "loss": 0.6354, "step": 21529 }, { "epoch": 0.6285947855537065, "grad_norm": 0.5102728250432441, "learning_rate": 2.0634225466342254e-05, "loss": 0.5814, "step": 21530 }, { "epoch": 0.6286239817815539, "grad_norm": 0.532858024906333, "learning_rate": 2.0632603406326033e-05, "loss": 0.6411, "step": 21531 }, { "epoch": 0.6286531780094012, "grad_norm": 0.5348600669196125, "learning_rate": 2.0630981346309814e-05, "loss": 0.6063, "step": 21532 }, { "epoch": 0.6286823742372486, "grad_norm": 0.524427064422753, "learning_rate": 2.0629359286293593e-05, "loss": 0.6404, "step": 21533 }, { "epoch": 0.628711570465096, "grad_norm": 0.5278604258521842, "learning_rate": 2.0627737226277375e-05, "loss": 0.616, "step": 21534 }, { "epoch": 0.6287407666929433, "grad_norm": 0.5399890902696544, "learning_rate": 2.0626115166261153e-05, "loss": 0.6215, "step": 21535 }, { "epoch": 0.6287699629207907, "grad_norm": 0.5179885798981888, "learning_rate": 2.062449310624493e-05, "loss": 0.5798, "step": 21536 }, { "epoch": 0.628799159148638, "grad_norm": 0.5434322575029795, "learning_rate": 2.0622871046228713e-05, "loss": 0.6145, "step": 21537 }, { "epoch": 0.6288283553764854, "grad_norm": 0.5322813626059482, "learning_rate": 2.062124898621249e-05, "loss": 0.6, "step": 21538 }, { "epoch": 0.6288575516043328, "grad_norm": 0.5253531970347395, "learning_rate": 2.061962692619627e-05, "loss": 0.5478, "step": 21539 }, { "epoch": 0.6288867478321801, "grad_norm": 0.5122148352436878, "learning_rate": 2.0618004866180048e-05, "loss": 0.6091, "step": 21540 }, { "epoch": 0.6289159440600275, "grad_norm": 0.5047257720481294, "learning_rate": 2.0616382806163827e-05, "loss": 0.5584, "step": 21541 }, { "epoch": 0.6289451402878748, "grad_norm": 0.5162265032938625, "learning_rate": 2.0614760746147608e-05, "loss": 0.5791, "step": 21542 }, { "epoch": 0.6289743365157222, "grad_norm": 0.522263909740031, "learning_rate": 2.061313868613139e-05, "loss": 0.5953, "step": 21543 }, { "epoch": 0.6290035327435696, "grad_norm": 0.518617076860854, "learning_rate": 2.061151662611517e-05, "loss": 0.6147, "step": 21544 }, { "epoch": 0.6290327289714169, "grad_norm": 0.49272713039709415, "learning_rate": 2.0609894566098947e-05, "loss": 0.5284, "step": 21545 }, { "epoch": 0.6290619251992643, "grad_norm": 0.5545540816100815, "learning_rate": 2.0608272506082725e-05, "loss": 0.611, "step": 21546 }, { "epoch": 0.6290911214271117, "grad_norm": 0.531679115169527, "learning_rate": 2.0606650446066507e-05, "loss": 0.5414, "step": 21547 }, { "epoch": 0.629120317654959, "grad_norm": 0.5892672674992508, "learning_rate": 2.0605028386050285e-05, "loss": 0.6657, "step": 21548 }, { "epoch": 0.6291495138828064, "grad_norm": 0.545272854380657, "learning_rate": 2.0603406326034064e-05, "loss": 0.6352, "step": 21549 }, { "epoch": 0.6291787101106537, "grad_norm": 0.5209406680687168, "learning_rate": 2.0601784266017842e-05, "loss": 0.5876, "step": 21550 }, { "epoch": 0.6292079063385011, "grad_norm": 0.53856500977734, "learning_rate": 2.060016220600162e-05, "loss": 0.627, "step": 21551 }, { "epoch": 0.6292371025663485, "grad_norm": 0.5559003741832989, "learning_rate": 2.0598540145985402e-05, "loss": 0.6178, "step": 21552 }, { "epoch": 0.6292662987941958, "grad_norm": 0.5959395309988819, "learning_rate": 2.0596918085969184e-05, "loss": 0.6528, "step": 21553 }, { "epoch": 0.6292954950220432, "grad_norm": 0.5062474506749032, "learning_rate": 2.0595296025952962e-05, "loss": 0.5995, "step": 21554 }, { "epoch": 0.6293246912498905, "grad_norm": 0.5325474530270852, "learning_rate": 2.059367396593674e-05, "loss": 0.6196, "step": 21555 }, { "epoch": 0.6293538874777379, "grad_norm": 0.5321658020590265, "learning_rate": 2.059205190592052e-05, "loss": 0.635, "step": 21556 }, { "epoch": 0.6293830837055853, "grad_norm": 0.5016730215710185, "learning_rate": 2.05904298459043e-05, "loss": 0.5745, "step": 21557 }, { "epoch": 0.6294122799334326, "grad_norm": 0.5445015053843285, "learning_rate": 2.058880778588808e-05, "loss": 0.6121, "step": 21558 }, { "epoch": 0.62944147616128, "grad_norm": 0.5432390111873475, "learning_rate": 2.0587185725871858e-05, "loss": 0.6745, "step": 21559 }, { "epoch": 0.6294706723891274, "grad_norm": 0.5446677996496155, "learning_rate": 2.0585563665855636e-05, "loss": 0.6775, "step": 21560 }, { "epoch": 0.6294998686169747, "grad_norm": 0.5102891006553284, "learning_rate": 2.0583941605839414e-05, "loss": 0.6116, "step": 21561 }, { "epoch": 0.6295290648448221, "grad_norm": 0.5439082527008915, "learning_rate": 2.0582319545823196e-05, "loss": 0.6537, "step": 21562 }, { "epoch": 0.6295582610726694, "grad_norm": 0.5421853698867899, "learning_rate": 2.0580697485806978e-05, "loss": 0.6388, "step": 21563 }, { "epoch": 0.6295874573005168, "grad_norm": 0.509203522500694, "learning_rate": 2.0579075425790756e-05, "loss": 0.5713, "step": 21564 }, { "epoch": 0.6296166535283642, "grad_norm": 0.5037355714696766, "learning_rate": 2.0577453365774535e-05, "loss": 0.5408, "step": 21565 }, { "epoch": 0.6296458497562115, "grad_norm": 0.47115205705080193, "learning_rate": 2.0575831305758313e-05, "loss": 0.5032, "step": 21566 }, { "epoch": 0.6296750459840589, "grad_norm": 0.5344526814094932, "learning_rate": 2.0574209245742095e-05, "loss": 0.6428, "step": 21567 }, { "epoch": 0.6297042422119062, "grad_norm": 0.4869346498622348, "learning_rate": 2.0572587185725873e-05, "loss": 0.5478, "step": 21568 }, { "epoch": 0.6297334384397536, "grad_norm": 0.5127645830028825, "learning_rate": 2.057096512570965e-05, "loss": 0.578, "step": 21569 }, { "epoch": 0.629762634667601, "grad_norm": 0.5189691609677966, "learning_rate": 2.056934306569343e-05, "loss": 0.5791, "step": 21570 }, { "epoch": 0.6297918308954483, "grad_norm": 0.5018614932950021, "learning_rate": 2.056772100567721e-05, "loss": 0.5528, "step": 21571 }, { "epoch": 0.6298210271232957, "grad_norm": 0.5172686526720961, "learning_rate": 2.056609894566099e-05, "loss": 0.6054, "step": 21572 }, { "epoch": 0.629850223351143, "grad_norm": 0.5735948018804008, "learning_rate": 2.0564476885644772e-05, "loss": 0.7433, "step": 21573 }, { "epoch": 0.6298794195789904, "grad_norm": 0.536150796071613, "learning_rate": 2.056285482562855e-05, "loss": 0.5999, "step": 21574 }, { "epoch": 0.6299086158068378, "grad_norm": 0.5617945775879336, "learning_rate": 2.056123276561233e-05, "loss": 0.6366, "step": 21575 }, { "epoch": 0.6299378120346851, "grad_norm": 0.4858271299634963, "learning_rate": 2.0559610705596107e-05, "loss": 0.5549, "step": 21576 }, { "epoch": 0.6299670082625325, "grad_norm": 0.52814917090601, "learning_rate": 2.055798864557989e-05, "loss": 0.6198, "step": 21577 }, { "epoch": 0.6299962044903799, "grad_norm": 0.5032500769235562, "learning_rate": 2.0556366585563667e-05, "loss": 0.5749, "step": 21578 }, { "epoch": 0.6300254007182272, "grad_norm": 0.5707389556977622, "learning_rate": 2.0554744525547445e-05, "loss": 0.6663, "step": 21579 }, { "epoch": 0.6300545969460746, "grad_norm": 0.5143145337625633, "learning_rate": 2.0553122465531224e-05, "loss": 0.5894, "step": 21580 }, { "epoch": 0.6300837931739219, "grad_norm": 0.5054008516969059, "learning_rate": 2.0551500405515006e-05, "loss": 0.5875, "step": 21581 }, { "epoch": 0.6301129894017693, "grad_norm": 0.5541162652791779, "learning_rate": 2.0549878345498784e-05, "loss": 0.6795, "step": 21582 }, { "epoch": 0.6301421856296167, "grad_norm": 0.5017213384465496, "learning_rate": 2.0548256285482566e-05, "loss": 0.5718, "step": 21583 }, { "epoch": 0.630171381857464, "grad_norm": 0.5474968293882405, "learning_rate": 2.0546634225466344e-05, "loss": 0.6277, "step": 21584 }, { "epoch": 0.6302005780853114, "grad_norm": 0.5809133504996022, "learning_rate": 2.0545012165450122e-05, "loss": 0.6031, "step": 21585 }, { "epoch": 0.6302297743131587, "grad_norm": 0.4878278579186667, "learning_rate": 2.05433901054339e-05, "loss": 0.5402, "step": 21586 }, { "epoch": 0.6302589705410061, "grad_norm": 0.49806418024931287, "learning_rate": 2.0541768045417683e-05, "loss": 0.5845, "step": 21587 }, { "epoch": 0.6302881667688535, "grad_norm": 0.523700868479917, "learning_rate": 2.054014598540146e-05, "loss": 0.6134, "step": 21588 }, { "epoch": 0.6303173629967008, "grad_norm": 0.5248345870639295, "learning_rate": 2.053852392538524e-05, "loss": 0.5828, "step": 21589 }, { "epoch": 0.6303465592245482, "grad_norm": 0.5525898800093604, "learning_rate": 2.053690186536902e-05, "loss": 0.624, "step": 21590 }, { "epoch": 0.6303757554523955, "grad_norm": 0.5519380034744448, "learning_rate": 2.05352798053528e-05, "loss": 0.6517, "step": 21591 }, { "epoch": 0.6304049516802429, "grad_norm": 0.4877756473448657, "learning_rate": 2.0533657745336578e-05, "loss": 0.5272, "step": 21592 }, { "epoch": 0.6304341479080903, "grad_norm": 0.5408745429753022, "learning_rate": 2.053203568532036e-05, "loss": 0.5965, "step": 21593 }, { "epoch": 0.6304633441359376, "grad_norm": 0.5119534278221654, "learning_rate": 2.0530413625304138e-05, "loss": 0.5582, "step": 21594 }, { "epoch": 0.630492540363785, "grad_norm": 0.46905398398532655, "learning_rate": 2.0528791565287916e-05, "loss": 0.5352, "step": 21595 }, { "epoch": 0.6305217365916324, "grad_norm": 0.4978815914882471, "learning_rate": 2.0527169505271695e-05, "loss": 0.5628, "step": 21596 }, { "epoch": 0.6305509328194797, "grad_norm": 0.5093289450484833, "learning_rate": 2.0525547445255473e-05, "loss": 0.602, "step": 21597 }, { "epoch": 0.6305801290473271, "grad_norm": 0.5326861920813417, "learning_rate": 2.0523925385239255e-05, "loss": 0.6011, "step": 21598 }, { "epoch": 0.6306093252751744, "grad_norm": 0.5466216612149134, "learning_rate": 2.0522303325223033e-05, "loss": 0.6211, "step": 21599 }, { "epoch": 0.6306385215030218, "grad_norm": 0.5256662280140539, "learning_rate": 2.0520681265206815e-05, "loss": 0.6179, "step": 21600 }, { "epoch": 0.6306677177308692, "grad_norm": 0.5476976286611264, "learning_rate": 2.0519059205190593e-05, "loss": 0.6427, "step": 21601 }, { "epoch": 0.6306969139587165, "grad_norm": 0.5488548799965364, "learning_rate": 2.0517437145174372e-05, "loss": 0.6057, "step": 21602 }, { "epoch": 0.6307261101865639, "grad_norm": 0.5150373230177713, "learning_rate": 2.0515815085158153e-05, "loss": 0.5769, "step": 21603 }, { "epoch": 0.6307553064144112, "grad_norm": 0.5280647639388119, "learning_rate": 2.0514193025141932e-05, "loss": 0.6324, "step": 21604 }, { "epoch": 0.6307845026422586, "grad_norm": 0.49955798697500964, "learning_rate": 2.051257096512571e-05, "loss": 0.5764, "step": 21605 }, { "epoch": 0.630813698870106, "grad_norm": 0.5425973399836428, "learning_rate": 2.051094890510949e-05, "loss": 0.6686, "step": 21606 }, { "epoch": 0.6308428950979533, "grad_norm": 0.5610974375721914, "learning_rate": 2.0509326845093267e-05, "loss": 0.691, "step": 21607 }, { "epoch": 0.6308720913258007, "grad_norm": 0.49363505242605593, "learning_rate": 2.050770478507705e-05, "loss": 0.5428, "step": 21608 }, { "epoch": 0.630901287553648, "grad_norm": 0.5306645031287823, "learning_rate": 2.050608272506083e-05, "loss": 0.6333, "step": 21609 }, { "epoch": 0.6309304837814954, "grad_norm": 0.5208250057359578, "learning_rate": 2.050446066504461e-05, "loss": 0.5778, "step": 21610 }, { "epoch": 0.6309596800093428, "grad_norm": 0.49185788828090576, "learning_rate": 2.0502838605028387e-05, "loss": 0.5458, "step": 21611 }, { "epoch": 0.6309888762371901, "grad_norm": 0.557810989960775, "learning_rate": 2.0501216545012166e-05, "loss": 0.644, "step": 21612 }, { "epoch": 0.6310180724650375, "grad_norm": 0.5516874399842085, "learning_rate": 2.0499594484995947e-05, "loss": 0.6227, "step": 21613 }, { "epoch": 0.6310472686928849, "grad_norm": 0.526667332500607, "learning_rate": 2.0497972424979726e-05, "loss": 0.6207, "step": 21614 }, { "epoch": 0.6310764649207322, "grad_norm": 0.5384009112275959, "learning_rate": 2.0496350364963504e-05, "loss": 0.6093, "step": 21615 }, { "epoch": 0.6311056611485796, "grad_norm": 0.5702166800217439, "learning_rate": 2.0494728304947282e-05, "loss": 0.6086, "step": 21616 }, { "epoch": 0.6311348573764269, "grad_norm": 0.5249237289984834, "learning_rate": 2.049310624493106e-05, "loss": 0.5822, "step": 21617 }, { "epoch": 0.6311640536042743, "grad_norm": 0.547075566551089, "learning_rate": 2.0491484184914843e-05, "loss": 0.6594, "step": 21618 }, { "epoch": 0.6311932498321217, "grad_norm": 0.5159741192910235, "learning_rate": 2.0489862124898624e-05, "loss": 0.586, "step": 21619 }, { "epoch": 0.631222446059969, "grad_norm": 0.5207053725488232, "learning_rate": 2.0488240064882403e-05, "loss": 0.6195, "step": 21620 }, { "epoch": 0.6312516422878164, "grad_norm": 0.559483999978443, "learning_rate": 2.048661800486618e-05, "loss": 0.6485, "step": 21621 }, { "epoch": 0.6312808385156637, "grad_norm": 0.5331059973688761, "learning_rate": 2.048499594484996e-05, "loss": 0.6256, "step": 21622 }, { "epoch": 0.6313100347435111, "grad_norm": 0.5286545359179756, "learning_rate": 2.048337388483374e-05, "loss": 0.6041, "step": 21623 }, { "epoch": 0.6313392309713585, "grad_norm": 0.5353189363341446, "learning_rate": 2.048175182481752e-05, "loss": 0.6326, "step": 21624 }, { "epoch": 0.6313684271992058, "grad_norm": 0.4715693834326149, "learning_rate": 2.0480129764801298e-05, "loss": 0.5021, "step": 21625 }, { "epoch": 0.6313976234270532, "grad_norm": 0.5172315196980027, "learning_rate": 2.0478507704785076e-05, "loss": 0.6066, "step": 21626 }, { "epoch": 0.6314268196549006, "grad_norm": 0.5249984115008578, "learning_rate": 2.0476885644768855e-05, "loss": 0.6019, "step": 21627 }, { "epoch": 0.6314560158827479, "grad_norm": 0.5585113727285629, "learning_rate": 2.0475263584752637e-05, "loss": 0.6648, "step": 21628 }, { "epoch": 0.6314852121105953, "grad_norm": 0.5117645251812618, "learning_rate": 2.0473641524736418e-05, "loss": 0.5628, "step": 21629 }, { "epoch": 0.6315144083384426, "grad_norm": 0.5182251376139796, "learning_rate": 2.0472019464720197e-05, "loss": 0.5991, "step": 21630 }, { "epoch": 0.63154360456629, "grad_norm": 0.5714180324847105, "learning_rate": 2.0470397404703975e-05, "loss": 0.7029, "step": 21631 }, { "epoch": 0.6315728007941374, "grad_norm": 0.5658956429681634, "learning_rate": 2.0468775344687753e-05, "loss": 0.6369, "step": 21632 }, { "epoch": 0.6316019970219847, "grad_norm": 0.5200429180032707, "learning_rate": 2.0467153284671535e-05, "loss": 0.5653, "step": 21633 }, { "epoch": 0.6316311932498321, "grad_norm": 0.4929592722395278, "learning_rate": 2.0465531224655314e-05, "loss": 0.5483, "step": 21634 }, { "epoch": 0.6316603894776794, "grad_norm": 0.5433121047680572, "learning_rate": 2.0463909164639092e-05, "loss": 0.678, "step": 21635 }, { "epoch": 0.6316895857055268, "grad_norm": 0.4794449327352781, "learning_rate": 2.046228710462287e-05, "loss": 0.5498, "step": 21636 }, { "epoch": 0.6317187819333742, "grad_norm": 0.5370488162054065, "learning_rate": 2.0460665044606652e-05, "loss": 0.6389, "step": 21637 }, { "epoch": 0.6317479781612215, "grad_norm": 0.5301082410887134, "learning_rate": 2.045904298459043e-05, "loss": 0.6295, "step": 21638 }, { "epoch": 0.6317771743890689, "grad_norm": 0.555628755008497, "learning_rate": 2.0457420924574212e-05, "loss": 0.6862, "step": 21639 }, { "epoch": 0.6318063706169162, "grad_norm": 0.539060327958907, "learning_rate": 2.045579886455799e-05, "loss": 0.5737, "step": 21640 }, { "epoch": 0.6318355668447636, "grad_norm": 0.5506031604167289, "learning_rate": 2.045417680454177e-05, "loss": 0.6589, "step": 21641 }, { "epoch": 0.631864763072611, "grad_norm": 0.4897127412033197, "learning_rate": 2.0452554744525547e-05, "loss": 0.5421, "step": 21642 }, { "epoch": 0.6318939593004583, "grad_norm": 0.5144247620837293, "learning_rate": 2.045093268450933e-05, "loss": 0.6053, "step": 21643 }, { "epoch": 0.6319231555283057, "grad_norm": 0.5234353103437996, "learning_rate": 2.0449310624493107e-05, "loss": 0.6165, "step": 21644 }, { "epoch": 0.631952351756153, "grad_norm": 0.52703321698335, "learning_rate": 2.0447688564476886e-05, "loss": 0.6413, "step": 21645 }, { "epoch": 0.6319815479840004, "grad_norm": 0.5224568674227296, "learning_rate": 2.0446066504460664e-05, "loss": 0.6267, "step": 21646 }, { "epoch": 0.6320107442118478, "grad_norm": 0.5280264165375633, "learning_rate": 2.0444444444444446e-05, "loss": 0.6391, "step": 21647 }, { "epoch": 0.6320399404396951, "grad_norm": 0.5892276424378928, "learning_rate": 2.0442822384428224e-05, "loss": 0.6997, "step": 21648 }, { "epoch": 0.6320691366675426, "grad_norm": 0.49194728846644714, "learning_rate": 2.0441200324412006e-05, "loss": 0.5188, "step": 21649 }, { "epoch": 0.63209833289539, "grad_norm": 0.49155339144637045, "learning_rate": 2.0439578264395784e-05, "loss": 0.5649, "step": 21650 }, { "epoch": 0.6321275291232373, "grad_norm": 0.47717094081231975, "learning_rate": 2.0437956204379563e-05, "loss": 0.5124, "step": 21651 }, { "epoch": 0.6321567253510847, "grad_norm": 0.5239733534115433, "learning_rate": 2.043633414436334e-05, "loss": 0.593, "step": 21652 }, { "epoch": 0.632185921578932, "grad_norm": 0.5107254878866606, "learning_rate": 2.043471208434712e-05, "loss": 0.6126, "step": 21653 }, { "epoch": 0.6322151178067794, "grad_norm": 0.5485956907058764, "learning_rate": 2.04330900243309e-05, "loss": 0.6802, "step": 21654 }, { "epoch": 0.6322443140346268, "grad_norm": 0.5228307078739203, "learning_rate": 2.043146796431468e-05, "loss": 0.5505, "step": 21655 }, { "epoch": 0.6322735102624741, "grad_norm": 0.4872523245180536, "learning_rate": 2.042984590429846e-05, "loss": 0.5498, "step": 21656 }, { "epoch": 0.6323027064903215, "grad_norm": 0.520974624078065, "learning_rate": 2.042822384428224e-05, "loss": 0.6021, "step": 21657 }, { "epoch": 0.6323319027181689, "grad_norm": 0.5057373949873535, "learning_rate": 2.0426601784266018e-05, "loss": 0.58, "step": 21658 }, { "epoch": 0.6323610989460162, "grad_norm": 0.5070760235519197, "learning_rate": 2.04249797242498e-05, "loss": 0.5969, "step": 21659 }, { "epoch": 0.6323902951738636, "grad_norm": 0.536936256714119, "learning_rate": 2.042335766423358e-05, "loss": 0.6082, "step": 21660 }, { "epoch": 0.6324194914017109, "grad_norm": 0.5125898231972166, "learning_rate": 2.0421735604217357e-05, "loss": 0.5683, "step": 21661 }, { "epoch": 0.6324486876295583, "grad_norm": 0.5459533348778788, "learning_rate": 2.0420113544201135e-05, "loss": 0.6643, "step": 21662 }, { "epoch": 0.6324778838574057, "grad_norm": 0.5158493486543281, "learning_rate": 2.0418491484184914e-05, "loss": 0.61, "step": 21663 }, { "epoch": 0.632507080085253, "grad_norm": 0.8491821280180367, "learning_rate": 2.0416869424168695e-05, "loss": 0.6643, "step": 21664 }, { "epoch": 0.6325362763131004, "grad_norm": 0.5509686768771187, "learning_rate": 2.0415247364152474e-05, "loss": 0.6351, "step": 21665 }, { "epoch": 0.6325654725409477, "grad_norm": 0.5177694879721995, "learning_rate": 2.0413625304136255e-05, "loss": 0.6226, "step": 21666 }, { "epoch": 0.6325946687687951, "grad_norm": 0.49287062510723995, "learning_rate": 2.0412003244120034e-05, "loss": 0.5588, "step": 21667 }, { "epoch": 0.6326238649966425, "grad_norm": 0.4816372860361748, "learning_rate": 2.0410381184103812e-05, "loss": 0.5268, "step": 21668 }, { "epoch": 0.6326530612244898, "grad_norm": 0.509003753545, "learning_rate": 2.0408759124087594e-05, "loss": 0.5609, "step": 21669 }, { "epoch": 0.6326822574523372, "grad_norm": 0.575716518291344, "learning_rate": 2.0407137064071372e-05, "loss": 0.7335, "step": 21670 }, { "epoch": 0.6327114536801846, "grad_norm": 0.5336408900645622, "learning_rate": 2.040551500405515e-05, "loss": 0.5577, "step": 21671 }, { "epoch": 0.6327406499080319, "grad_norm": 0.4956570829070661, "learning_rate": 2.040389294403893e-05, "loss": 0.5724, "step": 21672 }, { "epoch": 0.6327698461358793, "grad_norm": 0.5169405069028533, "learning_rate": 2.0402270884022707e-05, "loss": 0.6123, "step": 21673 }, { "epoch": 0.6327990423637266, "grad_norm": 0.5583712972729152, "learning_rate": 2.040064882400649e-05, "loss": 0.4758, "step": 21674 }, { "epoch": 0.632828238591574, "grad_norm": 0.5155146450961585, "learning_rate": 2.039902676399027e-05, "loss": 0.6153, "step": 21675 }, { "epoch": 0.6328574348194214, "grad_norm": 0.49669594663129396, "learning_rate": 2.039740470397405e-05, "loss": 0.5249, "step": 21676 }, { "epoch": 0.6328866310472687, "grad_norm": 0.5211636524940795, "learning_rate": 2.0395782643957828e-05, "loss": 0.6012, "step": 21677 }, { "epoch": 0.6329158272751161, "grad_norm": 0.5043184691737218, "learning_rate": 2.0394160583941606e-05, "loss": 0.6039, "step": 21678 }, { "epoch": 0.6329450235029634, "grad_norm": 0.47659807618667693, "learning_rate": 2.0392538523925388e-05, "loss": 0.5185, "step": 21679 }, { "epoch": 0.6329742197308108, "grad_norm": 0.5472349176841308, "learning_rate": 2.0390916463909166e-05, "loss": 0.6693, "step": 21680 }, { "epoch": 0.6330034159586582, "grad_norm": 0.5493798683869777, "learning_rate": 2.0389294403892945e-05, "loss": 0.6584, "step": 21681 }, { "epoch": 0.6330326121865055, "grad_norm": 0.4819290870644546, "learning_rate": 2.0387672343876723e-05, "loss": 0.5804, "step": 21682 }, { "epoch": 0.6330618084143529, "grad_norm": 0.5592048759494227, "learning_rate": 2.03860502838605e-05, "loss": 0.6454, "step": 21683 }, { "epoch": 0.6330910046422003, "grad_norm": 0.49018554062619635, "learning_rate": 2.0384428223844283e-05, "loss": 0.5492, "step": 21684 }, { "epoch": 0.6331202008700476, "grad_norm": 0.5229845275005585, "learning_rate": 2.0382806163828065e-05, "loss": 0.6161, "step": 21685 }, { "epoch": 0.633149397097895, "grad_norm": 0.4755511675427002, "learning_rate": 2.0381184103811843e-05, "loss": 0.5174, "step": 21686 }, { "epoch": 0.6331785933257423, "grad_norm": 0.5139692810215782, "learning_rate": 2.037956204379562e-05, "loss": 0.5656, "step": 21687 }, { "epoch": 0.6332077895535897, "grad_norm": 0.5139280206007649, "learning_rate": 2.03779399837794e-05, "loss": 0.5443, "step": 21688 }, { "epoch": 0.6332369857814371, "grad_norm": 0.5056843632790937, "learning_rate": 2.0376317923763182e-05, "loss": 0.5404, "step": 21689 }, { "epoch": 0.6332661820092844, "grad_norm": 0.5196026246772225, "learning_rate": 2.037469586374696e-05, "loss": 0.603, "step": 21690 }, { "epoch": 0.6332953782371318, "grad_norm": 0.5159180512205949, "learning_rate": 2.037307380373074e-05, "loss": 0.5728, "step": 21691 }, { "epoch": 0.6333245744649791, "grad_norm": 0.5564352712664944, "learning_rate": 2.0371451743714517e-05, "loss": 0.6703, "step": 21692 }, { "epoch": 0.6333537706928265, "grad_norm": 0.5177769571266455, "learning_rate": 2.0369829683698295e-05, "loss": 0.5897, "step": 21693 }, { "epoch": 0.6333829669206739, "grad_norm": 0.5165868405865889, "learning_rate": 2.0368207623682077e-05, "loss": 0.5635, "step": 21694 }, { "epoch": 0.6334121631485212, "grad_norm": 0.5301846656012462, "learning_rate": 2.036658556366586e-05, "loss": 0.628, "step": 21695 }, { "epoch": 0.6334413593763686, "grad_norm": 0.4872478978564922, "learning_rate": 2.0364963503649637e-05, "loss": 0.5188, "step": 21696 }, { "epoch": 0.633470555604216, "grad_norm": 0.48736027125401815, "learning_rate": 2.0363341443633415e-05, "loss": 0.5199, "step": 21697 }, { "epoch": 0.6334997518320633, "grad_norm": 0.5695194278882686, "learning_rate": 2.0361719383617194e-05, "loss": 0.6603, "step": 21698 }, { "epoch": 0.6335289480599107, "grad_norm": 0.5318312520874355, "learning_rate": 2.0360097323600976e-05, "loss": 0.5909, "step": 21699 }, { "epoch": 0.633558144287758, "grad_norm": 0.5571647302439463, "learning_rate": 2.0358475263584754e-05, "loss": 0.6877, "step": 21700 }, { "epoch": 0.6335873405156054, "grad_norm": 0.5004042727292083, "learning_rate": 2.0356853203568532e-05, "loss": 0.5972, "step": 21701 }, { "epoch": 0.6336165367434528, "grad_norm": 0.5614175888369364, "learning_rate": 2.035523114355231e-05, "loss": 0.6877, "step": 21702 }, { "epoch": 0.6336457329713001, "grad_norm": 0.48036857051826126, "learning_rate": 2.0353609083536093e-05, "loss": 0.554, "step": 21703 }, { "epoch": 0.6336749291991475, "grad_norm": 0.5433959996204548, "learning_rate": 2.035198702351987e-05, "loss": 0.686, "step": 21704 }, { "epoch": 0.6337041254269948, "grad_norm": 0.47695116738058896, "learning_rate": 2.0350364963503653e-05, "loss": 0.5228, "step": 21705 }, { "epoch": 0.6337333216548422, "grad_norm": 0.5252491889357063, "learning_rate": 2.034874290348743e-05, "loss": 0.6344, "step": 21706 }, { "epoch": 0.6337625178826896, "grad_norm": 0.5034105447517753, "learning_rate": 2.034712084347121e-05, "loss": 0.5711, "step": 21707 }, { "epoch": 0.6337917141105369, "grad_norm": 0.519433417300731, "learning_rate": 2.0345498783454988e-05, "loss": 0.5713, "step": 21708 }, { "epoch": 0.6338209103383843, "grad_norm": 0.4829517221025944, "learning_rate": 2.0343876723438766e-05, "loss": 0.578, "step": 21709 }, { "epoch": 0.6338501065662316, "grad_norm": 0.5204424162147184, "learning_rate": 2.0342254663422548e-05, "loss": 0.6123, "step": 21710 }, { "epoch": 0.633879302794079, "grad_norm": 0.5072494662404778, "learning_rate": 2.0340632603406326e-05, "loss": 0.5905, "step": 21711 }, { "epoch": 0.6339084990219264, "grad_norm": 0.4938326193559441, "learning_rate": 2.0339010543390105e-05, "loss": 0.5683, "step": 21712 }, { "epoch": 0.6339376952497737, "grad_norm": 0.48386834458126127, "learning_rate": 2.0337388483373886e-05, "loss": 0.5514, "step": 21713 }, { "epoch": 0.6339668914776211, "grad_norm": 0.5119838638293863, "learning_rate": 2.0335766423357665e-05, "loss": 0.5885, "step": 21714 }, { "epoch": 0.6339960877054684, "grad_norm": 0.47133900032825127, "learning_rate": 2.0334144363341447e-05, "loss": 0.5183, "step": 21715 }, { "epoch": 0.6340252839333158, "grad_norm": 0.5327125208891212, "learning_rate": 2.0332522303325225e-05, "loss": 0.6237, "step": 21716 }, { "epoch": 0.6340544801611632, "grad_norm": 0.47491788314693767, "learning_rate": 2.0330900243309003e-05, "loss": 0.5165, "step": 21717 }, { "epoch": 0.6340836763890105, "grad_norm": 0.49486108981139326, "learning_rate": 2.032927818329278e-05, "loss": 0.5398, "step": 21718 }, { "epoch": 0.6341128726168579, "grad_norm": 0.5209958837190359, "learning_rate": 2.032765612327656e-05, "loss": 0.575, "step": 21719 }, { "epoch": 0.6341420688447053, "grad_norm": 0.49827199588909027, "learning_rate": 2.0326034063260342e-05, "loss": 0.5969, "step": 21720 }, { "epoch": 0.6341712650725526, "grad_norm": 0.5116627175457845, "learning_rate": 2.032441200324412e-05, "loss": 0.5486, "step": 21721 }, { "epoch": 0.6342004613004, "grad_norm": 0.547040728583819, "learning_rate": 2.0322789943227902e-05, "loss": 0.5942, "step": 21722 }, { "epoch": 0.6342296575282473, "grad_norm": 0.5299558063554127, "learning_rate": 2.032116788321168e-05, "loss": 0.6316, "step": 21723 }, { "epoch": 0.6342588537560947, "grad_norm": 0.5507267435915658, "learning_rate": 2.031954582319546e-05, "loss": 0.655, "step": 21724 }, { "epoch": 0.6342880499839421, "grad_norm": 0.5354055258592182, "learning_rate": 2.031792376317924e-05, "loss": 0.6301, "step": 21725 }, { "epoch": 0.6343172462117894, "grad_norm": 0.6185154096474126, "learning_rate": 2.031630170316302e-05, "loss": 0.7524, "step": 21726 }, { "epoch": 0.6343464424396368, "grad_norm": 0.5176882962927923, "learning_rate": 2.0314679643146797e-05, "loss": 0.6058, "step": 21727 }, { "epoch": 0.6343756386674841, "grad_norm": 0.5189118767458012, "learning_rate": 2.0313057583130576e-05, "loss": 0.5811, "step": 21728 }, { "epoch": 0.6344048348953315, "grad_norm": 0.5792003566494321, "learning_rate": 2.0311435523114354e-05, "loss": 0.6768, "step": 21729 }, { "epoch": 0.6344340311231789, "grad_norm": 0.585320512636573, "learning_rate": 2.0309813463098136e-05, "loss": 0.6417, "step": 21730 }, { "epoch": 0.6344632273510262, "grad_norm": 0.4822038885068579, "learning_rate": 2.0308191403081914e-05, "loss": 0.566, "step": 21731 }, { "epoch": 0.6344924235788736, "grad_norm": 0.516891952167963, "learning_rate": 2.0306569343065696e-05, "loss": 0.6057, "step": 21732 }, { "epoch": 0.634521619806721, "grad_norm": 0.5050262213485437, "learning_rate": 2.0304947283049474e-05, "loss": 0.5949, "step": 21733 }, { "epoch": 0.6345508160345683, "grad_norm": 0.5560651907677142, "learning_rate": 2.0303325223033253e-05, "loss": 0.6083, "step": 21734 }, { "epoch": 0.6345800122624157, "grad_norm": 0.5460540298695005, "learning_rate": 2.0301703163017034e-05, "loss": 0.6473, "step": 21735 }, { "epoch": 0.634609208490263, "grad_norm": 0.5484413329656421, "learning_rate": 2.0300081103000813e-05, "loss": 0.5795, "step": 21736 }, { "epoch": 0.6346384047181104, "grad_norm": 0.5155024884750816, "learning_rate": 2.029845904298459e-05, "loss": 0.6139, "step": 21737 }, { "epoch": 0.6346676009459578, "grad_norm": 0.5305469793344432, "learning_rate": 2.029683698296837e-05, "loss": 0.5969, "step": 21738 }, { "epoch": 0.6346967971738051, "grad_norm": 0.46020406549472154, "learning_rate": 2.0295214922952148e-05, "loss": 0.465, "step": 21739 }, { "epoch": 0.6347259934016525, "grad_norm": 0.50454591798387, "learning_rate": 2.029359286293593e-05, "loss": 0.5567, "step": 21740 }, { "epoch": 0.6347551896294998, "grad_norm": 0.5345133978707092, "learning_rate": 2.029197080291971e-05, "loss": 0.6217, "step": 21741 }, { "epoch": 0.6347843858573472, "grad_norm": 0.5107612193595993, "learning_rate": 2.029034874290349e-05, "loss": 0.5791, "step": 21742 }, { "epoch": 0.6348135820851946, "grad_norm": 0.4954548378820451, "learning_rate": 2.0288726682887268e-05, "loss": 0.5791, "step": 21743 }, { "epoch": 0.6348427783130419, "grad_norm": 0.5163780272137698, "learning_rate": 2.0287104622871046e-05, "loss": 0.5587, "step": 21744 }, { "epoch": 0.6348719745408893, "grad_norm": 0.5209507872109141, "learning_rate": 2.0285482562854828e-05, "loss": 0.5541, "step": 21745 }, { "epoch": 0.6349011707687366, "grad_norm": 0.5302059416027787, "learning_rate": 2.0283860502838607e-05, "loss": 0.6422, "step": 21746 }, { "epoch": 0.634930366996584, "grad_norm": 0.547055419717992, "learning_rate": 2.0282238442822385e-05, "loss": 0.6305, "step": 21747 }, { "epoch": 0.6349595632244314, "grad_norm": 0.5499591688104812, "learning_rate": 2.0280616382806163e-05, "loss": 0.69, "step": 21748 }, { "epoch": 0.6349887594522787, "grad_norm": 0.5284679083655437, "learning_rate": 2.0278994322789942e-05, "loss": 0.6169, "step": 21749 }, { "epoch": 0.6350179556801261, "grad_norm": 0.5472996089363197, "learning_rate": 2.0277372262773724e-05, "loss": 0.658, "step": 21750 }, { "epoch": 0.6350471519079735, "grad_norm": 0.49041666089350355, "learning_rate": 2.0275750202757505e-05, "loss": 0.6058, "step": 21751 }, { "epoch": 0.6350763481358208, "grad_norm": 0.5229677525710114, "learning_rate": 2.0274128142741284e-05, "loss": 0.6069, "step": 21752 }, { "epoch": 0.6351055443636682, "grad_norm": 0.4753355027421336, "learning_rate": 2.0272506082725062e-05, "loss": 0.5289, "step": 21753 }, { "epoch": 0.6351347405915155, "grad_norm": 0.5436331712498419, "learning_rate": 2.027088402270884e-05, "loss": 0.605, "step": 21754 }, { "epoch": 0.6351639368193629, "grad_norm": 0.5028159988322051, "learning_rate": 2.0269261962692622e-05, "loss": 0.5708, "step": 21755 }, { "epoch": 0.6351931330472103, "grad_norm": 0.5046313112661132, "learning_rate": 2.02676399026764e-05, "loss": 0.5544, "step": 21756 }, { "epoch": 0.6352223292750576, "grad_norm": 0.5467225898273348, "learning_rate": 2.026601784266018e-05, "loss": 0.5821, "step": 21757 }, { "epoch": 0.635251525502905, "grad_norm": 0.5440389466563086, "learning_rate": 2.0264395782643957e-05, "loss": 0.6554, "step": 21758 }, { "epoch": 0.6352807217307523, "grad_norm": 0.47616716663018605, "learning_rate": 2.0262773722627736e-05, "loss": 0.5115, "step": 21759 }, { "epoch": 0.6353099179585997, "grad_norm": 0.5133500322347904, "learning_rate": 2.0261151662611517e-05, "loss": 0.6108, "step": 21760 }, { "epoch": 0.6353391141864471, "grad_norm": 0.5126870052247635, "learning_rate": 2.02595296025953e-05, "loss": 0.5484, "step": 21761 }, { "epoch": 0.6353683104142944, "grad_norm": 0.5082732347920553, "learning_rate": 2.0257907542579078e-05, "loss": 0.5636, "step": 21762 }, { "epoch": 0.6353975066421418, "grad_norm": 0.584172113710019, "learning_rate": 2.0256285482562856e-05, "loss": 0.6219, "step": 21763 }, { "epoch": 0.6354267028699891, "grad_norm": 0.5198352037235388, "learning_rate": 2.0254663422546634e-05, "loss": 0.5689, "step": 21764 }, { "epoch": 0.6354558990978365, "grad_norm": 0.5099938471081173, "learning_rate": 2.0253041362530416e-05, "loss": 0.5595, "step": 21765 }, { "epoch": 0.6354850953256839, "grad_norm": 0.5372937362703819, "learning_rate": 2.0251419302514194e-05, "loss": 0.6229, "step": 21766 }, { "epoch": 0.6355142915535312, "grad_norm": 0.5391854592006197, "learning_rate": 2.0249797242497973e-05, "loss": 0.6487, "step": 21767 }, { "epoch": 0.6355434877813786, "grad_norm": 0.5402409810655697, "learning_rate": 2.024817518248175e-05, "loss": 0.5862, "step": 21768 }, { "epoch": 0.6355726840092261, "grad_norm": 0.5296475768816706, "learning_rate": 2.024655312246553e-05, "loss": 0.6265, "step": 21769 }, { "epoch": 0.6356018802370734, "grad_norm": 0.49857626972688573, "learning_rate": 2.024493106244931e-05, "loss": 0.5304, "step": 21770 }, { "epoch": 0.6356310764649208, "grad_norm": 0.5207844662273268, "learning_rate": 2.0243309002433093e-05, "loss": 0.5803, "step": 21771 }, { "epoch": 0.6356602726927681, "grad_norm": 0.5076410867581128, "learning_rate": 2.024168694241687e-05, "loss": 0.538, "step": 21772 }, { "epoch": 0.6356894689206155, "grad_norm": 0.5415436007654041, "learning_rate": 2.024006488240065e-05, "loss": 0.6452, "step": 21773 }, { "epoch": 0.6357186651484629, "grad_norm": 0.5114257706944856, "learning_rate": 2.0238442822384428e-05, "loss": 0.5662, "step": 21774 }, { "epoch": 0.6357478613763102, "grad_norm": 0.4905303918047852, "learning_rate": 2.0236820762368207e-05, "loss": 0.5534, "step": 21775 }, { "epoch": 0.6357770576041576, "grad_norm": 0.5254318769436231, "learning_rate": 2.023519870235199e-05, "loss": 0.6019, "step": 21776 }, { "epoch": 0.635806253832005, "grad_norm": 0.520256695000023, "learning_rate": 2.0233576642335767e-05, "loss": 0.6162, "step": 21777 }, { "epoch": 0.6358354500598523, "grad_norm": 0.5247604007055785, "learning_rate": 2.0231954582319545e-05, "loss": 0.6167, "step": 21778 }, { "epoch": 0.6358646462876997, "grad_norm": 0.5191454090936357, "learning_rate": 2.0230332522303327e-05, "loss": 0.5573, "step": 21779 }, { "epoch": 0.635893842515547, "grad_norm": 0.5018775123682225, "learning_rate": 2.0228710462287105e-05, "loss": 0.5773, "step": 21780 }, { "epoch": 0.6359230387433944, "grad_norm": 0.5178336994482108, "learning_rate": 2.0227088402270887e-05, "loss": 0.6023, "step": 21781 }, { "epoch": 0.6359522349712418, "grad_norm": 0.5123132703068275, "learning_rate": 2.0225466342254665e-05, "loss": 0.5994, "step": 21782 }, { "epoch": 0.6359814311990891, "grad_norm": 0.5098294714170443, "learning_rate": 2.0223844282238444e-05, "loss": 0.58, "step": 21783 }, { "epoch": 0.6360106274269365, "grad_norm": 0.5527242509988459, "learning_rate": 2.0222222222222222e-05, "loss": 0.6548, "step": 21784 }, { "epoch": 0.6360398236547838, "grad_norm": 0.5291260511452582, "learning_rate": 2.0220600162206e-05, "loss": 0.5865, "step": 21785 }, { "epoch": 0.6360690198826312, "grad_norm": 0.512391957781406, "learning_rate": 2.0218978102189782e-05, "loss": 0.5464, "step": 21786 }, { "epoch": 0.6360982161104786, "grad_norm": 0.5450375629040658, "learning_rate": 2.021735604217356e-05, "loss": 0.6438, "step": 21787 }, { "epoch": 0.6361274123383259, "grad_norm": 0.5271256948287336, "learning_rate": 2.0215733982157342e-05, "loss": 0.6397, "step": 21788 }, { "epoch": 0.6361566085661733, "grad_norm": 0.4849290906756893, "learning_rate": 2.021411192214112e-05, "loss": 0.5279, "step": 21789 }, { "epoch": 0.6361858047940206, "grad_norm": 0.4869832503830605, "learning_rate": 2.02124898621249e-05, "loss": 0.5599, "step": 21790 }, { "epoch": 0.636215001021868, "grad_norm": 0.48464028925903796, "learning_rate": 2.021086780210868e-05, "loss": 0.5235, "step": 21791 }, { "epoch": 0.6362441972497154, "grad_norm": 0.5238144089759194, "learning_rate": 2.020924574209246e-05, "loss": 0.6601, "step": 21792 }, { "epoch": 0.6362733934775627, "grad_norm": 0.5159334914509967, "learning_rate": 2.0207623682076238e-05, "loss": 0.5995, "step": 21793 }, { "epoch": 0.6363025897054101, "grad_norm": 0.5141282565702324, "learning_rate": 2.0206001622060016e-05, "loss": 0.566, "step": 21794 }, { "epoch": 0.6363317859332575, "grad_norm": 0.5396300885241705, "learning_rate": 2.0204379562043794e-05, "loss": 0.6729, "step": 21795 }, { "epoch": 0.6363609821611048, "grad_norm": 0.4742116493668491, "learning_rate": 2.0202757502027576e-05, "loss": 0.5264, "step": 21796 }, { "epoch": 0.6363901783889522, "grad_norm": 0.5096972504602766, "learning_rate": 2.0201135442011355e-05, "loss": 0.5781, "step": 21797 }, { "epoch": 0.6364193746167995, "grad_norm": 0.5462914894203752, "learning_rate": 2.0199513381995136e-05, "loss": 0.678, "step": 21798 }, { "epoch": 0.6364485708446469, "grad_norm": 0.5424918777851764, "learning_rate": 2.0197891321978915e-05, "loss": 0.652, "step": 21799 }, { "epoch": 0.6364777670724943, "grad_norm": 0.5003714661710377, "learning_rate": 2.0196269261962693e-05, "loss": 0.5668, "step": 21800 }, { "epoch": 0.6365069633003416, "grad_norm": 0.5196880171076244, "learning_rate": 2.0194647201946475e-05, "loss": 0.6243, "step": 21801 }, { "epoch": 0.636536159528189, "grad_norm": 0.5473481624680994, "learning_rate": 2.0193025141930253e-05, "loss": 0.6504, "step": 21802 }, { "epoch": 0.6365653557560363, "grad_norm": 0.5496850585566049, "learning_rate": 2.019140308191403e-05, "loss": 0.6557, "step": 21803 }, { "epoch": 0.6365945519838837, "grad_norm": 0.5566981496870237, "learning_rate": 2.018978102189781e-05, "loss": 0.6816, "step": 21804 }, { "epoch": 0.6366237482117311, "grad_norm": 0.5094646743639387, "learning_rate": 2.0188158961881588e-05, "loss": 0.551, "step": 21805 }, { "epoch": 0.6366529444395784, "grad_norm": 0.5319656608146908, "learning_rate": 2.018653690186537e-05, "loss": 0.6201, "step": 21806 }, { "epoch": 0.6366821406674258, "grad_norm": 0.5714461197422511, "learning_rate": 2.0184914841849152e-05, "loss": 0.6881, "step": 21807 }, { "epoch": 0.6367113368952732, "grad_norm": 0.5378630669718135, "learning_rate": 2.018329278183293e-05, "loss": 0.61, "step": 21808 }, { "epoch": 0.6367405331231205, "grad_norm": 0.5357481094144736, "learning_rate": 2.018167072181671e-05, "loss": 0.628, "step": 21809 }, { "epoch": 0.6367697293509679, "grad_norm": 0.5081175071603768, "learning_rate": 2.0180048661800487e-05, "loss": 0.5772, "step": 21810 }, { "epoch": 0.6367989255788152, "grad_norm": 0.5276939404983767, "learning_rate": 2.017842660178427e-05, "loss": 0.6591, "step": 21811 }, { "epoch": 0.6368281218066626, "grad_norm": 0.4966887669351842, "learning_rate": 2.0176804541768047e-05, "loss": 0.5357, "step": 21812 }, { "epoch": 0.63685731803451, "grad_norm": 0.5029829367282268, "learning_rate": 2.0175182481751825e-05, "loss": 0.5479, "step": 21813 }, { "epoch": 0.6368865142623573, "grad_norm": 0.5137807648324988, "learning_rate": 2.0173560421735604e-05, "loss": 0.6433, "step": 21814 }, { "epoch": 0.6369157104902047, "grad_norm": 0.5024012482925259, "learning_rate": 2.0171938361719382e-05, "loss": 0.6071, "step": 21815 }, { "epoch": 0.636944906718052, "grad_norm": 0.5302112575292298, "learning_rate": 2.0170316301703164e-05, "loss": 0.6368, "step": 21816 }, { "epoch": 0.6369741029458994, "grad_norm": 0.5077364569163373, "learning_rate": 2.0168694241686946e-05, "loss": 0.543, "step": 21817 }, { "epoch": 0.6370032991737468, "grad_norm": 0.4847315307576859, "learning_rate": 2.0167072181670724e-05, "loss": 0.5734, "step": 21818 }, { "epoch": 0.6370324954015941, "grad_norm": 0.45897404846820977, "learning_rate": 2.0165450121654502e-05, "loss": 0.5016, "step": 21819 }, { "epoch": 0.6370616916294415, "grad_norm": 0.5054453445538213, "learning_rate": 2.016382806163828e-05, "loss": 0.5902, "step": 21820 }, { "epoch": 0.6370908878572888, "grad_norm": 0.5441474024734732, "learning_rate": 2.0162206001622063e-05, "loss": 0.656, "step": 21821 }, { "epoch": 0.6371200840851362, "grad_norm": 0.5016217254930607, "learning_rate": 2.016058394160584e-05, "loss": 0.5687, "step": 21822 }, { "epoch": 0.6371492803129836, "grad_norm": 0.5432057050669501, "learning_rate": 2.015896188158962e-05, "loss": 0.6514, "step": 21823 }, { "epoch": 0.6371784765408309, "grad_norm": 0.5088322407785305, "learning_rate": 2.0157339821573398e-05, "loss": 0.5769, "step": 21824 }, { "epoch": 0.6372076727686783, "grad_norm": 0.4799931735943867, "learning_rate": 2.0155717761557176e-05, "loss": 0.506, "step": 21825 }, { "epoch": 0.6372368689965257, "grad_norm": 0.506359531819105, "learning_rate": 2.0154095701540958e-05, "loss": 0.5609, "step": 21826 }, { "epoch": 0.637266065224373, "grad_norm": 0.5049471291482827, "learning_rate": 2.015247364152474e-05, "loss": 0.5533, "step": 21827 }, { "epoch": 0.6372952614522204, "grad_norm": 0.5325858514185228, "learning_rate": 2.0150851581508518e-05, "loss": 0.6051, "step": 21828 }, { "epoch": 0.6373244576800677, "grad_norm": 0.5169944896113354, "learning_rate": 2.0149229521492296e-05, "loss": 0.6216, "step": 21829 }, { "epoch": 0.6373536539079151, "grad_norm": 0.5048769313333098, "learning_rate": 2.0147607461476075e-05, "loss": 0.6144, "step": 21830 }, { "epoch": 0.6373828501357625, "grad_norm": 0.5037606403113184, "learning_rate": 2.0145985401459853e-05, "loss": 0.5652, "step": 21831 }, { "epoch": 0.6374120463636098, "grad_norm": 0.49806219198636487, "learning_rate": 2.0144363341443635e-05, "loss": 0.5544, "step": 21832 }, { "epoch": 0.6374412425914572, "grad_norm": 0.5073043245533743, "learning_rate": 2.0142741281427413e-05, "loss": 0.562, "step": 21833 }, { "epoch": 0.6374704388193045, "grad_norm": 0.4955580490415647, "learning_rate": 2.014111922141119e-05, "loss": 0.5518, "step": 21834 }, { "epoch": 0.6374996350471519, "grad_norm": 0.5082890650691245, "learning_rate": 2.013949716139497e-05, "loss": 0.5347, "step": 21835 }, { "epoch": 0.6375288312749993, "grad_norm": 0.5781347030999545, "learning_rate": 2.0137875101378752e-05, "loss": 0.636, "step": 21836 }, { "epoch": 0.6375580275028466, "grad_norm": 0.5421893903830851, "learning_rate": 2.0136253041362534e-05, "loss": 0.6447, "step": 21837 }, { "epoch": 0.637587223730694, "grad_norm": 0.5221509259228694, "learning_rate": 2.0134630981346312e-05, "loss": 0.6371, "step": 21838 }, { "epoch": 0.6376164199585413, "grad_norm": 0.554970446159482, "learning_rate": 2.013300892133009e-05, "loss": 0.6522, "step": 21839 }, { "epoch": 0.6376456161863887, "grad_norm": 0.5270815897847716, "learning_rate": 2.013138686131387e-05, "loss": 0.6035, "step": 21840 }, { "epoch": 0.6376748124142361, "grad_norm": 0.5245038873472245, "learning_rate": 2.0129764801297647e-05, "loss": 0.608, "step": 21841 }, { "epoch": 0.6377040086420834, "grad_norm": 0.5037033244885943, "learning_rate": 2.012814274128143e-05, "loss": 0.5706, "step": 21842 }, { "epoch": 0.6377332048699308, "grad_norm": 0.526496352639281, "learning_rate": 2.0126520681265207e-05, "loss": 0.6406, "step": 21843 }, { "epoch": 0.6377624010977782, "grad_norm": 0.4892587164109708, "learning_rate": 2.0124898621248986e-05, "loss": 0.5421, "step": 21844 }, { "epoch": 0.6377915973256255, "grad_norm": 0.5481539854149674, "learning_rate": 2.0123276561232767e-05, "loss": 0.5664, "step": 21845 }, { "epoch": 0.6378207935534729, "grad_norm": 0.5243703572412524, "learning_rate": 2.0121654501216546e-05, "loss": 0.6228, "step": 21846 }, { "epoch": 0.6378499897813202, "grad_norm": 0.48451378121925887, "learning_rate": 2.0120032441200327e-05, "loss": 0.5297, "step": 21847 }, { "epoch": 0.6378791860091676, "grad_norm": 0.5336226659754533, "learning_rate": 2.0118410381184106e-05, "loss": 0.6179, "step": 21848 }, { "epoch": 0.637908382237015, "grad_norm": 0.5338889682310248, "learning_rate": 2.0116788321167884e-05, "loss": 0.656, "step": 21849 }, { "epoch": 0.6379375784648623, "grad_norm": 0.5520449638791118, "learning_rate": 2.0115166261151663e-05, "loss": 0.5824, "step": 21850 }, { "epoch": 0.6379667746927097, "grad_norm": 0.5567328324315662, "learning_rate": 2.011354420113544e-05, "loss": 0.642, "step": 21851 }, { "epoch": 0.637995970920557, "grad_norm": 0.4890837659519228, "learning_rate": 2.0111922141119223e-05, "loss": 0.5301, "step": 21852 }, { "epoch": 0.6380251671484044, "grad_norm": 0.5125221475512337, "learning_rate": 2.0110300081103e-05, "loss": 0.5698, "step": 21853 }, { "epoch": 0.6380543633762518, "grad_norm": 0.46987020956589776, "learning_rate": 2.0108678021086783e-05, "loss": 0.5087, "step": 21854 }, { "epoch": 0.6380835596040991, "grad_norm": 0.493727705295298, "learning_rate": 2.010705596107056e-05, "loss": 0.5553, "step": 21855 }, { "epoch": 0.6381127558319465, "grad_norm": 0.5363903732836486, "learning_rate": 2.010543390105434e-05, "loss": 0.6087, "step": 21856 }, { "epoch": 0.6381419520597938, "grad_norm": 0.4960219060555217, "learning_rate": 2.010381184103812e-05, "loss": 0.5576, "step": 21857 }, { "epoch": 0.6381711482876412, "grad_norm": 0.5560517654344317, "learning_rate": 2.01021897810219e-05, "loss": 0.6147, "step": 21858 }, { "epoch": 0.6382003445154886, "grad_norm": 0.5147944420947181, "learning_rate": 2.0100567721005678e-05, "loss": 0.5979, "step": 21859 }, { "epoch": 0.6382295407433359, "grad_norm": 0.4867051743127448, "learning_rate": 2.0098945660989456e-05, "loss": 0.5487, "step": 21860 }, { "epoch": 0.6382587369711833, "grad_norm": 0.5118257784485926, "learning_rate": 2.0097323600973235e-05, "loss": 0.5812, "step": 21861 }, { "epoch": 0.6382879331990307, "grad_norm": 0.5464080987143224, "learning_rate": 2.0095701540957017e-05, "loss": 0.6635, "step": 21862 }, { "epoch": 0.638317129426878, "grad_norm": 0.4902929503976165, "learning_rate": 2.0094079480940795e-05, "loss": 0.5533, "step": 21863 }, { "epoch": 0.6383463256547254, "grad_norm": 0.5272298131056645, "learning_rate": 2.0092457420924577e-05, "loss": 0.5896, "step": 21864 }, { "epoch": 0.6383755218825727, "grad_norm": 0.5124856222411586, "learning_rate": 2.0090835360908355e-05, "loss": 0.5929, "step": 21865 }, { "epoch": 0.6384047181104201, "grad_norm": 0.49072781573282337, "learning_rate": 2.0089213300892133e-05, "loss": 0.5378, "step": 21866 }, { "epoch": 0.6384339143382675, "grad_norm": 0.5288130609146109, "learning_rate": 2.0087591240875915e-05, "loss": 0.6109, "step": 21867 }, { "epoch": 0.6384631105661148, "grad_norm": 0.5238343667364964, "learning_rate": 2.0085969180859694e-05, "loss": 0.6147, "step": 21868 }, { "epoch": 0.6384923067939622, "grad_norm": 0.5293097092307778, "learning_rate": 2.0084347120843472e-05, "loss": 0.6252, "step": 21869 }, { "epoch": 0.6385215030218095, "grad_norm": 0.5284356226349555, "learning_rate": 2.008272506082725e-05, "loss": 0.5868, "step": 21870 }, { "epoch": 0.6385506992496569, "grad_norm": 0.5312548067086245, "learning_rate": 2.008110300081103e-05, "loss": 0.5694, "step": 21871 }, { "epoch": 0.6385798954775043, "grad_norm": 0.4960049214430797, "learning_rate": 2.007948094079481e-05, "loss": 0.5478, "step": 21872 }, { "epoch": 0.6386090917053516, "grad_norm": 0.4944052954855094, "learning_rate": 2.0077858880778592e-05, "loss": 0.5662, "step": 21873 }, { "epoch": 0.638638287933199, "grad_norm": 0.5268265175811272, "learning_rate": 2.007623682076237e-05, "loss": 0.6237, "step": 21874 }, { "epoch": 0.6386674841610464, "grad_norm": 0.5271345906088145, "learning_rate": 2.007461476074615e-05, "loss": 0.6031, "step": 21875 }, { "epoch": 0.6386966803888937, "grad_norm": 0.5550004766259024, "learning_rate": 2.0072992700729927e-05, "loss": 0.6741, "step": 21876 }, { "epoch": 0.6387258766167411, "grad_norm": 0.554049884483124, "learning_rate": 2.007137064071371e-05, "loss": 0.6614, "step": 21877 }, { "epoch": 0.6387550728445884, "grad_norm": 0.5599960015789955, "learning_rate": 2.0069748580697488e-05, "loss": 0.6898, "step": 21878 }, { "epoch": 0.6387842690724358, "grad_norm": 0.5067459827406633, "learning_rate": 2.0068126520681266e-05, "loss": 0.5557, "step": 21879 }, { "epoch": 0.6388134653002832, "grad_norm": 0.5207368875729631, "learning_rate": 2.0066504460665044e-05, "loss": 0.5923, "step": 21880 }, { "epoch": 0.6388426615281305, "grad_norm": 0.5639491344430185, "learning_rate": 2.0064882400648823e-05, "loss": 0.7171, "step": 21881 }, { "epoch": 0.6388718577559779, "grad_norm": 0.4812462633291547, "learning_rate": 2.0063260340632604e-05, "loss": 0.5006, "step": 21882 }, { "epoch": 0.6389010539838252, "grad_norm": 0.5114617028008024, "learning_rate": 2.0061638280616386e-05, "loss": 0.5828, "step": 21883 }, { "epoch": 0.6389302502116726, "grad_norm": 0.6343038119795593, "learning_rate": 2.0060016220600165e-05, "loss": 0.7207, "step": 21884 }, { "epoch": 0.63895944643952, "grad_norm": 0.5291276130191187, "learning_rate": 2.0058394160583943e-05, "loss": 0.6002, "step": 21885 }, { "epoch": 0.6389886426673673, "grad_norm": 0.5391368231572368, "learning_rate": 2.005677210056772e-05, "loss": 0.6156, "step": 21886 }, { "epoch": 0.6390178388952147, "grad_norm": 0.5187273769644808, "learning_rate": 2.0055150040551503e-05, "loss": 0.5837, "step": 21887 }, { "epoch": 0.639047035123062, "grad_norm": 0.519054644312684, "learning_rate": 2.005352798053528e-05, "loss": 0.6434, "step": 21888 }, { "epoch": 0.6390762313509094, "grad_norm": 0.539788647331247, "learning_rate": 2.005190592051906e-05, "loss": 0.6543, "step": 21889 }, { "epoch": 0.6391054275787569, "grad_norm": 0.47222642818900823, "learning_rate": 2.0050283860502838e-05, "loss": 0.5163, "step": 21890 }, { "epoch": 0.6391346238066042, "grad_norm": 0.5246068279557132, "learning_rate": 2.0048661800486617e-05, "loss": 0.5825, "step": 21891 }, { "epoch": 0.6391638200344516, "grad_norm": 0.5212942802171272, "learning_rate": 2.0047039740470398e-05, "loss": 0.6441, "step": 21892 }, { "epoch": 0.639193016262299, "grad_norm": 0.5149399484819106, "learning_rate": 2.004541768045418e-05, "loss": 0.5926, "step": 21893 }, { "epoch": 0.6392222124901463, "grad_norm": 0.49899605336543446, "learning_rate": 2.004379562043796e-05, "loss": 0.5677, "step": 21894 }, { "epoch": 0.6392514087179937, "grad_norm": 0.514182226067921, "learning_rate": 2.0042173560421737e-05, "loss": 0.553, "step": 21895 }, { "epoch": 0.639280604945841, "grad_norm": 0.49317959389235244, "learning_rate": 2.0040551500405515e-05, "loss": 0.5392, "step": 21896 }, { "epoch": 0.6393098011736884, "grad_norm": 0.4998090710729048, "learning_rate": 2.0038929440389294e-05, "loss": 0.5562, "step": 21897 }, { "epoch": 0.6393389974015358, "grad_norm": 0.47593162850758475, "learning_rate": 2.0037307380373075e-05, "loss": 0.5045, "step": 21898 }, { "epoch": 0.6393681936293831, "grad_norm": 0.5143305183201466, "learning_rate": 2.0035685320356854e-05, "loss": 0.5677, "step": 21899 }, { "epoch": 0.6393973898572305, "grad_norm": 0.47790140984849816, "learning_rate": 2.0034063260340632e-05, "loss": 0.5153, "step": 21900 }, { "epoch": 0.6394265860850779, "grad_norm": 0.49220158058732216, "learning_rate": 2.003244120032441e-05, "loss": 0.5573, "step": 21901 }, { "epoch": 0.6394557823129252, "grad_norm": 0.5132434369980632, "learning_rate": 2.0030819140308192e-05, "loss": 0.6283, "step": 21902 }, { "epoch": 0.6394849785407726, "grad_norm": 0.5109787798379594, "learning_rate": 2.0029197080291974e-05, "loss": 0.5728, "step": 21903 }, { "epoch": 0.6395141747686199, "grad_norm": 0.5460679429919264, "learning_rate": 2.0027575020275752e-05, "loss": 0.7081, "step": 21904 }, { "epoch": 0.6395433709964673, "grad_norm": 0.49305996882709907, "learning_rate": 2.002595296025953e-05, "loss": 0.5006, "step": 21905 }, { "epoch": 0.6395725672243147, "grad_norm": 0.4949113992201543, "learning_rate": 2.002433090024331e-05, "loss": 0.5905, "step": 21906 }, { "epoch": 0.639601763452162, "grad_norm": 0.5811833781611533, "learning_rate": 2.0022708840227087e-05, "loss": 0.6675, "step": 21907 }, { "epoch": 0.6396309596800094, "grad_norm": 0.5403214439117104, "learning_rate": 2.002108678021087e-05, "loss": 0.6288, "step": 21908 }, { "epoch": 0.6396601559078567, "grad_norm": 0.5206991189360496, "learning_rate": 2.0019464720194648e-05, "loss": 0.585, "step": 21909 }, { "epoch": 0.6396893521357041, "grad_norm": 0.5577769513093629, "learning_rate": 2.0017842660178426e-05, "loss": 0.6485, "step": 21910 }, { "epoch": 0.6397185483635515, "grad_norm": 0.5283228694008013, "learning_rate": 2.0016220600162208e-05, "loss": 0.5913, "step": 21911 }, { "epoch": 0.6397477445913988, "grad_norm": 0.49133809288842833, "learning_rate": 2.0014598540145986e-05, "loss": 0.5246, "step": 21912 }, { "epoch": 0.6397769408192462, "grad_norm": 0.4844898274400793, "learning_rate": 2.0012976480129768e-05, "loss": 0.5823, "step": 21913 }, { "epoch": 0.6398061370470935, "grad_norm": 0.5192198311152777, "learning_rate": 2.0011354420113546e-05, "loss": 0.5662, "step": 21914 }, { "epoch": 0.6398353332749409, "grad_norm": 0.611419510386845, "learning_rate": 2.0009732360097325e-05, "loss": 0.6694, "step": 21915 }, { "epoch": 0.6398645295027883, "grad_norm": 0.49968238087600075, "learning_rate": 2.0008110300081103e-05, "loss": 0.5575, "step": 21916 }, { "epoch": 0.6398937257306356, "grad_norm": 0.6214581780221383, "learning_rate": 2.000648824006488e-05, "loss": 0.6713, "step": 21917 }, { "epoch": 0.639922921958483, "grad_norm": 0.5601446233531597, "learning_rate": 2.0004866180048663e-05, "loss": 0.7019, "step": 21918 }, { "epoch": 0.6399521181863304, "grad_norm": 0.5085209656206079, "learning_rate": 2.000324412003244e-05, "loss": 0.5872, "step": 21919 }, { "epoch": 0.6399813144141777, "grad_norm": 0.515739252457066, "learning_rate": 2.000162206001622e-05, "loss": 0.5883, "step": 21920 }, { "epoch": 0.6400105106420251, "grad_norm": 0.5115249837811261, "learning_rate": 2e-05, "loss": 0.5212, "step": 21921 }, { "epoch": 0.6400397068698724, "grad_norm": 0.5283147685513186, "learning_rate": 1.999837793998378e-05, "loss": 0.5911, "step": 21922 }, { "epoch": 0.6400689030977198, "grad_norm": 0.5433876714959075, "learning_rate": 1.9996755879967562e-05, "loss": 0.604, "step": 21923 }, { "epoch": 0.6400980993255672, "grad_norm": 0.475049565408889, "learning_rate": 1.999513381995134e-05, "loss": 0.5085, "step": 21924 }, { "epoch": 0.6401272955534145, "grad_norm": 0.5790827279301658, "learning_rate": 1.999351175993512e-05, "loss": 0.6546, "step": 21925 }, { "epoch": 0.6401564917812619, "grad_norm": 0.5382741324575798, "learning_rate": 1.9991889699918897e-05, "loss": 0.6506, "step": 21926 }, { "epoch": 0.6401856880091092, "grad_norm": 0.545677849097955, "learning_rate": 1.9990267639902675e-05, "loss": 0.6514, "step": 21927 }, { "epoch": 0.6402148842369566, "grad_norm": 0.5483143779286999, "learning_rate": 1.9988645579886457e-05, "loss": 0.6818, "step": 21928 }, { "epoch": 0.640244080464804, "grad_norm": 0.5295844523381259, "learning_rate": 1.9987023519870235e-05, "loss": 0.5914, "step": 21929 }, { "epoch": 0.6402732766926513, "grad_norm": 0.5176998461784944, "learning_rate": 1.9985401459854017e-05, "loss": 0.619, "step": 21930 }, { "epoch": 0.6403024729204987, "grad_norm": 0.4904735966562154, "learning_rate": 1.9983779399837796e-05, "loss": 0.5487, "step": 21931 }, { "epoch": 0.640331669148346, "grad_norm": 0.5415859892301963, "learning_rate": 1.9982157339821574e-05, "loss": 0.6375, "step": 21932 }, { "epoch": 0.6403608653761934, "grad_norm": 0.4776389077896517, "learning_rate": 1.9980535279805356e-05, "loss": 0.5214, "step": 21933 }, { "epoch": 0.6403900616040408, "grad_norm": 0.5556906725294436, "learning_rate": 1.9978913219789134e-05, "loss": 0.657, "step": 21934 }, { "epoch": 0.6404192578318881, "grad_norm": 0.49418428720346724, "learning_rate": 1.9977291159772912e-05, "loss": 0.5736, "step": 21935 }, { "epoch": 0.6404484540597355, "grad_norm": 0.515750591017577, "learning_rate": 1.997566909975669e-05, "loss": 0.5972, "step": 21936 }, { "epoch": 0.6404776502875829, "grad_norm": 0.5447231520729493, "learning_rate": 1.997404703974047e-05, "loss": 0.7011, "step": 21937 }, { "epoch": 0.6405068465154302, "grad_norm": 0.503546103101846, "learning_rate": 1.997242497972425e-05, "loss": 0.5715, "step": 21938 }, { "epoch": 0.6405360427432776, "grad_norm": 0.5423425977722506, "learning_rate": 1.9970802919708033e-05, "loss": 0.6197, "step": 21939 }, { "epoch": 0.6405652389711249, "grad_norm": 0.5150794052034056, "learning_rate": 1.996918085969181e-05, "loss": 0.6056, "step": 21940 }, { "epoch": 0.6405944351989723, "grad_norm": 0.5775614506866791, "learning_rate": 1.996755879967559e-05, "loss": 0.7044, "step": 21941 }, { "epoch": 0.6406236314268197, "grad_norm": 0.538477599348283, "learning_rate": 1.9965936739659368e-05, "loss": 0.6513, "step": 21942 }, { "epoch": 0.640652827654667, "grad_norm": 0.5003470460590115, "learning_rate": 1.996431467964315e-05, "loss": 0.5343, "step": 21943 }, { "epoch": 0.6406820238825144, "grad_norm": 0.5261057091774254, "learning_rate": 1.9962692619626928e-05, "loss": 0.568, "step": 21944 }, { "epoch": 0.6407112201103617, "grad_norm": 0.5053039615277499, "learning_rate": 1.9961070559610706e-05, "loss": 0.5551, "step": 21945 }, { "epoch": 0.6407404163382091, "grad_norm": 0.4950649311781468, "learning_rate": 1.9959448499594485e-05, "loss": 0.5594, "step": 21946 }, { "epoch": 0.6407696125660565, "grad_norm": 0.5189065460069993, "learning_rate": 1.9957826439578263e-05, "loss": 0.6318, "step": 21947 }, { "epoch": 0.6407988087939038, "grad_norm": 0.4957145250108978, "learning_rate": 1.9956204379562045e-05, "loss": 0.5554, "step": 21948 }, { "epoch": 0.6408280050217512, "grad_norm": 0.5709579350473526, "learning_rate": 1.9954582319545827e-05, "loss": 0.5979, "step": 21949 }, { "epoch": 0.6408572012495986, "grad_norm": 0.54279608383637, "learning_rate": 1.9952960259529605e-05, "loss": 0.6165, "step": 21950 }, { "epoch": 0.6408863974774459, "grad_norm": 0.5463410505608225, "learning_rate": 1.9951338199513383e-05, "loss": 0.634, "step": 21951 }, { "epoch": 0.6409155937052933, "grad_norm": 0.47797900135454047, "learning_rate": 1.9949716139497162e-05, "loss": 0.5396, "step": 21952 }, { "epoch": 0.6409447899331406, "grad_norm": 0.533433994953122, "learning_rate": 1.994809407948094e-05, "loss": 0.6512, "step": 21953 }, { "epoch": 0.640973986160988, "grad_norm": 0.5372481127603478, "learning_rate": 1.9946472019464722e-05, "loss": 0.6462, "step": 21954 }, { "epoch": 0.6410031823888354, "grad_norm": 0.5243178267980666, "learning_rate": 1.99448499594485e-05, "loss": 0.6202, "step": 21955 }, { "epoch": 0.6410323786166827, "grad_norm": 0.5596454623970741, "learning_rate": 1.994322789943228e-05, "loss": 0.7209, "step": 21956 }, { "epoch": 0.6410615748445301, "grad_norm": 0.5540366914300286, "learning_rate": 1.9941605839416057e-05, "loss": 0.6739, "step": 21957 }, { "epoch": 0.6410907710723774, "grad_norm": 0.5467346211371188, "learning_rate": 1.993998377939984e-05, "loss": 0.6505, "step": 21958 }, { "epoch": 0.6411199673002248, "grad_norm": 0.4956948950594085, "learning_rate": 1.993836171938362e-05, "loss": 0.5225, "step": 21959 }, { "epoch": 0.6411491635280722, "grad_norm": 0.5262107844043702, "learning_rate": 1.99367396593674e-05, "loss": 0.6261, "step": 21960 }, { "epoch": 0.6411783597559195, "grad_norm": 0.47218146625496926, "learning_rate": 1.9935117599351177e-05, "loss": 0.5134, "step": 21961 }, { "epoch": 0.6412075559837669, "grad_norm": 0.5251745112746202, "learning_rate": 1.9933495539334956e-05, "loss": 0.6488, "step": 21962 }, { "epoch": 0.6412367522116142, "grad_norm": 0.4953008863411949, "learning_rate": 1.9931873479318734e-05, "loss": 0.569, "step": 21963 }, { "epoch": 0.6412659484394616, "grad_norm": 0.5059206899867935, "learning_rate": 1.9930251419302516e-05, "loss": 0.5803, "step": 21964 }, { "epoch": 0.641295144667309, "grad_norm": 0.5262242983130102, "learning_rate": 1.9928629359286294e-05, "loss": 0.5978, "step": 21965 }, { "epoch": 0.6413243408951563, "grad_norm": 0.5425402227693438, "learning_rate": 1.9927007299270073e-05, "loss": 0.6699, "step": 21966 }, { "epoch": 0.6413535371230037, "grad_norm": 0.5248262021708142, "learning_rate": 1.992538523925385e-05, "loss": 0.6053, "step": 21967 }, { "epoch": 0.641382733350851, "grad_norm": 0.5481408924401093, "learning_rate": 1.9923763179237633e-05, "loss": 0.6117, "step": 21968 }, { "epoch": 0.6414119295786984, "grad_norm": 0.5003686820386353, "learning_rate": 1.9922141119221414e-05, "loss": 0.5745, "step": 21969 }, { "epoch": 0.6414411258065458, "grad_norm": 0.5152026146727257, "learning_rate": 1.9920519059205193e-05, "loss": 0.5557, "step": 21970 }, { "epoch": 0.6414703220343931, "grad_norm": 0.5679993517741256, "learning_rate": 1.991889699918897e-05, "loss": 0.6766, "step": 21971 }, { "epoch": 0.6414995182622405, "grad_norm": 0.509065713994129, "learning_rate": 1.991727493917275e-05, "loss": 0.6016, "step": 21972 }, { "epoch": 0.6415287144900879, "grad_norm": 0.49950411720858195, "learning_rate": 1.9915652879156528e-05, "loss": 0.5526, "step": 21973 }, { "epoch": 0.6415579107179352, "grad_norm": 0.5018798000186581, "learning_rate": 1.991403081914031e-05, "loss": 0.575, "step": 21974 }, { "epoch": 0.6415871069457826, "grad_norm": 0.4714856460393907, "learning_rate": 1.9912408759124088e-05, "loss": 0.5216, "step": 21975 }, { "epoch": 0.6416163031736299, "grad_norm": 0.4983183596723654, "learning_rate": 1.9910786699107866e-05, "loss": 0.5671, "step": 21976 }, { "epoch": 0.6416454994014773, "grad_norm": 0.542441465720787, "learning_rate": 1.9909164639091648e-05, "loss": 0.6336, "step": 21977 }, { "epoch": 0.6416746956293247, "grad_norm": 0.46844251742503273, "learning_rate": 1.9907542579075427e-05, "loss": 0.5114, "step": 21978 }, { "epoch": 0.641703891857172, "grad_norm": 0.522222259222927, "learning_rate": 1.990592051905921e-05, "loss": 0.6211, "step": 21979 }, { "epoch": 0.6417330880850194, "grad_norm": 0.5338281197767545, "learning_rate": 1.9904298459042987e-05, "loss": 0.6248, "step": 21980 }, { "epoch": 0.6417622843128667, "grad_norm": 0.520766198150497, "learning_rate": 1.9902676399026765e-05, "loss": 0.5933, "step": 21981 }, { "epoch": 0.6417914805407141, "grad_norm": 0.5338361944903124, "learning_rate": 1.9901054339010543e-05, "loss": 0.6267, "step": 21982 }, { "epoch": 0.6418206767685615, "grad_norm": 0.5229732906214719, "learning_rate": 1.9899432278994322e-05, "loss": 0.5845, "step": 21983 }, { "epoch": 0.6418498729964088, "grad_norm": 0.5565755163750518, "learning_rate": 1.9897810218978104e-05, "loss": 0.6391, "step": 21984 }, { "epoch": 0.6418790692242562, "grad_norm": 0.5304665196050743, "learning_rate": 1.9896188158961882e-05, "loss": 0.6208, "step": 21985 }, { "epoch": 0.6419082654521036, "grad_norm": 0.5261033154177353, "learning_rate": 1.989456609894566e-05, "loss": 0.6388, "step": 21986 }, { "epoch": 0.6419374616799509, "grad_norm": 0.5215887352254756, "learning_rate": 1.9892944038929442e-05, "loss": 0.6076, "step": 21987 }, { "epoch": 0.6419666579077983, "grad_norm": 0.5247916667774293, "learning_rate": 1.989132197891322e-05, "loss": 0.5901, "step": 21988 }, { "epoch": 0.6419958541356456, "grad_norm": 0.5201314217110637, "learning_rate": 1.9889699918897002e-05, "loss": 0.616, "step": 21989 }, { "epoch": 0.642025050363493, "grad_norm": 0.5230508190692362, "learning_rate": 1.988807785888078e-05, "loss": 0.5674, "step": 21990 }, { "epoch": 0.6420542465913404, "grad_norm": 0.5415605069854591, "learning_rate": 1.988645579886456e-05, "loss": 0.5799, "step": 21991 }, { "epoch": 0.6420834428191877, "grad_norm": 0.5504387893686624, "learning_rate": 1.9884833738848337e-05, "loss": 0.6789, "step": 21992 }, { "epoch": 0.6421126390470351, "grad_norm": 0.4855822400311158, "learning_rate": 1.9883211678832116e-05, "loss": 0.5178, "step": 21993 }, { "epoch": 0.6421418352748824, "grad_norm": 0.5291038261070277, "learning_rate": 1.9881589618815897e-05, "loss": 0.6301, "step": 21994 }, { "epoch": 0.6421710315027298, "grad_norm": 0.5355031811909898, "learning_rate": 1.9879967558799676e-05, "loss": 0.5939, "step": 21995 }, { "epoch": 0.6422002277305772, "grad_norm": 0.49641956610210436, "learning_rate": 1.9878345498783458e-05, "loss": 0.5778, "step": 21996 }, { "epoch": 0.6422294239584245, "grad_norm": 0.49305246616914294, "learning_rate": 1.9876723438767236e-05, "loss": 0.5618, "step": 21997 }, { "epoch": 0.6422586201862719, "grad_norm": 0.4991945475203669, "learning_rate": 1.9875101378751014e-05, "loss": 0.5609, "step": 21998 }, { "epoch": 0.6422878164141193, "grad_norm": 0.5331337483436565, "learning_rate": 1.9873479318734796e-05, "loss": 0.6262, "step": 21999 }, { "epoch": 0.6423170126419666, "grad_norm": 0.48617719470331433, "learning_rate": 1.9871857258718574e-05, "loss": 0.5066, "step": 22000 }, { "epoch": 0.642346208869814, "grad_norm": 0.5339050513747634, "learning_rate": 1.9870235198702353e-05, "loss": 0.5865, "step": 22001 }, { "epoch": 0.6423754050976613, "grad_norm": 0.5003871311966432, "learning_rate": 1.986861313868613e-05, "loss": 0.59, "step": 22002 }, { "epoch": 0.6424046013255087, "grad_norm": 0.5013196596575353, "learning_rate": 1.986699107866991e-05, "loss": 0.5838, "step": 22003 }, { "epoch": 0.6424337975533561, "grad_norm": 0.4935608266656954, "learning_rate": 1.986536901865369e-05, "loss": 0.5705, "step": 22004 }, { "epoch": 0.6424629937812034, "grad_norm": 0.522937434801185, "learning_rate": 1.9863746958637473e-05, "loss": 0.6251, "step": 22005 }, { "epoch": 0.6424921900090508, "grad_norm": 0.5512894082950146, "learning_rate": 1.986212489862125e-05, "loss": 0.6285, "step": 22006 }, { "epoch": 0.6425213862368981, "grad_norm": 0.5101923815233265, "learning_rate": 1.986050283860503e-05, "loss": 0.5818, "step": 22007 }, { "epoch": 0.6425505824647455, "grad_norm": 0.4739596004783398, "learning_rate": 1.9858880778588808e-05, "loss": 0.5104, "step": 22008 }, { "epoch": 0.6425797786925929, "grad_norm": 0.53879380541173, "learning_rate": 1.985725871857259e-05, "loss": 0.6477, "step": 22009 }, { "epoch": 0.6426089749204403, "grad_norm": 0.5247106951084634, "learning_rate": 1.985563665855637e-05, "loss": 0.6238, "step": 22010 }, { "epoch": 0.6426381711482877, "grad_norm": 0.5332508834944384, "learning_rate": 1.9854014598540147e-05, "loss": 0.6174, "step": 22011 }, { "epoch": 0.6426673673761351, "grad_norm": 0.5084378793964347, "learning_rate": 1.9852392538523925e-05, "loss": 0.5798, "step": 22012 }, { "epoch": 0.6426965636039824, "grad_norm": 0.5357830134264739, "learning_rate": 1.9850770478507704e-05, "loss": 0.637, "step": 22013 }, { "epoch": 0.6427257598318298, "grad_norm": 0.5163060877840012, "learning_rate": 1.9849148418491485e-05, "loss": 0.5394, "step": 22014 }, { "epoch": 0.6427549560596771, "grad_norm": 0.46913444017150746, "learning_rate": 1.9847526358475267e-05, "loss": 0.5053, "step": 22015 }, { "epoch": 0.6427841522875245, "grad_norm": 0.5125888517395355, "learning_rate": 1.9845904298459045e-05, "loss": 0.5434, "step": 22016 }, { "epoch": 0.6428133485153719, "grad_norm": 0.5138311625397824, "learning_rate": 1.9844282238442824e-05, "loss": 0.5693, "step": 22017 }, { "epoch": 0.6428425447432192, "grad_norm": 0.5268327344127457, "learning_rate": 1.9842660178426602e-05, "loss": 0.5962, "step": 22018 }, { "epoch": 0.6428717409710666, "grad_norm": 0.5670231513260982, "learning_rate": 1.984103811841038e-05, "loss": 0.7042, "step": 22019 }, { "epoch": 0.642900937198914, "grad_norm": 0.5107617970985623, "learning_rate": 1.9839416058394162e-05, "loss": 0.5975, "step": 22020 }, { "epoch": 0.6429301334267613, "grad_norm": 0.48288560715415463, "learning_rate": 1.983779399837794e-05, "loss": 0.5343, "step": 22021 }, { "epoch": 0.6429593296546087, "grad_norm": 0.5292110951220924, "learning_rate": 1.983617193836172e-05, "loss": 0.6215, "step": 22022 }, { "epoch": 0.642988525882456, "grad_norm": 0.5436836775873403, "learning_rate": 1.9834549878345497e-05, "loss": 0.6138, "step": 22023 }, { "epoch": 0.6430177221103034, "grad_norm": 0.48979867485560374, "learning_rate": 1.983292781832928e-05, "loss": 0.5663, "step": 22024 }, { "epoch": 0.6430469183381508, "grad_norm": 0.5277296871282829, "learning_rate": 1.983130575831306e-05, "loss": 0.5859, "step": 22025 }, { "epoch": 0.6430761145659981, "grad_norm": 0.5331176866592999, "learning_rate": 1.982968369829684e-05, "loss": 0.6389, "step": 22026 }, { "epoch": 0.6431053107938455, "grad_norm": 0.5461459351222244, "learning_rate": 1.9828061638280618e-05, "loss": 0.6194, "step": 22027 }, { "epoch": 0.6431345070216928, "grad_norm": 0.5419062130728735, "learning_rate": 1.9826439578264396e-05, "loss": 0.6209, "step": 22028 }, { "epoch": 0.6431637032495402, "grad_norm": 0.5112031277745446, "learning_rate": 1.9824817518248174e-05, "loss": 0.5869, "step": 22029 }, { "epoch": 0.6431928994773876, "grad_norm": 0.5354372512553894, "learning_rate": 1.9823195458231956e-05, "loss": 0.6422, "step": 22030 }, { "epoch": 0.6432220957052349, "grad_norm": 0.5255714542972515, "learning_rate": 1.9821573398215735e-05, "loss": 0.605, "step": 22031 }, { "epoch": 0.6432512919330823, "grad_norm": 0.5347567755041853, "learning_rate": 1.9819951338199513e-05, "loss": 0.6158, "step": 22032 }, { "epoch": 0.6432804881609296, "grad_norm": 0.5825455751674092, "learning_rate": 1.981832927818329e-05, "loss": 0.7153, "step": 22033 }, { "epoch": 0.643309684388777, "grad_norm": 0.5309177620356257, "learning_rate": 1.9816707218167073e-05, "loss": 0.5902, "step": 22034 }, { "epoch": 0.6433388806166244, "grad_norm": 0.518858362167316, "learning_rate": 1.9815085158150855e-05, "loss": 0.6149, "step": 22035 }, { "epoch": 0.6433680768444717, "grad_norm": 0.5134840887743891, "learning_rate": 1.9813463098134633e-05, "loss": 0.5996, "step": 22036 }, { "epoch": 0.6433972730723191, "grad_norm": 0.5237118017441942, "learning_rate": 1.981184103811841e-05, "loss": 0.6296, "step": 22037 }, { "epoch": 0.6434264693001664, "grad_norm": 0.49962153083316674, "learning_rate": 1.981021897810219e-05, "loss": 0.5758, "step": 22038 }, { "epoch": 0.6434556655280138, "grad_norm": 0.48518742989793817, "learning_rate": 1.980859691808597e-05, "loss": 0.5456, "step": 22039 }, { "epoch": 0.6434848617558612, "grad_norm": 0.6008572746264917, "learning_rate": 1.980697485806975e-05, "loss": 0.6857, "step": 22040 }, { "epoch": 0.6435140579837085, "grad_norm": 0.48241782934552296, "learning_rate": 1.980535279805353e-05, "loss": 0.5415, "step": 22041 }, { "epoch": 0.6435432542115559, "grad_norm": 0.4715034719885312, "learning_rate": 1.9803730738037307e-05, "loss": 0.5028, "step": 22042 }, { "epoch": 0.6435724504394033, "grad_norm": 0.4709439632013643, "learning_rate": 1.980210867802109e-05, "loss": 0.5166, "step": 22043 }, { "epoch": 0.6436016466672506, "grad_norm": 0.5397157834461708, "learning_rate": 1.9800486618004867e-05, "loss": 0.6417, "step": 22044 }, { "epoch": 0.643630842895098, "grad_norm": 0.4804017255126101, "learning_rate": 1.979886455798865e-05, "loss": 0.5414, "step": 22045 }, { "epoch": 0.6436600391229453, "grad_norm": 0.4803685833023285, "learning_rate": 1.9797242497972427e-05, "loss": 0.5586, "step": 22046 }, { "epoch": 0.6436892353507927, "grad_norm": 0.5026397925058231, "learning_rate": 1.9795620437956205e-05, "loss": 0.5752, "step": 22047 }, { "epoch": 0.6437184315786401, "grad_norm": 0.5549172463685811, "learning_rate": 1.9793998377939984e-05, "loss": 0.7138, "step": 22048 }, { "epoch": 0.6437476278064874, "grad_norm": 0.5194788067514667, "learning_rate": 1.9792376317923762e-05, "loss": 0.5948, "step": 22049 }, { "epoch": 0.6437768240343348, "grad_norm": 0.5466852882288767, "learning_rate": 1.9790754257907544e-05, "loss": 0.6819, "step": 22050 }, { "epoch": 0.6438060202621821, "grad_norm": 0.548457435200073, "learning_rate": 1.9789132197891322e-05, "loss": 0.5796, "step": 22051 }, { "epoch": 0.6438352164900295, "grad_norm": 0.5018406016371895, "learning_rate": 1.97875101378751e-05, "loss": 0.5784, "step": 22052 }, { "epoch": 0.6438644127178769, "grad_norm": 0.6152232200442265, "learning_rate": 1.9785888077858883e-05, "loss": 0.754, "step": 22053 }, { "epoch": 0.6438936089457242, "grad_norm": 0.528173832066957, "learning_rate": 1.978426601784266e-05, "loss": 0.6467, "step": 22054 }, { "epoch": 0.6439228051735716, "grad_norm": 0.5563928500931377, "learning_rate": 1.9782643957826443e-05, "loss": 0.6546, "step": 22055 }, { "epoch": 0.643952001401419, "grad_norm": 0.5088403114481306, "learning_rate": 1.978102189781022e-05, "loss": 0.6223, "step": 22056 }, { "epoch": 0.6439811976292663, "grad_norm": 0.5448122449466646, "learning_rate": 1.9779399837794e-05, "loss": 0.6977, "step": 22057 }, { "epoch": 0.6440103938571137, "grad_norm": 0.4699316455159704, "learning_rate": 1.9777777777777778e-05, "loss": 0.5197, "step": 22058 }, { "epoch": 0.644039590084961, "grad_norm": 0.5279161018262044, "learning_rate": 1.9776155717761556e-05, "loss": 0.58, "step": 22059 }, { "epoch": 0.6440687863128084, "grad_norm": 0.49083016689836295, "learning_rate": 1.9774533657745338e-05, "loss": 0.5406, "step": 22060 }, { "epoch": 0.6440979825406558, "grad_norm": 0.5231317318097474, "learning_rate": 1.9772911597729116e-05, "loss": 0.5492, "step": 22061 }, { "epoch": 0.6441271787685031, "grad_norm": 0.5279178528909123, "learning_rate": 1.9771289537712898e-05, "loss": 0.6204, "step": 22062 }, { "epoch": 0.6441563749963505, "grad_norm": 0.4907150372269818, "learning_rate": 1.9769667477696676e-05, "loss": 0.5456, "step": 22063 }, { "epoch": 0.6441855712241978, "grad_norm": 0.47485956669942847, "learning_rate": 1.9768045417680455e-05, "loss": 0.531, "step": 22064 }, { "epoch": 0.6442147674520452, "grad_norm": 0.5039657104102566, "learning_rate": 1.9766423357664237e-05, "loss": 0.6037, "step": 22065 }, { "epoch": 0.6442439636798926, "grad_norm": 0.5340519011564153, "learning_rate": 1.9764801297648015e-05, "loss": 0.6647, "step": 22066 }, { "epoch": 0.6442731599077399, "grad_norm": 0.513720842770768, "learning_rate": 1.9763179237631793e-05, "loss": 0.5399, "step": 22067 }, { "epoch": 0.6443023561355873, "grad_norm": 0.5011206051222695, "learning_rate": 1.976155717761557e-05, "loss": 0.5806, "step": 22068 }, { "epoch": 0.6443315523634346, "grad_norm": 0.5183715142087166, "learning_rate": 1.975993511759935e-05, "loss": 0.617, "step": 22069 }, { "epoch": 0.644360748591282, "grad_norm": 0.5455713528262813, "learning_rate": 1.9758313057583132e-05, "loss": 0.572, "step": 22070 }, { "epoch": 0.6443899448191294, "grad_norm": 0.530452366660286, "learning_rate": 1.975669099756691e-05, "loss": 0.5874, "step": 22071 }, { "epoch": 0.6444191410469767, "grad_norm": 0.4875403936355975, "learning_rate": 1.9755068937550692e-05, "loss": 0.541, "step": 22072 }, { "epoch": 0.6444483372748241, "grad_norm": 0.49101752802096243, "learning_rate": 1.975344687753447e-05, "loss": 0.5729, "step": 22073 }, { "epoch": 0.6444775335026715, "grad_norm": 0.529555482895131, "learning_rate": 1.975182481751825e-05, "loss": 0.6119, "step": 22074 }, { "epoch": 0.6445067297305188, "grad_norm": 0.507844761587897, "learning_rate": 1.9750202757502027e-05, "loss": 0.6239, "step": 22075 }, { "epoch": 0.6445359259583662, "grad_norm": 0.44861853672097346, "learning_rate": 1.974858069748581e-05, "loss": 0.4683, "step": 22076 }, { "epoch": 0.6445651221862135, "grad_norm": 0.529700008007679, "learning_rate": 1.9746958637469587e-05, "loss": 0.6373, "step": 22077 }, { "epoch": 0.6445943184140609, "grad_norm": 0.5510740574699029, "learning_rate": 1.9745336577453366e-05, "loss": 0.6392, "step": 22078 }, { "epoch": 0.6446235146419083, "grad_norm": 0.5267753704032035, "learning_rate": 1.9743714517437144e-05, "loss": 0.576, "step": 22079 }, { "epoch": 0.6446527108697556, "grad_norm": 0.5088159113637527, "learning_rate": 1.9742092457420926e-05, "loss": 0.5543, "step": 22080 }, { "epoch": 0.644681907097603, "grad_norm": 0.5350929254030992, "learning_rate": 1.9740470397404707e-05, "loss": 0.6271, "step": 22081 }, { "epoch": 0.6447111033254503, "grad_norm": 0.5232155742628518, "learning_rate": 1.9738848337388486e-05, "loss": 0.5723, "step": 22082 }, { "epoch": 0.6447402995532977, "grad_norm": 0.5210771533743487, "learning_rate": 1.9737226277372264e-05, "loss": 0.5421, "step": 22083 }, { "epoch": 0.6447694957811451, "grad_norm": 0.5112707258790123, "learning_rate": 1.9735604217356043e-05, "loss": 0.5465, "step": 22084 }, { "epoch": 0.6447986920089924, "grad_norm": 0.5512026535778475, "learning_rate": 1.973398215733982e-05, "loss": 0.6369, "step": 22085 }, { "epoch": 0.6448278882368398, "grad_norm": 0.5126838703741446, "learning_rate": 1.9732360097323603e-05, "loss": 0.5545, "step": 22086 }, { "epoch": 0.6448570844646871, "grad_norm": 0.493207601399362, "learning_rate": 1.973073803730738e-05, "loss": 0.5245, "step": 22087 }, { "epoch": 0.6448862806925345, "grad_norm": 0.5436733971945644, "learning_rate": 1.972911597729116e-05, "loss": 0.6464, "step": 22088 }, { "epoch": 0.6449154769203819, "grad_norm": 0.5057453590483474, "learning_rate": 1.9727493917274938e-05, "loss": 0.5177, "step": 22089 }, { "epoch": 0.6449446731482292, "grad_norm": 0.6010063487957961, "learning_rate": 1.972587185725872e-05, "loss": 0.7066, "step": 22090 }, { "epoch": 0.6449738693760766, "grad_norm": 0.5040732257939692, "learning_rate": 1.97242497972425e-05, "loss": 0.5582, "step": 22091 }, { "epoch": 0.645003065603924, "grad_norm": 0.46382583420370516, "learning_rate": 1.972262773722628e-05, "loss": 0.4688, "step": 22092 }, { "epoch": 0.6450322618317713, "grad_norm": 0.5223913478932847, "learning_rate": 1.9721005677210058e-05, "loss": 0.5727, "step": 22093 }, { "epoch": 0.6450614580596187, "grad_norm": 0.5434427816230503, "learning_rate": 1.9719383617193837e-05, "loss": 0.6467, "step": 22094 }, { "epoch": 0.645090654287466, "grad_norm": 0.5629256720308237, "learning_rate": 1.9717761557177615e-05, "loss": 0.684, "step": 22095 }, { "epoch": 0.6451198505153134, "grad_norm": 0.5096639864713185, "learning_rate": 1.9716139497161397e-05, "loss": 0.5736, "step": 22096 }, { "epoch": 0.6451490467431608, "grad_norm": 0.5248657298372343, "learning_rate": 1.9714517437145175e-05, "loss": 0.5628, "step": 22097 }, { "epoch": 0.6451782429710081, "grad_norm": 0.5427286424743794, "learning_rate": 1.9712895377128953e-05, "loss": 0.6208, "step": 22098 }, { "epoch": 0.6452074391988555, "grad_norm": 0.5262814311155146, "learning_rate": 1.9711273317112732e-05, "loss": 0.5959, "step": 22099 }, { "epoch": 0.6452366354267028, "grad_norm": 0.5298245641099458, "learning_rate": 1.9709651257096514e-05, "loss": 0.5962, "step": 22100 }, { "epoch": 0.6452658316545502, "grad_norm": 0.5158485782318546, "learning_rate": 1.9708029197080295e-05, "loss": 0.6143, "step": 22101 }, { "epoch": 0.6452950278823976, "grad_norm": 0.5169393200362403, "learning_rate": 1.9706407137064074e-05, "loss": 0.6012, "step": 22102 }, { "epoch": 0.6453242241102449, "grad_norm": 0.4889198217451442, "learning_rate": 1.9704785077047852e-05, "loss": 0.5306, "step": 22103 }, { "epoch": 0.6453534203380923, "grad_norm": 0.508383045786823, "learning_rate": 1.970316301703163e-05, "loss": 0.6144, "step": 22104 }, { "epoch": 0.6453826165659396, "grad_norm": 0.5098029198320222, "learning_rate": 1.970154095701541e-05, "loss": 0.5991, "step": 22105 }, { "epoch": 0.645411812793787, "grad_norm": 0.5310020289728345, "learning_rate": 1.969991889699919e-05, "loss": 0.6257, "step": 22106 }, { "epoch": 0.6454410090216344, "grad_norm": 0.5259035769576982, "learning_rate": 1.969829683698297e-05, "loss": 0.6126, "step": 22107 }, { "epoch": 0.6454702052494817, "grad_norm": 0.5157227974031566, "learning_rate": 1.9696674776966747e-05, "loss": 0.586, "step": 22108 }, { "epoch": 0.6454994014773291, "grad_norm": 0.5276677058841615, "learning_rate": 1.969505271695053e-05, "loss": 0.6364, "step": 22109 }, { "epoch": 0.6455285977051765, "grad_norm": 0.5019908166407732, "learning_rate": 1.9693430656934307e-05, "loss": 0.5521, "step": 22110 }, { "epoch": 0.6455577939330238, "grad_norm": 0.5233090982398069, "learning_rate": 1.969180859691809e-05, "loss": 0.6014, "step": 22111 }, { "epoch": 0.6455869901608712, "grad_norm": 0.552295160289593, "learning_rate": 1.9690186536901868e-05, "loss": 0.6709, "step": 22112 }, { "epoch": 0.6456161863887185, "grad_norm": 0.5427409985123858, "learning_rate": 1.9688564476885646e-05, "loss": 0.6214, "step": 22113 }, { "epoch": 0.6456453826165659, "grad_norm": 0.5314687860993854, "learning_rate": 1.9686942416869424e-05, "loss": 0.6342, "step": 22114 }, { "epoch": 0.6456745788444133, "grad_norm": 0.5015718376559682, "learning_rate": 1.9685320356853203e-05, "loss": 0.5863, "step": 22115 }, { "epoch": 0.6457037750722606, "grad_norm": 0.5446165313479389, "learning_rate": 1.9683698296836984e-05, "loss": 0.6096, "step": 22116 }, { "epoch": 0.645732971300108, "grad_norm": 0.5027395256814113, "learning_rate": 1.9682076236820763e-05, "loss": 0.5587, "step": 22117 }, { "epoch": 0.6457621675279553, "grad_norm": 0.5116346523042526, "learning_rate": 1.968045417680454e-05, "loss": 0.5709, "step": 22118 }, { "epoch": 0.6457913637558027, "grad_norm": 0.5342086921846183, "learning_rate": 1.9678832116788323e-05, "loss": 0.5986, "step": 22119 }, { "epoch": 0.6458205599836501, "grad_norm": 0.5490928467087114, "learning_rate": 1.96772100567721e-05, "loss": 0.5792, "step": 22120 }, { "epoch": 0.6458497562114974, "grad_norm": 0.5029642973096018, "learning_rate": 1.9675587996755883e-05, "loss": 0.5749, "step": 22121 }, { "epoch": 0.6458789524393448, "grad_norm": 0.4958334887090325, "learning_rate": 1.967396593673966e-05, "loss": 0.5749, "step": 22122 }, { "epoch": 0.6459081486671922, "grad_norm": 0.5199529692069651, "learning_rate": 1.967234387672344e-05, "loss": 0.5929, "step": 22123 }, { "epoch": 0.6459373448950395, "grad_norm": 0.5095571795125422, "learning_rate": 1.9670721816707218e-05, "loss": 0.5429, "step": 22124 }, { "epoch": 0.6459665411228869, "grad_norm": 0.5223760340014655, "learning_rate": 1.9669099756690997e-05, "loss": 0.6133, "step": 22125 }, { "epoch": 0.6459957373507342, "grad_norm": 0.5215596767906988, "learning_rate": 1.966747769667478e-05, "loss": 0.6064, "step": 22126 }, { "epoch": 0.6460249335785816, "grad_norm": 0.5048538084711489, "learning_rate": 1.9665855636658557e-05, "loss": 0.6139, "step": 22127 }, { "epoch": 0.646054129806429, "grad_norm": 0.4983975254698575, "learning_rate": 1.966423357664234e-05, "loss": 0.56, "step": 22128 }, { "epoch": 0.6460833260342763, "grad_norm": 0.5421985924590337, "learning_rate": 1.9662611516626117e-05, "loss": 0.6776, "step": 22129 }, { "epoch": 0.6461125222621237, "grad_norm": 0.5309651760444367, "learning_rate": 1.9660989456609895e-05, "loss": 0.6502, "step": 22130 }, { "epoch": 0.6461417184899712, "grad_norm": 0.5037455869917756, "learning_rate": 1.9659367396593677e-05, "loss": 0.5633, "step": 22131 }, { "epoch": 0.6461709147178185, "grad_norm": 0.5443735521924651, "learning_rate": 1.9657745336577455e-05, "loss": 0.6503, "step": 22132 }, { "epoch": 0.6462001109456659, "grad_norm": 0.51856221972526, "learning_rate": 1.9656123276561234e-05, "loss": 0.5711, "step": 22133 }, { "epoch": 0.6462293071735132, "grad_norm": 0.5150023198417927, "learning_rate": 1.9654501216545012e-05, "loss": 0.584, "step": 22134 }, { "epoch": 0.6462585034013606, "grad_norm": 0.5043964879935564, "learning_rate": 1.965287915652879e-05, "loss": 0.6073, "step": 22135 }, { "epoch": 0.646287699629208, "grad_norm": 0.5268027351086687, "learning_rate": 1.9651257096512572e-05, "loss": 0.5974, "step": 22136 }, { "epoch": 0.6463168958570553, "grad_norm": 0.5290959467951257, "learning_rate": 1.964963503649635e-05, "loss": 0.6277, "step": 22137 }, { "epoch": 0.6463460920849027, "grad_norm": 0.498396304754309, "learning_rate": 1.9648012976480132e-05, "loss": 0.5437, "step": 22138 }, { "epoch": 0.64637528831275, "grad_norm": 0.5018743611787974, "learning_rate": 1.964639091646391e-05, "loss": 0.6023, "step": 22139 }, { "epoch": 0.6464044845405974, "grad_norm": 0.49441883176260865, "learning_rate": 1.964476885644769e-05, "loss": 0.5625, "step": 22140 }, { "epoch": 0.6464336807684448, "grad_norm": 0.5296531241443225, "learning_rate": 1.9643146796431468e-05, "loss": 0.6385, "step": 22141 }, { "epoch": 0.6464628769962921, "grad_norm": 0.49645730954954914, "learning_rate": 1.964152473641525e-05, "loss": 0.561, "step": 22142 }, { "epoch": 0.6464920732241395, "grad_norm": 0.5475807981860135, "learning_rate": 1.9639902676399028e-05, "loss": 0.6572, "step": 22143 }, { "epoch": 0.6465212694519868, "grad_norm": 0.4911476163312035, "learning_rate": 1.9638280616382806e-05, "loss": 0.4962, "step": 22144 }, { "epoch": 0.6465504656798342, "grad_norm": 0.5230680038261475, "learning_rate": 1.9636658556366584e-05, "loss": 0.659, "step": 22145 }, { "epoch": 0.6465796619076816, "grad_norm": 0.6586510793134813, "learning_rate": 1.9635036496350366e-05, "loss": 0.6274, "step": 22146 }, { "epoch": 0.6466088581355289, "grad_norm": 0.5371675046265905, "learning_rate": 1.9633414436334148e-05, "loss": 0.5982, "step": 22147 }, { "epoch": 0.6466380543633763, "grad_norm": 0.5739989999420796, "learning_rate": 1.9631792376317926e-05, "loss": 0.6618, "step": 22148 }, { "epoch": 0.6466672505912237, "grad_norm": 0.5428886291530531, "learning_rate": 1.9630170316301705e-05, "loss": 0.6139, "step": 22149 }, { "epoch": 0.646696446819071, "grad_norm": 0.5329509463729414, "learning_rate": 1.9628548256285483e-05, "loss": 0.6316, "step": 22150 }, { "epoch": 0.6467256430469184, "grad_norm": 0.5498286542946992, "learning_rate": 1.962692619626926e-05, "loss": 0.6657, "step": 22151 }, { "epoch": 0.6467548392747657, "grad_norm": 0.5267051063812844, "learning_rate": 1.9625304136253043e-05, "loss": 0.5949, "step": 22152 }, { "epoch": 0.6467840355026131, "grad_norm": 0.5053456565742637, "learning_rate": 1.962368207623682e-05, "loss": 0.5653, "step": 22153 }, { "epoch": 0.6468132317304605, "grad_norm": 0.4992297836244453, "learning_rate": 1.96220600162206e-05, "loss": 0.5168, "step": 22154 }, { "epoch": 0.6468424279583078, "grad_norm": 0.5180200979934544, "learning_rate": 1.9620437956204378e-05, "loss": 0.6206, "step": 22155 }, { "epoch": 0.6468716241861552, "grad_norm": 0.5092425702987395, "learning_rate": 1.961881589618816e-05, "loss": 0.6, "step": 22156 }, { "epoch": 0.6469008204140025, "grad_norm": 0.5021470799786927, "learning_rate": 1.9617193836171942e-05, "loss": 0.5713, "step": 22157 }, { "epoch": 0.6469300166418499, "grad_norm": 0.5052179762907671, "learning_rate": 1.961557177615572e-05, "loss": 0.5317, "step": 22158 }, { "epoch": 0.6469592128696973, "grad_norm": 0.5218303612620029, "learning_rate": 1.96139497161395e-05, "loss": 0.6103, "step": 22159 }, { "epoch": 0.6469884090975446, "grad_norm": 0.5148178836145232, "learning_rate": 1.9612327656123277e-05, "loss": 0.5946, "step": 22160 }, { "epoch": 0.647017605325392, "grad_norm": 0.5909365502803415, "learning_rate": 1.9610705596107055e-05, "loss": 0.734, "step": 22161 }, { "epoch": 0.6470468015532393, "grad_norm": 0.5022225096641555, "learning_rate": 1.9609083536090837e-05, "loss": 0.62, "step": 22162 }, { "epoch": 0.6470759977810867, "grad_norm": 0.5151286977698357, "learning_rate": 1.9607461476074615e-05, "loss": 0.613, "step": 22163 }, { "epoch": 0.6471051940089341, "grad_norm": 0.5025697407780709, "learning_rate": 1.9605839416058394e-05, "loss": 0.5964, "step": 22164 }, { "epoch": 0.6471343902367814, "grad_norm": 0.5198647507240833, "learning_rate": 1.9604217356042172e-05, "loss": 0.5909, "step": 22165 }, { "epoch": 0.6471635864646288, "grad_norm": 0.5256033705916374, "learning_rate": 1.9602595296025954e-05, "loss": 0.6067, "step": 22166 }, { "epoch": 0.6471927826924762, "grad_norm": 0.5686981505066393, "learning_rate": 1.9600973236009736e-05, "loss": 0.6722, "step": 22167 }, { "epoch": 0.6472219789203235, "grad_norm": 0.5567834584137642, "learning_rate": 1.9599351175993514e-05, "loss": 0.609, "step": 22168 }, { "epoch": 0.6472511751481709, "grad_norm": 0.49627588589869076, "learning_rate": 1.9597729115977292e-05, "loss": 0.5596, "step": 22169 }, { "epoch": 0.6472803713760182, "grad_norm": 0.5639616983110083, "learning_rate": 1.959610705596107e-05, "loss": 0.6177, "step": 22170 }, { "epoch": 0.6473095676038656, "grad_norm": 0.5268121918279243, "learning_rate": 1.959448499594485e-05, "loss": 0.5946, "step": 22171 }, { "epoch": 0.647338763831713, "grad_norm": 0.5071428536817982, "learning_rate": 1.959286293592863e-05, "loss": 0.6001, "step": 22172 }, { "epoch": 0.6473679600595603, "grad_norm": 0.5089286027850161, "learning_rate": 1.959124087591241e-05, "loss": 0.5395, "step": 22173 }, { "epoch": 0.6473971562874077, "grad_norm": 0.5163114909892147, "learning_rate": 1.9589618815896188e-05, "loss": 0.609, "step": 22174 }, { "epoch": 0.647426352515255, "grad_norm": 0.5022823264958548, "learning_rate": 1.958799675587997e-05, "loss": 0.5609, "step": 22175 }, { "epoch": 0.6474555487431024, "grad_norm": 0.5441132314009944, "learning_rate": 1.9586374695863748e-05, "loss": 0.6259, "step": 22176 }, { "epoch": 0.6474847449709498, "grad_norm": 0.5579675326970643, "learning_rate": 1.958475263584753e-05, "loss": 0.6479, "step": 22177 }, { "epoch": 0.6475139411987971, "grad_norm": 0.5392806047600557, "learning_rate": 1.9583130575831308e-05, "loss": 0.6366, "step": 22178 }, { "epoch": 0.6475431374266445, "grad_norm": 0.4996035936553687, "learning_rate": 1.9581508515815086e-05, "loss": 0.5938, "step": 22179 }, { "epoch": 0.6475723336544918, "grad_norm": 0.499706474078497, "learning_rate": 1.9579886455798865e-05, "loss": 0.5504, "step": 22180 }, { "epoch": 0.6476015298823392, "grad_norm": 0.5182535580874661, "learning_rate": 1.9578264395782643e-05, "loss": 0.6176, "step": 22181 }, { "epoch": 0.6476307261101866, "grad_norm": 0.5617480918237694, "learning_rate": 1.9576642335766425e-05, "loss": 0.6383, "step": 22182 }, { "epoch": 0.6476599223380339, "grad_norm": 0.5359849893668756, "learning_rate": 1.9575020275750203e-05, "loss": 0.6543, "step": 22183 }, { "epoch": 0.6476891185658813, "grad_norm": 0.48375798411824006, "learning_rate": 1.957339821573398e-05, "loss": 0.5215, "step": 22184 }, { "epoch": 0.6477183147937287, "grad_norm": 0.5457110482350817, "learning_rate": 1.9571776155717763e-05, "loss": 0.6071, "step": 22185 }, { "epoch": 0.647747511021576, "grad_norm": 0.5049672509379071, "learning_rate": 1.9570154095701542e-05, "loss": 0.5333, "step": 22186 }, { "epoch": 0.6477767072494234, "grad_norm": 0.5054988864165503, "learning_rate": 1.9568532035685324e-05, "loss": 0.5889, "step": 22187 }, { "epoch": 0.6478059034772707, "grad_norm": 0.5184875916962093, "learning_rate": 1.9566909975669102e-05, "loss": 0.5519, "step": 22188 }, { "epoch": 0.6478350997051181, "grad_norm": 0.5472827149213403, "learning_rate": 1.956528791565288e-05, "loss": 0.6743, "step": 22189 }, { "epoch": 0.6478642959329655, "grad_norm": 0.5039218650508106, "learning_rate": 1.956366585563666e-05, "loss": 0.5382, "step": 22190 }, { "epoch": 0.6478934921608128, "grad_norm": 0.5486515751304402, "learning_rate": 1.9562043795620437e-05, "loss": 0.6032, "step": 22191 }, { "epoch": 0.6479226883886602, "grad_norm": 0.5538446344109096, "learning_rate": 1.956042173560422e-05, "loss": 0.6947, "step": 22192 }, { "epoch": 0.6479518846165075, "grad_norm": 0.5187305191990683, "learning_rate": 1.9558799675587997e-05, "loss": 0.6059, "step": 22193 }, { "epoch": 0.6479810808443549, "grad_norm": 0.5527840071093799, "learning_rate": 1.955717761557178e-05, "loss": 0.6228, "step": 22194 }, { "epoch": 0.6480102770722023, "grad_norm": 0.5451442338037145, "learning_rate": 1.9555555555555557e-05, "loss": 0.664, "step": 22195 }, { "epoch": 0.6480394733000496, "grad_norm": 0.5325412964791543, "learning_rate": 1.9553933495539336e-05, "loss": 0.6455, "step": 22196 }, { "epoch": 0.648068669527897, "grad_norm": 0.5393876449422074, "learning_rate": 1.9552311435523114e-05, "loss": 0.6409, "step": 22197 }, { "epoch": 0.6480978657557444, "grad_norm": 0.5269049574893594, "learning_rate": 1.9550689375506896e-05, "loss": 0.5636, "step": 22198 }, { "epoch": 0.6481270619835917, "grad_norm": 0.5042308226848535, "learning_rate": 1.9549067315490674e-05, "loss": 0.5837, "step": 22199 }, { "epoch": 0.6481562582114391, "grad_norm": 0.4928606122519099, "learning_rate": 1.9547445255474453e-05, "loss": 0.5478, "step": 22200 }, { "epoch": 0.6481854544392864, "grad_norm": 0.4889809263493384, "learning_rate": 1.954582319545823e-05, "loss": 0.5249, "step": 22201 }, { "epoch": 0.6482146506671338, "grad_norm": 0.501324029342784, "learning_rate": 1.9544201135442013e-05, "loss": 0.5441, "step": 22202 }, { "epoch": 0.6482438468949812, "grad_norm": 0.4986851538534019, "learning_rate": 1.954257907542579e-05, "loss": 0.5762, "step": 22203 }, { "epoch": 0.6482730431228285, "grad_norm": 0.5490804457865585, "learning_rate": 1.9540957015409573e-05, "loss": 0.6255, "step": 22204 }, { "epoch": 0.6483022393506759, "grad_norm": 0.483646322754565, "learning_rate": 1.953933495539335e-05, "loss": 0.5314, "step": 22205 }, { "epoch": 0.6483314355785232, "grad_norm": 0.4826712617927418, "learning_rate": 1.953771289537713e-05, "loss": 0.5226, "step": 22206 }, { "epoch": 0.6483606318063706, "grad_norm": 0.5736279987246974, "learning_rate": 1.9536090835360908e-05, "loss": 0.6525, "step": 22207 }, { "epoch": 0.648389828034218, "grad_norm": 0.4926296020656059, "learning_rate": 1.953446877534469e-05, "loss": 0.5256, "step": 22208 }, { "epoch": 0.6484190242620653, "grad_norm": 0.5080519625287508, "learning_rate": 1.9532846715328468e-05, "loss": 0.5766, "step": 22209 }, { "epoch": 0.6484482204899127, "grad_norm": 0.4897963173259803, "learning_rate": 1.9531224655312246e-05, "loss": 0.5558, "step": 22210 }, { "epoch": 0.64847741671776, "grad_norm": 0.5334753559282353, "learning_rate": 1.9529602595296025e-05, "loss": 0.6263, "step": 22211 }, { "epoch": 0.6485066129456074, "grad_norm": 0.5054740583936236, "learning_rate": 1.9527980535279807e-05, "loss": 0.5445, "step": 22212 }, { "epoch": 0.6485358091734548, "grad_norm": 0.5249143055307591, "learning_rate": 1.952635847526359e-05, "loss": 0.6274, "step": 22213 }, { "epoch": 0.6485650054013021, "grad_norm": 0.5255282980370641, "learning_rate": 1.9524736415247367e-05, "loss": 0.6333, "step": 22214 }, { "epoch": 0.6485942016291495, "grad_norm": 0.5758125753734685, "learning_rate": 1.9523114355231145e-05, "loss": 0.7061, "step": 22215 }, { "epoch": 0.6486233978569969, "grad_norm": 0.5058550807613782, "learning_rate": 1.9521492295214923e-05, "loss": 0.5781, "step": 22216 }, { "epoch": 0.6486525940848442, "grad_norm": 0.5685953594607097, "learning_rate": 1.9519870235198702e-05, "loss": 0.6224, "step": 22217 }, { "epoch": 0.6486817903126916, "grad_norm": 0.5309653085221285, "learning_rate": 1.9518248175182484e-05, "loss": 0.6199, "step": 22218 }, { "epoch": 0.6487109865405389, "grad_norm": 0.5495624776181258, "learning_rate": 1.9516626115166262e-05, "loss": 0.6107, "step": 22219 }, { "epoch": 0.6487401827683863, "grad_norm": 0.5160835978063518, "learning_rate": 1.951500405515004e-05, "loss": 0.6293, "step": 22220 }, { "epoch": 0.6487693789962337, "grad_norm": 0.53036130049003, "learning_rate": 1.951338199513382e-05, "loss": 0.5902, "step": 22221 }, { "epoch": 0.648798575224081, "grad_norm": 0.5000014133397668, "learning_rate": 1.95117599351176e-05, "loss": 0.5527, "step": 22222 }, { "epoch": 0.6488277714519284, "grad_norm": 0.5272212853206494, "learning_rate": 1.9510137875101382e-05, "loss": 0.5892, "step": 22223 }, { "epoch": 0.6488569676797757, "grad_norm": 0.5014331175182901, "learning_rate": 1.950851581508516e-05, "loss": 0.5654, "step": 22224 }, { "epoch": 0.6488861639076231, "grad_norm": 0.5360622453908166, "learning_rate": 1.950689375506894e-05, "loss": 0.5732, "step": 22225 }, { "epoch": 0.6489153601354705, "grad_norm": 0.5102743626083542, "learning_rate": 1.9505271695052717e-05, "loss": 0.5706, "step": 22226 }, { "epoch": 0.6489445563633178, "grad_norm": 0.5437946323125158, "learning_rate": 1.9503649635036496e-05, "loss": 0.6405, "step": 22227 }, { "epoch": 0.6489737525911652, "grad_norm": 0.5427006419877454, "learning_rate": 1.9502027575020278e-05, "loss": 0.6464, "step": 22228 }, { "epoch": 0.6490029488190125, "grad_norm": 0.5490585238263864, "learning_rate": 1.9500405515004056e-05, "loss": 0.6508, "step": 22229 }, { "epoch": 0.6490321450468599, "grad_norm": 0.5325378126946634, "learning_rate": 1.9498783454987834e-05, "loss": 0.5974, "step": 22230 }, { "epoch": 0.6490613412747073, "grad_norm": 0.5249200831169654, "learning_rate": 1.9497161394971613e-05, "loss": 0.5467, "step": 22231 }, { "epoch": 0.6490905375025546, "grad_norm": 0.5158565629588243, "learning_rate": 1.9495539334955394e-05, "loss": 0.6065, "step": 22232 }, { "epoch": 0.649119733730402, "grad_norm": 0.5222507475900797, "learning_rate": 1.9493917274939176e-05, "loss": 0.5809, "step": 22233 }, { "epoch": 0.6491489299582494, "grad_norm": 0.5482260805812057, "learning_rate": 1.9492295214922955e-05, "loss": 0.6179, "step": 22234 }, { "epoch": 0.6491781261860967, "grad_norm": 0.5171894229159684, "learning_rate": 1.9490673154906733e-05, "loss": 0.5963, "step": 22235 }, { "epoch": 0.6492073224139441, "grad_norm": 0.4951774691752794, "learning_rate": 1.948905109489051e-05, "loss": 0.5439, "step": 22236 }, { "epoch": 0.6492365186417914, "grad_norm": 0.5535647667985844, "learning_rate": 1.948742903487429e-05, "loss": 0.6192, "step": 22237 }, { "epoch": 0.6492657148696388, "grad_norm": 0.5116887229158948, "learning_rate": 1.948580697485807e-05, "loss": 0.5821, "step": 22238 }, { "epoch": 0.6492949110974862, "grad_norm": 0.5427821795525225, "learning_rate": 1.948418491484185e-05, "loss": 0.618, "step": 22239 }, { "epoch": 0.6493241073253335, "grad_norm": 0.554341978872804, "learning_rate": 1.9482562854825628e-05, "loss": 0.6894, "step": 22240 }, { "epoch": 0.6493533035531809, "grad_norm": 0.5374087860765181, "learning_rate": 1.948094079480941e-05, "loss": 0.6027, "step": 22241 }, { "epoch": 0.6493824997810282, "grad_norm": 0.5002899513847571, "learning_rate": 1.947931873479319e-05, "loss": 0.5335, "step": 22242 }, { "epoch": 0.6494116960088756, "grad_norm": 0.5523171536546189, "learning_rate": 1.947769667477697e-05, "loss": 0.6592, "step": 22243 }, { "epoch": 0.649440892236723, "grad_norm": 0.5201387507845817, "learning_rate": 1.947607461476075e-05, "loss": 0.6209, "step": 22244 }, { "epoch": 0.6494700884645703, "grad_norm": 0.5356784306743712, "learning_rate": 1.9474452554744527e-05, "loss": 0.6599, "step": 22245 }, { "epoch": 0.6494992846924177, "grad_norm": 0.5132166920450018, "learning_rate": 1.9472830494728305e-05, "loss": 0.5671, "step": 22246 }, { "epoch": 0.649528480920265, "grad_norm": 0.5113014907230585, "learning_rate": 1.9471208434712084e-05, "loss": 0.5753, "step": 22247 }, { "epoch": 0.6495576771481124, "grad_norm": 0.497758257069597, "learning_rate": 1.9469586374695865e-05, "loss": 0.5513, "step": 22248 }, { "epoch": 0.6495868733759598, "grad_norm": 0.5249094925007072, "learning_rate": 1.9467964314679644e-05, "loss": 0.5987, "step": 22249 }, { "epoch": 0.6496160696038071, "grad_norm": 0.5495688671165606, "learning_rate": 1.9466342254663422e-05, "loss": 0.6341, "step": 22250 }, { "epoch": 0.6496452658316545, "grad_norm": 0.5210189230705562, "learning_rate": 1.9464720194647204e-05, "loss": 0.5506, "step": 22251 }, { "epoch": 0.649674462059502, "grad_norm": 0.5133692367104924, "learning_rate": 1.9463098134630982e-05, "loss": 0.6014, "step": 22252 }, { "epoch": 0.6497036582873493, "grad_norm": 0.5486714834625779, "learning_rate": 1.9461476074614764e-05, "loss": 0.6404, "step": 22253 }, { "epoch": 0.6497328545151967, "grad_norm": 0.5346268360000986, "learning_rate": 1.9459854014598542e-05, "loss": 0.584, "step": 22254 }, { "epoch": 0.649762050743044, "grad_norm": 0.5335551366442495, "learning_rate": 1.945823195458232e-05, "loss": 0.5865, "step": 22255 }, { "epoch": 0.6497912469708914, "grad_norm": 0.49659179004219484, "learning_rate": 1.94566098945661e-05, "loss": 0.5847, "step": 22256 }, { "epoch": 0.6498204431987388, "grad_norm": 0.5199250282543454, "learning_rate": 1.9454987834549877e-05, "loss": 0.6151, "step": 22257 }, { "epoch": 0.6498496394265861, "grad_norm": 0.4994887876144974, "learning_rate": 1.945336577453366e-05, "loss": 0.5686, "step": 22258 }, { "epoch": 0.6498788356544335, "grad_norm": 0.4835900175166625, "learning_rate": 1.9451743714517438e-05, "loss": 0.5502, "step": 22259 }, { "epoch": 0.6499080318822809, "grad_norm": 0.4783812503374642, "learning_rate": 1.945012165450122e-05, "loss": 0.5293, "step": 22260 }, { "epoch": 0.6499372281101282, "grad_norm": 0.46370200907749193, "learning_rate": 1.9448499594484998e-05, "loss": 0.5072, "step": 22261 }, { "epoch": 0.6499664243379756, "grad_norm": 0.48058105013490265, "learning_rate": 1.9446877534468776e-05, "loss": 0.5086, "step": 22262 }, { "epoch": 0.6499956205658229, "grad_norm": 0.5631976241405363, "learning_rate": 1.9445255474452554e-05, "loss": 0.6496, "step": 22263 }, { "epoch": 0.6500248167936703, "grad_norm": 0.5747672142665133, "learning_rate": 1.9443633414436336e-05, "loss": 0.6969, "step": 22264 }, { "epoch": 0.6500540130215177, "grad_norm": 0.46201671711016473, "learning_rate": 1.9442011354420115e-05, "loss": 0.4586, "step": 22265 }, { "epoch": 0.650083209249365, "grad_norm": 0.5507511908154642, "learning_rate": 1.9440389294403893e-05, "loss": 0.639, "step": 22266 }, { "epoch": 0.6501124054772124, "grad_norm": 0.5117947507832142, "learning_rate": 1.943876723438767e-05, "loss": 0.554, "step": 22267 }, { "epoch": 0.6501416017050597, "grad_norm": 0.5377155675379256, "learning_rate": 1.9437145174371453e-05, "loss": 0.6064, "step": 22268 }, { "epoch": 0.6501707979329071, "grad_norm": 0.48928497817565725, "learning_rate": 1.943552311435523e-05, "loss": 0.5489, "step": 22269 }, { "epoch": 0.6501999941607545, "grad_norm": 0.4988518881304348, "learning_rate": 1.9433901054339013e-05, "loss": 0.5311, "step": 22270 }, { "epoch": 0.6502291903886018, "grad_norm": 0.5243658852079198, "learning_rate": 1.943227899432279e-05, "loss": 0.6234, "step": 22271 }, { "epoch": 0.6502583866164492, "grad_norm": 0.49562983764959906, "learning_rate": 1.943065693430657e-05, "loss": 0.573, "step": 22272 }, { "epoch": 0.6502875828442966, "grad_norm": 0.5045719745300923, "learning_rate": 1.942903487429035e-05, "loss": 0.5945, "step": 22273 }, { "epoch": 0.6503167790721439, "grad_norm": 0.5518442101717252, "learning_rate": 1.942741281427413e-05, "loss": 0.638, "step": 22274 }, { "epoch": 0.6503459752999913, "grad_norm": 0.5130692207883543, "learning_rate": 1.942579075425791e-05, "loss": 0.5823, "step": 22275 }, { "epoch": 0.6503751715278386, "grad_norm": 0.5427492879977777, "learning_rate": 1.9424168694241687e-05, "loss": 0.6162, "step": 22276 }, { "epoch": 0.650404367755686, "grad_norm": 0.49812464098245285, "learning_rate": 1.9422546634225465e-05, "loss": 0.5804, "step": 22277 }, { "epoch": 0.6504335639835334, "grad_norm": 0.6193135470387306, "learning_rate": 1.9420924574209247e-05, "loss": 0.6504, "step": 22278 }, { "epoch": 0.6504627602113807, "grad_norm": 0.5352978316553225, "learning_rate": 1.941930251419303e-05, "loss": 0.6245, "step": 22279 }, { "epoch": 0.6504919564392281, "grad_norm": 0.5654308465769072, "learning_rate": 1.9417680454176807e-05, "loss": 0.6049, "step": 22280 }, { "epoch": 0.6505211526670754, "grad_norm": 0.5338037402161206, "learning_rate": 1.9416058394160586e-05, "loss": 0.6336, "step": 22281 }, { "epoch": 0.6505503488949228, "grad_norm": 0.5467846119205165, "learning_rate": 1.9414436334144364e-05, "loss": 0.6402, "step": 22282 }, { "epoch": 0.6505795451227702, "grad_norm": 0.5443975762192, "learning_rate": 1.9412814274128142e-05, "loss": 0.6616, "step": 22283 }, { "epoch": 0.6506087413506175, "grad_norm": 0.5090775193284236, "learning_rate": 1.9411192214111924e-05, "loss": 0.5943, "step": 22284 }, { "epoch": 0.6506379375784649, "grad_norm": 0.5059380609370969, "learning_rate": 1.9409570154095702e-05, "loss": 0.5735, "step": 22285 }, { "epoch": 0.6506671338063122, "grad_norm": 0.5949391476258645, "learning_rate": 1.940794809407948e-05, "loss": 0.6446, "step": 22286 }, { "epoch": 0.6506963300341596, "grad_norm": 0.5680235650425839, "learning_rate": 1.940632603406326e-05, "loss": 0.6744, "step": 22287 }, { "epoch": 0.650725526262007, "grad_norm": 0.5402191971924504, "learning_rate": 1.940470397404704e-05, "loss": 0.6255, "step": 22288 }, { "epoch": 0.6507547224898543, "grad_norm": 0.4934386726304902, "learning_rate": 1.9403081914030823e-05, "loss": 0.5071, "step": 22289 }, { "epoch": 0.6507839187177017, "grad_norm": 0.498077719158481, "learning_rate": 1.94014598540146e-05, "loss": 0.5426, "step": 22290 }, { "epoch": 0.650813114945549, "grad_norm": 0.5016332200554511, "learning_rate": 1.939983779399838e-05, "loss": 0.5471, "step": 22291 }, { "epoch": 0.6508423111733964, "grad_norm": 0.5133784613670387, "learning_rate": 1.9398215733982158e-05, "loss": 0.599, "step": 22292 }, { "epoch": 0.6508715074012438, "grad_norm": 0.5068068131517445, "learning_rate": 1.9396593673965936e-05, "loss": 0.5683, "step": 22293 }, { "epoch": 0.6509007036290911, "grad_norm": 0.507536479954263, "learning_rate": 1.9394971613949718e-05, "loss": 0.5916, "step": 22294 }, { "epoch": 0.6509298998569385, "grad_norm": 0.5049296883775203, "learning_rate": 1.9393349553933496e-05, "loss": 0.5585, "step": 22295 }, { "epoch": 0.6509590960847859, "grad_norm": 0.4915428711496874, "learning_rate": 1.9391727493917275e-05, "loss": 0.5698, "step": 22296 }, { "epoch": 0.6509882923126332, "grad_norm": 0.520995252259327, "learning_rate": 1.9390105433901053e-05, "loss": 0.6411, "step": 22297 }, { "epoch": 0.6510174885404806, "grad_norm": 0.5349414394398297, "learning_rate": 1.9388483373884835e-05, "loss": 0.5631, "step": 22298 }, { "epoch": 0.6510466847683279, "grad_norm": 0.554618174423405, "learning_rate": 1.9386861313868617e-05, "loss": 0.6226, "step": 22299 }, { "epoch": 0.6510758809961753, "grad_norm": 0.521720069401915, "learning_rate": 1.9385239253852395e-05, "loss": 0.6269, "step": 22300 }, { "epoch": 0.6511050772240227, "grad_norm": 0.5254554464310522, "learning_rate": 1.9383617193836173e-05, "loss": 0.5758, "step": 22301 }, { "epoch": 0.65113427345187, "grad_norm": 0.5085778072373613, "learning_rate": 1.9381995133819952e-05, "loss": 0.5834, "step": 22302 }, { "epoch": 0.6511634696797174, "grad_norm": 0.540540541789285, "learning_rate": 1.938037307380373e-05, "loss": 0.6654, "step": 22303 }, { "epoch": 0.6511926659075647, "grad_norm": 0.5036202153732243, "learning_rate": 1.9378751013787512e-05, "loss": 0.553, "step": 22304 }, { "epoch": 0.6512218621354121, "grad_norm": 0.5581974259504973, "learning_rate": 1.937712895377129e-05, "loss": 0.658, "step": 22305 }, { "epoch": 0.6512510583632595, "grad_norm": 0.5492743293814037, "learning_rate": 1.937550689375507e-05, "loss": 0.6455, "step": 22306 }, { "epoch": 0.6512802545911068, "grad_norm": 0.5274538853546981, "learning_rate": 1.9373884833738847e-05, "loss": 0.6368, "step": 22307 }, { "epoch": 0.6513094508189542, "grad_norm": 0.5317732576561243, "learning_rate": 1.937226277372263e-05, "loss": 0.6122, "step": 22308 }, { "epoch": 0.6513386470468016, "grad_norm": 0.584554450757406, "learning_rate": 1.937064071370641e-05, "loss": 0.6841, "step": 22309 }, { "epoch": 0.6513678432746489, "grad_norm": 0.5053623411627359, "learning_rate": 1.936901865369019e-05, "loss": 0.5527, "step": 22310 }, { "epoch": 0.6513970395024963, "grad_norm": 0.5238320089352035, "learning_rate": 1.9367396593673967e-05, "loss": 0.5988, "step": 22311 }, { "epoch": 0.6514262357303436, "grad_norm": 0.5018454451611233, "learning_rate": 1.9365774533657746e-05, "loss": 0.5465, "step": 22312 }, { "epoch": 0.651455431958191, "grad_norm": 0.518301680595346, "learning_rate": 1.9364152473641524e-05, "loss": 0.6253, "step": 22313 }, { "epoch": 0.6514846281860384, "grad_norm": 0.5468929460733889, "learning_rate": 1.9362530413625306e-05, "loss": 0.6214, "step": 22314 }, { "epoch": 0.6515138244138857, "grad_norm": 0.5243131776850886, "learning_rate": 1.9360908353609084e-05, "loss": 0.5832, "step": 22315 }, { "epoch": 0.6515430206417331, "grad_norm": 0.49502497431437725, "learning_rate": 1.9359286293592863e-05, "loss": 0.5631, "step": 22316 }, { "epoch": 0.6515722168695804, "grad_norm": 0.5871831660563341, "learning_rate": 1.9357664233576644e-05, "loss": 0.6184, "step": 22317 }, { "epoch": 0.6516014130974278, "grad_norm": 0.5084442268849891, "learning_rate": 1.9356042173560423e-05, "loss": 0.6268, "step": 22318 }, { "epoch": 0.6516306093252752, "grad_norm": 0.5331271857893246, "learning_rate": 1.93544201135442e-05, "loss": 0.6446, "step": 22319 }, { "epoch": 0.6516598055531225, "grad_norm": 0.5495487122240617, "learning_rate": 1.9352798053527983e-05, "loss": 0.6898, "step": 22320 }, { "epoch": 0.6516890017809699, "grad_norm": 0.5397014094570496, "learning_rate": 1.935117599351176e-05, "loss": 0.6788, "step": 22321 }, { "epoch": 0.6517181980088173, "grad_norm": 0.5387899666341297, "learning_rate": 1.934955393349554e-05, "loss": 0.6569, "step": 22322 }, { "epoch": 0.6517473942366646, "grad_norm": 0.4891801321411557, "learning_rate": 1.9347931873479318e-05, "loss": 0.5257, "step": 22323 }, { "epoch": 0.651776590464512, "grad_norm": 0.513517623923161, "learning_rate": 1.93463098134631e-05, "loss": 0.5989, "step": 22324 }, { "epoch": 0.6518057866923593, "grad_norm": 0.5758255241037524, "learning_rate": 1.9344687753446878e-05, "loss": 0.6021, "step": 22325 }, { "epoch": 0.6518349829202067, "grad_norm": 0.5096917634459762, "learning_rate": 1.934306569343066e-05, "loss": 0.6078, "step": 22326 }, { "epoch": 0.6518641791480541, "grad_norm": 0.5449628624692507, "learning_rate": 1.9341443633414438e-05, "loss": 0.6536, "step": 22327 }, { "epoch": 0.6518933753759014, "grad_norm": 0.5314945600488571, "learning_rate": 1.9339821573398217e-05, "loss": 0.6574, "step": 22328 }, { "epoch": 0.6519225716037488, "grad_norm": 0.5286852004637156, "learning_rate": 1.9338199513381995e-05, "loss": 0.6271, "step": 22329 }, { "epoch": 0.6519517678315961, "grad_norm": 0.5504029760759609, "learning_rate": 1.9336577453365777e-05, "loss": 0.6406, "step": 22330 }, { "epoch": 0.6519809640594435, "grad_norm": 0.49415644918112417, "learning_rate": 1.9334955393349555e-05, "loss": 0.5441, "step": 22331 }, { "epoch": 0.6520101602872909, "grad_norm": 0.5329127633173384, "learning_rate": 1.9333333333333333e-05, "loss": 0.6175, "step": 22332 }, { "epoch": 0.6520393565151382, "grad_norm": 0.4764016741381541, "learning_rate": 1.9331711273317112e-05, "loss": 0.5032, "step": 22333 }, { "epoch": 0.6520685527429856, "grad_norm": 0.5170599457489352, "learning_rate": 1.9330089213300894e-05, "loss": 0.6132, "step": 22334 }, { "epoch": 0.652097748970833, "grad_norm": 0.5514475866782844, "learning_rate": 1.9328467153284672e-05, "loss": 0.6383, "step": 22335 }, { "epoch": 0.6521269451986803, "grad_norm": 0.5200803126389021, "learning_rate": 1.9326845093268454e-05, "loss": 0.5638, "step": 22336 }, { "epoch": 0.6521561414265277, "grad_norm": 0.5168386747169242, "learning_rate": 1.9325223033252232e-05, "loss": 0.6229, "step": 22337 }, { "epoch": 0.652185337654375, "grad_norm": 0.5263157825127811, "learning_rate": 1.932360097323601e-05, "loss": 0.6293, "step": 22338 }, { "epoch": 0.6522145338822224, "grad_norm": 0.5329587085717912, "learning_rate": 1.932197891321979e-05, "loss": 0.6054, "step": 22339 }, { "epoch": 0.6522437301100698, "grad_norm": 0.5610167734412145, "learning_rate": 1.932035685320357e-05, "loss": 0.6688, "step": 22340 }, { "epoch": 0.6522729263379171, "grad_norm": 0.5292273236705604, "learning_rate": 1.931873479318735e-05, "loss": 0.616, "step": 22341 }, { "epoch": 0.6523021225657645, "grad_norm": 0.5107838923814374, "learning_rate": 1.9317112733171127e-05, "loss": 0.5668, "step": 22342 }, { "epoch": 0.6523313187936118, "grad_norm": 0.4665158674958188, "learning_rate": 1.9315490673154906e-05, "loss": 0.5136, "step": 22343 }, { "epoch": 0.6523605150214592, "grad_norm": 0.5669243010661511, "learning_rate": 1.9313868613138687e-05, "loss": 0.5997, "step": 22344 }, { "epoch": 0.6523897112493066, "grad_norm": 0.49940447333446375, "learning_rate": 1.931224655312247e-05, "loss": 0.563, "step": 22345 }, { "epoch": 0.6524189074771539, "grad_norm": 0.5521211943591041, "learning_rate": 1.9310624493106248e-05, "loss": 0.6807, "step": 22346 }, { "epoch": 0.6524481037050013, "grad_norm": 0.5078247011060871, "learning_rate": 1.9309002433090026e-05, "loss": 0.578, "step": 22347 }, { "epoch": 0.6524772999328486, "grad_norm": 0.5084422149101822, "learning_rate": 1.9307380373073804e-05, "loss": 0.5728, "step": 22348 }, { "epoch": 0.652506496160696, "grad_norm": 0.6471979538608846, "learning_rate": 1.9305758313057583e-05, "loss": 0.6243, "step": 22349 }, { "epoch": 0.6525356923885434, "grad_norm": 0.4885104149130079, "learning_rate": 1.9304136253041365e-05, "loss": 0.5329, "step": 22350 }, { "epoch": 0.6525648886163907, "grad_norm": 0.5121820315378383, "learning_rate": 1.9302514193025143e-05, "loss": 0.594, "step": 22351 }, { "epoch": 0.6525940848442381, "grad_norm": 0.5059616915334966, "learning_rate": 1.930089213300892e-05, "loss": 0.5201, "step": 22352 }, { "epoch": 0.6526232810720854, "grad_norm": 0.5124546633495292, "learning_rate": 1.92992700729927e-05, "loss": 0.5922, "step": 22353 }, { "epoch": 0.6526524772999328, "grad_norm": 0.5703303805759358, "learning_rate": 1.929764801297648e-05, "loss": 0.7029, "step": 22354 }, { "epoch": 0.6526816735277802, "grad_norm": 0.5123296025034174, "learning_rate": 1.9296025952960263e-05, "loss": 0.5484, "step": 22355 }, { "epoch": 0.6527108697556275, "grad_norm": 0.5186084037428859, "learning_rate": 1.929440389294404e-05, "loss": 0.5636, "step": 22356 }, { "epoch": 0.6527400659834749, "grad_norm": 0.5200780148041967, "learning_rate": 1.929278183292782e-05, "loss": 0.593, "step": 22357 }, { "epoch": 0.6527692622113223, "grad_norm": 0.5138693614005112, "learning_rate": 1.9291159772911598e-05, "loss": 0.5848, "step": 22358 }, { "epoch": 0.6527984584391696, "grad_norm": 0.5339182239238297, "learning_rate": 1.9289537712895377e-05, "loss": 0.6154, "step": 22359 }, { "epoch": 0.652827654667017, "grad_norm": 0.4854559836468401, "learning_rate": 1.928791565287916e-05, "loss": 0.5443, "step": 22360 }, { "epoch": 0.6528568508948643, "grad_norm": 0.5216500353961812, "learning_rate": 1.9286293592862937e-05, "loss": 0.6386, "step": 22361 }, { "epoch": 0.6528860471227117, "grad_norm": 0.5084826763293124, "learning_rate": 1.9284671532846715e-05, "loss": 0.5595, "step": 22362 }, { "epoch": 0.6529152433505591, "grad_norm": 0.5711760649421297, "learning_rate": 1.9283049472830494e-05, "loss": 0.6843, "step": 22363 }, { "epoch": 0.6529444395784064, "grad_norm": 0.5109156881657251, "learning_rate": 1.9281427412814275e-05, "loss": 0.5669, "step": 22364 }, { "epoch": 0.6529736358062538, "grad_norm": 0.5709317045213123, "learning_rate": 1.9279805352798057e-05, "loss": 0.6507, "step": 22365 }, { "epoch": 0.6530028320341011, "grad_norm": 0.5107831693668733, "learning_rate": 1.9278183292781835e-05, "loss": 0.5597, "step": 22366 }, { "epoch": 0.6530320282619485, "grad_norm": 0.5176092943408764, "learning_rate": 1.9276561232765614e-05, "loss": 0.5873, "step": 22367 }, { "epoch": 0.6530612244897959, "grad_norm": 0.6043191424192664, "learning_rate": 1.9274939172749392e-05, "loss": 0.6189, "step": 22368 }, { "epoch": 0.6530904207176432, "grad_norm": 0.47938799797131787, "learning_rate": 1.927331711273317e-05, "loss": 0.5373, "step": 22369 }, { "epoch": 0.6531196169454906, "grad_norm": 0.531232916360322, "learning_rate": 1.9271695052716952e-05, "loss": 0.6401, "step": 22370 }, { "epoch": 0.653148813173338, "grad_norm": 0.48224460189697277, "learning_rate": 1.927007299270073e-05, "loss": 0.5548, "step": 22371 }, { "epoch": 0.6531780094011854, "grad_norm": 0.5314485561773649, "learning_rate": 1.926845093268451e-05, "loss": 0.6333, "step": 22372 }, { "epoch": 0.6532072056290328, "grad_norm": 0.5617017702926794, "learning_rate": 1.9266828872668287e-05, "loss": 0.6886, "step": 22373 }, { "epoch": 0.6532364018568801, "grad_norm": 0.5204758753789898, "learning_rate": 1.926520681265207e-05, "loss": 0.6263, "step": 22374 }, { "epoch": 0.6532655980847275, "grad_norm": 0.5500510298872888, "learning_rate": 1.926358475263585e-05, "loss": 0.6221, "step": 22375 }, { "epoch": 0.6532947943125749, "grad_norm": 0.533542854590666, "learning_rate": 1.926196269261963e-05, "loss": 0.6329, "step": 22376 }, { "epoch": 0.6533239905404222, "grad_norm": 0.5290656274636205, "learning_rate": 1.9260340632603408e-05, "loss": 0.578, "step": 22377 }, { "epoch": 0.6533531867682696, "grad_norm": 0.511340458789175, "learning_rate": 1.9258718572587186e-05, "loss": 0.5642, "step": 22378 }, { "epoch": 0.653382382996117, "grad_norm": 0.5095817122267302, "learning_rate": 1.9257096512570964e-05, "loss": 0.61, "step": 22379 }, { "epoch": 0.6534115792239643, "grad_norm": 0.5391565464700432, "learning_rate": 1.9255474452554746e-05, "loss": 0.6608, "step": 22380 }, { "epoch": 0.6534407754518117, "grad_norm": 0.5256220983692884, "learning_rate": 1.9253852392538525e-05, "loss": 0.6265, "step": 22381 }, { "epoch": 0.653469971679659, "grad_norm": 0.513897868042354, "learning_rate": 1.9252230332522303e-05, "loss": 0.5678, "step": 22382 }, { "epoch": 0.6534991679075064, "grad_norm": 0.5059801332011556, "learning_rate": 1.9250608272506085e-05, "loss": 0.5727, "step": 22383 }, { "epoch": 0.6535283641353538, "grad_norm": 0.5166377049270249, "learning_rate": 1.9248986212489863e-05, "loss": 0.6045, "step": 22384 }, { "epoch": 0.6535575603632011, "grad_norm": 0.525373961700164, "learning_rate": 1.924736415247364e-05, "loss": 0.6314, "step": 22385 }, { "epoch": 0.6535867565910485, "grad_norm": 0.5464793794235041, "learning_rate": 1.9245742092457423e-05, "loss": 0.6692, "step": 22386 }, { "epoch": 0.6536159528188958, "grad_norm": 0.6122079476764231, "learning_rate": 1.92441200324412e-05, "loss": 0.6427, "step": 22387 }, { "epoch": 0.6536451490467432, "grad_norm": 0.49199351470944935, "learning_rate": 1.924249797242498e-05, "loss": 0.5547, "step": 22388 }, { "epoch": 0.6536743452745906, "grad_norm": 0.4882746823235648, "learning_rate": 1.924087591240876e-05, "loss": 0.5561, "step": 22389 }, { "epoch": 0.6537035415024379, "grad_norm": 0.5218264376772568, "learning_rate": 1.923925385239254e-05, "loss": 0.6151, "step": 22390 }, { "epoch": 0.6537327377302853, "grad_norm": 0.5203816686241669, "learning_rate": 1.923763179237632e-05, "loss": 0.5849, "step": 22391 }, { "epoch": 0.6537619339581326, "grad_norm": 0.5423769270728593, "learning_rate": 1.92360097323601e-05, "loss": 0.6092, "step": 22392 }, { "epoch": 0.65379113018598, "grad_norm": 0.5348121198874448, "learning_rate": 1.923438767234388e-05, "loss": 0.62, "step": 22393 }, { "epoch": 0.6538203264138274, "grad_norm": 0.5449962999048839, "learning_rate": 1.9232765612327657e-05, "loss": 0.6403, "step": 22394 }, { "epoch": 0.6538495226416747, "grad_norm": 0.4349324342219729, "learning_rate": 1.9231143552311435e-05, "loss": 0.4539, "step": 22395 }, { "epoch": 0.6538787188695221, "grad_norm": 0.47490349697761713, "learning_rate": 1.9229521492295217e-05, "loss": 0.5066, "step": 22396 }, { "epoch": 0.6539079150973695, "grad_norm": 0.5735824364067056, "learning_rate": 1.9227899432278996e-05, "loss": 0.6189, "step": 22397 }, { "epoch": 0.6539371113252168, "grad_norm": 0.5578028384030512, "learning_rate": 1.9226277372262774e-05, "loss": 0.6286, "step": 22398 }, { "epoch": 0.6539663075530642, "grad_norm": 0.5034018246035469, "learning_rate": 1.9224655312246552e-05, "loss": 0.5805, "step": 22399 }, { "epoch": 0.6539955037809115, "grad_norm": 0.5221615212546492, "learning_rate": 1.9223033252230334e-05, "loss": 0.5893, "step": 22400 }, { "epoch": 0.6540247000087589, "grad_norm": 0.5089101663353159, "learning_rate": 1.9221411192214112e-05, "loss": 0.5797, "step": 22401 }, { "epoch": 0.6540538962366063, "grad_norm": 0.5202589611002326, "learning_rate": 1.9219789132197894e-05, "loss": 0.5997, "step": 22402 }, { "epoch": 0.6540830924644536, "grad_norm": 0.5467351920846193, "learning_rate": 1.9218167072181673e-05, "loss": 0.6524, "step": 22403 }, { "epoch": 0.654112288692301, "grad_norm": 0.5014542296024782, "learning_rate": 1.921654501216545e-05, "loss": 0.542, "step": 22404 }, { "epoch": 0.6541414849201483, "grad_norm": 0.5535457513742743, "learning_rate": 1.921492295214923e-05, "loss": 0.667, "step": 22405 }, { "epoch": 0.6541706811479957, "grad_norm": 0.5199651801424634, "learning_rate": 1.921330089213301e-05, "loss": 0.5631, "step": 22406 }, { "epoch": 0.6541998773758431, "grad_norm": 0.526311621598369, "learning_rate": 1.921167883211679e-05, "loss": 0.5883, "step": 22407 }, { "epoch": 0.6542290736036904, "grad_norm": 0.5199086405638013, "learning_rate": 1.9210056772100568e-05, "loss": 0.5855, "step": 22408 }, { "epoch": 0.6542582698315378, "grad_norm": 0.4917667543096736, "learning_rate": 1.9208434712084346e-05, "loss": 0.5079, "step": 22409 }, { "epoch": 0.6542874660593851, "grad_norm": 0.5642147947884393, "learning_rate": 1.9206812652068128e-05, "loss": 0.6886, "step": 22410 }, { "epoch": 0.6543166622872325, "grad_norm": 0.504400451429466, "learning_rate": 1.920519059205191e-05, "loss": 0.5818, "step": 22411 }, { "epoch": 0.6543458585150799, "grad_norm": 0.5403773056337556, "learning_rate": 1.9203568532035688e-05, "loss": 0.5929, "step": 22412 }, { "epoch": 0.6543750547429272, "grad_norm": 0.5330868563275092, "learning_rate": 1.9201946472019466e-05, "loss": 0.5728, "step": 22413 }, { "epoch": 0.6544042509707746, "grad_norm": 0.5134947243394092, "learning_rate": 1.9200324412003245e-05, "loss": 0.6172, "step": 22414 }, { "epoch": 0.654433447198622, "grad_norm": 0.4860552733108361, "learning_rate": 1.9198702351987023e-05, "loss": 0.5454, "step": 22415 }, { "epoch": 0.6544626434264693, "grad_norm": 0.47063948876496453, "learning_rate": 1.9197080291970805e-05, "loss": 0.512, "step": 22416 }, { "epoch": 0.6544918396543167, "grad_norm": 0.48658491450628644, "learning_rate": 1.9195458231954583e-05, "loss": 0.5535, "step": 22417 }, { "epoch": 0.654521035882164, "grad_norm": 0.48487638004748274, "learning_rate": 1.9193836171938362e-05, "loss": 0.5376, "step": 22418 }, { "epoch": 0.6545502321100114, "grad_norm": 0.5144855431222356, "learning_rate": 1.919221411192214e-05, "loss": 0.6081, "step": 22419 }, { "epoch": 0.6545794283378588, "grad_norm": 0.5685890149194284, "learning_rate": 1.919059205190592e-05, "loss": 0.6734, "step": 22420 }, { "epoch": 0.6546086245657061, "grad_norm": 0.5510735266982271, "learning_rate": 1.9188969991889704e-05, "loss": 0.6173, "step": 22421 }, { "epoch": 0.6546378207935535, "grad_norm": 0.46355606673318106, "learning_rate": 1.9187347931873482e-05, "loss": 0.512, "step": 22422 }, { "epoch": 0.6546670170214008, "grad_norm": 0.5162433823495162, "learning_rate": 1.918572587185726e-05, "loss": 0.6067, "step": 22423 }, { "epoch": 0.6546962132492482, "grad_norm": 0.4845267912455798, "learning_rate": 1.918410381184104e-05, "loss": 0.5579, "step": 22424 }, { "epoch": 0.6547254094770956, "grad_norm": 0.48787496362556954, "learning_rate": 1.9182481751824817e-05, "loss": 0.5631, "step": 22425 }, { "epoch": 0.6547546057049429, "grad_norm": 0.49960288942276243, "learning_rate": 1.91808596918086e-05, "loss": 0.5782, "step": 22426 }, { "epoch": 0.6547838019327903, "grad_norm": 0.5662550229628841, "learning_rate": 1.9179237631792377e-05, "loss": 0.6485, "step": 22427 }, { "epoch": 0.6548129981606376, "grad_norm": 0.50479005750971, "learning_rate": 1.9177615571776156e-05, "loss": 0.5288, "step": 22428 }, { "epoch": 0.654842194388485, "grad_norm": 0.5591391014172578, "learning_rate": 1.9175993511759934e-05, "loss": 0.6899, "step": 22429 }, { "epoch": 0.6548713906163324, "grad_norm": 0.5185293230556767, "learning_rate": 1.9174371451743716e-05, "loss": 0.6007, "step": 22430 }, { "epoch": 0.6549005868441797, "grad_norm": 0.5652634072439822, "learning_rate": 1.9172749391727497e-05, "loss": 0.6723, "step": 22431 }, { "epoch": 0.6549297830720271, "grad_norm": 0.5265494339344288, "learning_rate": 1.9171127331711276e-05, "loss": 0.6019, "step": 22432 }, { "epoch": 0.6549589792998745, "grad_norm": 0.5239043022048239, "learning_rate": 1.9169505271695054e-05, "loss": 0.63, "step": 22433 }, { "epoch": 0.6549881755277218, "grad_norm": 0.5052100897339958, "learning_rate": 1.9167883211678833e-05, "loss": 0.5729, "step": 22434 }, { "epoch": 0.6550173717555692, "grad_norm": 0.5185485233080555, "learning_rate": 1.916626115166261e-05, "loss": 0.589, "step": 22435 }, { "epoch": 0.6550465679834165, "grad_norm": 0.5239867037562543, "learning_rate": 1.9164639091646393e-05, "loss": 0.5943, "step": 22436 }, { "epoch": 0.6550757642112639, "grad_norm": 0.5330240521697435, "learning_rate": 1.916301703163017e-05, "loss": 0.6412, "step": 22437 }, { "epoch": 0.6551049604391113, "grad_norm": 0.4889794014347023, "learning_rate": 1.916139497161395e-05, "loss": 0.5676, "step": 22438 }, { "epoch": 0.6551341566669586, "grad_norm": 0.5615087262479417, "learning_rate": 1.9159772911597728e-05, "loss": 0.6732, "step": 22439 }, { "epoch": 0.655163352894806, "grad_norm": 0.538164859713279, "learning_rate": 1.915815085158151e-05, "loss": 0.6172, "step": 22440 }, { "epoch": 0.6551925491226533, "grad_norm": 0.5151323583840611, "learning_rate": 1.9156528791565288e-05, "loss": 0.5655, "step": 22441 }, { "epoch": 0.6552217453505007, "grad_norm": 0.5051971560322643, "learning_rate": 1.915490673154907e-05, "loss": 0.5603, "step": 22442 }, { "epoch": 0.6552509415783481, "grad_norm": 0.49497957288267663, "learning_rate": 1.9153284671532848e-05, "loss": 0.5709, "step": 22443 }, { "epoch": 0.6552801378061954, "grad_norm": 0.5125504862269892, "learning_rate": 1.9151662611516627e-05, "loss": 0.5852, "step": 22444 }, { "epoch": 0.6553093340340428, "grad_norm": 0.5428809671322792, "learning_rate": 1.9150040551500405e-05, "loss": 0.6273, "step": 22445 }, { "epoch": 0.6553385302618902, "grad_norm": 0.5140032006828078, "learning_rate": 1.9148418491484187e-05, "loss": 0.5897, "step": 22446 }, { "epoch": 0.6553677264897375, "grad_norm": 0.5079566571737758, "learning_rate": 1.9146796431467965e-05, "loss": 0.5724, "step": 22447 }, { "epoch": 0.6553969227175849, "grad_norm": 0.49922490822332277, "learning_rate": 1.9145174371451743e-05, "loss": 0.5806, "step": 22448 }, { "epoch": 0.6554261189454322, "grad_norm": 0.47474320576967877, "learning_rate": 1.9143552311435525e-05, "loss": 0.536, "step": 22449 }, { "epoch": 0.6554553151732796, "grad_norm": 0.5143285743866495, "learning_rate": 1.9141930251419304e-05, "loss": 0.5627, "step": 22450 }, { "epoch": 0.655484511401127, "grad_norm": 0.559040543511688, "learning_rate": 1.9140308191403082e-05, "loss": 0.6063, "step": 22451 }, { "epoch": 0.6555137076289743, "grad_norm": 0.4966618047211823, "learning_rate": 1.9138686131386864e-05, "loss": 0.5608, "step": 22452 }, { "epoch": 0.6555429038568217, "grad_norm": 0.5015470872309822, "learning_rate": 1.9137064071370642e-05, "loss": 0.5774, "step": 22453 }, { "epoch": 0.655572100084669, "grad_norm": 0.5102322093735504, "learning_rate": 1.913544201135442e-05, "loss": 0.5646, "step": 22454 }, { "epoch": 0.6556012963125164, "grad_norm": 0.5536843044908845, "learning_rate": 1.91338199513382e-05, "loss": 0.6802, "step": 22455 }, { "epoch": 0.6556304925403638, "grad_norm": 0.5260272532456077, "learning_rate": 1.913219789132198e-05, "loss": 0.6127, "step": 22456 }, { "epoch": 0.6556596887682111, "grad_norm": 0.5245072769912982, "learning_rate": 1.913057583130576e-05, "loss": 0.5952, "step": 22457 }, { "epoch": 0.6556888849960585, "grad_norm": 0.5087089334216219, "learning_rate": 1.9128953771289537e-05, "loss": 0.5999, "step": 22458 }, { "epoch": 0.6557180812239058, "grad_norm": 0.5150523709607271, "learning_rate": 1.912733171127332e-05, "loss": 0.5496, "step": 22459 }, { "epoch": 0.6557472774517532, "grad_norm": 0.48622151477444037, "learning_rate": 1.9125709651257097e-05, "loss": 0.5193, "step": 22460 }, { "epoch": 0.6557764736796006, "grad_norm": 0.5165179725027513, "learning_rate": 1.9124087591240876e-05, "loss": 0.5923, "step": 22461 }, { "epoch": 0.6558056699074479, "grad_norm": 0.5128459291065991, "learning_rate": 1.9122465531224658e-05, "loss": 0.6035, "step": 22462 }, { "epoch": 0.6558348661352953, "grad_norm": 0.5246449158760075, "learning_rate": 1.9120843471208436e-05, "loss": 0.6317, "step": 22463 }, { "epoch": 0.6558640623631427, "grad_norm": 0.5841248003651179, "learning_rate": 1.9119221411192214e-05, "loss": 0.6712, "step": 22464 }, { "epoch": 0.65589325859099, "grad_norm": 0.49366059490748065, "learning_rate": 1.9117599351175993e-05, "loss": 0.5536, "step": 22465 }, { "epoch": 0.6559224548188374, "grad_norm": 0.5212462315750953, "learning_rate": 1.9115977291159774e-05, "loss": 0.6043, "step": 22466 }, { "epoch": 0.6559516510466847, "grad_norm": 0.5194358161058935, "learning_rate": 1.9114355231143553e-05, "loss": 0.5538, "step": 22467 }, { "epoch": 0.6559808472745321, "grad_norm": 0.528066819534464, "learning_rate": 1.9112733171127335e-05, "loss": 0.5695, "step": 22468 }, { "epoch": 0.6560100435023795, "grad_norm": 0.4694344385501695, "learning_rate": 1.9111111111111113e-05, "loss": 0.5073, "step": 22469 }, { "epoch": 0.6560392397302268, "grad_norm": 0.5113630271674754, "learning_rate": 1.910948905109489e-05, "loss": 0.5955, "step": 22470 }, { "epoch": 0.6560684359580742, "grad_norm": 0.5708652018969428, "learning_rate": 1.910786699107867e-05, "loss": 0.5542, "step": 22471 }, { "epoch": 0.6560976321859215, "grad_norm": 0.5462053908225173, "learning_rate": 1.910624493106245e-05, "loss": 0.6597, "step": 22472 }, { "epoch": 0.6561268284137689, "grad_norm": 0.5038923212331708, "learning_rate": 1.910462287104623e-05, "loss": 0.5697, "step": 22473 }, { "epoch": 0.6561560246416163, "grad_norm": 0.5210924221337884, "learning_rate": 1.9103000811030008e-05, "loss": 0.6001, "step": 22474 }, { "epoch": 0.6561852208694636, "grad_norm": 0.48569873703578537, "learning_rate": 1.9101378751013787e-05, "loss": 0.5329, "step": 22475 }, { "epoch": 0.656214417097311, "grad_norm": 0.50449105752107, "learning_rate": 1.909975669099757e-05, "loss": 0.5646, "step": 22476 }, { "epoch": 0.6562436133251583, "grad_norm": 0.5218292489689969, "learning_rate": 1.909813463098135e-05, "loss": 0.5673, "step": 22477 }, { "epoch": 0.6562728095530057, "grad_norm": 0.502328088838963, "learning_rate": 1.909651257096513e-05, "loss": 0.6032, "step": 22478 }, { "epoch": 0.6563020057808531, "grad_norm": 0.5296915886884549, "learning_rate": 1.9094890510948907e-05, "loss": 0.6078, "step": 22479 }, { "epoch": 0.6563312020087004, "grad_norm": 0.4721135611960381, "learning_rate": 1.9093268450932685e-05, "loss": 0.5393, "step": 22480 }, { "epoch": 0.6563603982365478, "grad_norm": 0.5323262139251338, "learning_rate": 1.9091646390916464e-05, "loss": 0.633, "step": 22481 }, { "epoch": 0.6563895944643952, "grad_norm": 0.5406067173142018, "learning_rate": 1.9090024330900245e-05, "loss": 0.6185, "step": 22482 }, { "epoch": 0.6564187906922425, "grad_norm": 0.46431022358280877, "learning_rate": 1.9088402270884024e-05, "loss": 0.4944, "step": 22483 }, { "epoch": 0.6564479869200899, "grad_norm": 0.502988130472566, "learning_rate": 1.9086780210867802e-05, "loss": 0.5978, "step": 22484 }, { "epoch": 0.6564771831479372, "grad_norm": 0.5046115160046095, "learning_rate": 1.908515815085158e-05, "loss": 0.5658, "step": 22485 }, { "epoch": 0.6565063793757846, "grad_norm": 0.5524755910320077, "learning_rate": 1.908353609083536e-05, "loss": 0.6855, "step": 22486 }, { "epoch": 0.656535575603632, "grad_norm": 0.49184141943377596, "learning_rate": 1.9081914030819144e-05, "loss": 0.5417, "step": 22487 }, { "epoch": 0.6565647718314793, "grad_norm": 0.5306174861635443, "learning_rate": 1.9080291970802922e-05, "loss": 0.617, "step": 22488 }, { "epoch": 0.6565939680593267, "grad_norm": 0.5014966414532803, "learning_rate": 1.90786699107867e-05, "loss": 0.5813, "step": 22489 }, { "epoch": 0.656623164287174, "grad_norm": 0.4953958895384083, "learning_rate": 1.907704785077048e-05, "loss": 0.5127, "step": 22490 }, { "epoch": 0.6566523605150214, "grad_norm": 0.5144886933760168, "learning_rate": 1.9075425790754258e-05, "loss": 0.5182, "step": 22491 }, { "epoch": 0.6566815567428688, "grad_norm": 0.5522679688668322, "learning_rate": 1.907380373073804e-05, "loss": 0.6457, "step": 22492 }, { "epoch": 0.6567107529707162, "grad_norm": 0.5024860727716681, "learning_rate": 1.9072181670721818e-05, "loss": 0.5916, "step": 22493 }, { "epoch": 0.6567399491985636, "grad_norm": 0.5150022813945307, "learning_rate": 1.9070559610705596e-05, "loss": 0.5825, "step": 22494 }, { "epoch": 0.656769145426411, "grad_norm": 0.541065109331154, "learning_rate": 1.9068937550689374e-05, "loss": 0.6022, "step": 22495 }, { "epoch": 0.6567983416542583, "grad_norm": 0.4978793933674769, "learning_rate": 1.9067315490673156e-05, "loss": 0.4904, "step": 22496 }, { "epoch": 0.6568275378821057, "grad_norm": 0.49060459125299766, "learning_rate": 1.9065693430656938e-05, "loss": 0.5406, "step": 22497 }, { "epoch": 0.656856734109953, "grad_norm": 0.5284239315289118, "learning_rate": 1.9064071370640716e-05, "loss": 0.6019, "step": 22498 }, { "epoch": 0.6568859303378004, "grad_norm": 0.5216870230174893, "learning_rate": 1.9062449310624495e-05, "loss": 0.5777, "step": 22499 }, { "epoch": 0.6569151265656478, "grad_norm": 0.5185102599748311, "learning_rate": 1.9060827250608273e-05, "loss": 0.5951, "step": 22500 }, { "epoch": 0.6569443227934951, "grad_norm": 0.5303795927904376, "learning_rate": 1.905920519059205e-05, "loss": 0.6194, "step": 22501 }, { "epoch": 0.6569735190213425, "grad_norm": 0.5354655700088397, "learning_rate": 1.9057583130575833e-05, "loss": 0.6017, "step": 22502 }, { "epoch": 0.6570027152491899, "grad_norm": 0.5050999743250207, "learning_rate": 1.905596107055961e-05, "loss": 0.539, "step": 22503 }, { "epoch": 0.6570319114770372, "grad_norm": 0.4937851270747392, "learning_rate": 1.905433901054339e-05, "loss": 0.5153, "step": 22504 }, { "epoch": 0.6570611077048846, "grad_norm": 0.579871459513608, "learning_rate": 1.905271695052717e-05, "loss": 0.6754, "step": 22505 }, { "epoch": 0.6570903039327319, "grad_norm": 0.5419191528493447, "learning_rate": 1.905109489051095e-05, "loss": 0.6124, "step": 22506 }, { "epoch": 0.6571195001605793, "grad_norm": 0.5041167199267368, "learning_rate": 1.904947283049473e-05, "loss": 0.5324, "step": 22507 }, { "epoch": 0.6571486963884267, "grad_norm": 0.5221511777374108, "learning_rate": 1.904785077047851e-05, "loss": 0.5694, "step": 22508 }, { "epoch": 0.657177892616274, "grad_norm": 0.5107136202799899, "learning_rate": 1.904622871046229e-05, "loss": 0.5821, "step": 22509 }, { "epoch": 0.6572070888441214, "grad_norm": 0.5101461759042806, "learning_rate": 1.9044606650446067e-05, "loss": 0.5637, "step": 22510 }, { "epoch": 0.6572362850719687, "grad_norm": 0.49460363522222434, "learning_rate": 1.9042984590429845e-05, "loss": 0.5327, "step": 22511 }, { "epoch": 0.6572654812998161, "grad_norm": 0.5204387532513881, "learning_rate": 1.9041362530413627e-05, "loss": 0.5784, "step": 22512 }, { "epoch": 0.6572946775276635, "grad_norm": 0.536726844414156, "learning_rate": 1.9039740470397405e-05, "loss": 0.5828, "step": 22513 }, { "epoch": 0.6573238737555108, "grad_norm": 0.48190112135710417, "learning_rate": 1.9038118410381184e-05, "loss": 0.5148, "step": 22514 }, { "epoch": 0.6573530699833582, "grad_norm": 0.5437401897909978, "learning_rate": 1.9036496350364966e-05, "loss": 0.6327, "step": 22515 }, { "epoch": 0.6573822662112055, "grad_norm": 0.5651810009814793, "learning_rate": 1.9034874290348744e-05, "loss": 0.6637, "step": 22516 }, { "epoch": 0.6574114624390529, "grad_norm": 0.4829931506589095, "learning_rate": 1.9033252230332522e-05, "loss": 0.4991, "step": 22517 }, { "epoch": 0.6574406586669003, "grad_norm": 0.4819648808167279, "learning_rate": 1.9031630170316304e-05, "loss": 0.5232, "step": 22518 }, { "epoch": 0.6574698548947476, "grad_norm": 0.5083970477378942, "learning_rate": 1.9030008110300082e-05, "loss": 0.5885, "step": 22519 }, { "epoch": 0.657499051122595, "grad_norm": 0.480388356056426, "learning_rate": 1.902838605028386e-05, "loss": 0.5502, "step": 22520 }, { "epoch": 0.6575282473504424, "grad_norm": 0.581520698109413, "learning_rate": 1.902676399026764e-05, "loss": 0.6483, "step": 22521 }, { "epoch": 0.6575574435782897, "grad_norm": 0.5392078460204328, "learning_rate": 1.902514193025142e-05, "loss": 0.6863, "step": 22522 }, { "epoch": 0.6575866398061371, "grad_norm": 0.5260641131020033, "learning_rate": 1.90235198702352e-05, "loss": 0.593, "step": 22523 }, { "epoch": 0.6576158360339844, "grad_norm": 0.5282256233085643, "learning_rate": 1.9021897810218978e-05, "loss": 0.604, "step": 22524 }, { "epoch": 0.6576450322618318, "grad_norm": 0.5405882650202402, "learning_rate": 1.902027575020276e-05, "loss": 0.5976, "step": 22525 }, { "epoch": 0.6576742284896792, "grad_norm": 0.5098318635754305, "learning_rate": 1.9018653690186538e-05, "loss": 0.5943, "step": 22526 }, { "epoch": 0.6577034247175265, "grad_norm": 0.5236608569143515, "learning_rate": 1.9017031630170316e-05, "loss": 0.5901, "step": 22527 }, { "epoch": 0.6577326209453739, "grad_norm": 0.49004195638565223, "learning_rate": 1.9015409570154098e-05, "loss": 0.5542, "step": 22528 }, { "epoch": 0.6577618171732212, "grad_norm": 0.5624884697362977, "learning_rate": 1.9013787510137876e-05, "loss": 0.6676, "step": 22529 }, { "epoch": 0.6577910134010686, "grad_norm": 0.5258667548000618, "learning_rate": 1.9012165450121655e-05, "loss": 0.6182, "step": 22530 }, { "epoch": 0.657820209628916, "grad_norm": 0.5174318276581663, "learning_rate": 1.9010543390105433e-05, "loss": 0.5901, "step": 22531 }, { "epoch": 0.6578494058567633, "grad_norm": 0.5449260578566651, "learning_rate": 1.9008921330089215e-05, "loss": 0.6447, "step": 22532 }, { "epoch": 0.6578786020846107, "grad_norm": 0.5218778168439872, "learning_rate": 1.9007299270072993e-05, "loss": 0.6057, "step": 22533 }, { "epoch": 0.657907798312458, "grad_norm": 0.4892670535773346, "learning_rate": 1.9005677210056775e-05, "loss": 0.5582, "step": 22534 }, { "epoch": 0.6579369945403054, "grad_norm": 0.5166313894755536, "learning_rate": 1.9004055150040553e-05, "loss": 0.6, "step": 22535 }, { "epoch": 0.6579661907681528, "grad_norm": 0.5206653499846349, "learning_rate": 1.9002433090024332e-05, "loss": 0.586, "step": 22536 }, { "epoch": 0.6579953869960001, "grad_norm": 0.49139183916234974, "learning_rate": 1.900081103000811e-05, "loss": 0.5329, "step": 22537 }, { "epoch": 0.6580245832238475, "grad_norm": 0.5371056190476386, "learning_rate": 1.8999188969991892e-05, "loss": 0.6213, "step": 22538 }, { "epoch": 0.6580537794516949, "grad_norm": 0.5649660958585984, "learning_rate": 1.899756690997567e-05, "loss": 0.6425, "step": 22539 }, { "epoch": 0.6580829756795422, "grad_norm": 0.5026247848396626, "learning_rate": 1.899594484995945e-05, "loss": 0.5239, "step": 22540 }, { "epoch": 0.6581121719073896, "grad_norm": 0.5275190186888744, "learning_rate": 1.8994322789943227e-05, "loss": 0.6251, "step": 22541 }, { "epoch": 0.6581413681352369, "grad_norm": 0.5304278586095537, "learning_rate": 1.8992700729927005e-05, "loss": 0.5857, "step": 22542 }, { "epoch": 0.6581705643630843, "grad_norm": 0.505256863547308, "learning_rate": 1.899107866991079e-05, "loss": 0.5472, "step": 22543 }, { "epoch": 0.6581997605909317, "grad_norm": 0.5134022672929928, "learning_rate": 1.898945660989457e-05, "loss": 0.5937, "step": 22544 }, { "epoch": 0.658228956818779, "grad_norm": 0.5494988214878247, "learning_rate": 1.8987834549878347e-05, "loss": 0.6036, "step": 22545 }, { "epoch": 0.6582581530466264, "grad_norm": 0.5080199268185661, "learning_rate": 1.8986212489862126e-05, "loss": 0.5602, "step": 22546 }, { "epoch": 0.6582873492744737, "grad_norm": 0.530466969954489, "learning_rate": 1.8984590429845904e-05, "loss": 0.596, "step": 22547 }, { "epoch": 0.6583165455023211, "grad_norm": 0.5157568735058367, "learning_rate": 1.8982968369829686e-05, "loss": 0.5781, "step": 22548 }, { "epoch": 0.6583457417301685, "grad_norm": 0.5213842709117965, "learning_rate": 1.8981346309813464e-05, "loss": 0.6061, "step": 22549 }, { "epoch": 0.6583749379580158, "grad_norm": 0.5296916159319957, "learning_rate": 1.8979724249797243e-05, "loss": 0.5938, "step": 22550 }, { "epoch": 0.6584041341858632, "grad_norm": 0.5171085991222186, "learning_rate": 1.897810218978102e-05, "loss": 0.5708, "step": 22551 }, { "epoch": 0.6584333304137105, "grad_norm": 0.47134599960559376, "learning_rate": 1.89764801297648e-05, "loss": 0.5125, "step": 22552 }, { "epoch": 0.6584625266415579, "grad_norm": 0.4774478822950367, "learning_rate": 1.8974858069748584e-05, "loss": 0.5315, "step": 22553 }, { "epoch": 0.6584917228694053, "grad_norm": 0.5262511126379315, "learning_rate": 1.8973236009732363e-05, "loss": 0.6332, "step": 22554 }, { "epoch": 0.6585209190972526, "grad_norm": 0.5329771572729066, "learning_rate": 1.897161394971614e-05, "loss": 0.6058, "step": 22555 }, { "epoch": 0.6585501153251, "grad_norm": 0.5333724197327795, "learning_rate": 1.896999188969992e-05, "loss": 0.629, "step": 22556 }, { "epoch": 0.6585793115529474, "grad_norm": 0.5308699687568467, "learning_rate": 1.8968369829683698e-05, "loss": 0.5689, "step": 22557 }, { "epoch": 0.6586085077807947, "grad_norm": 0.5053284349009032, "learning_rate": 1.896674776966748e-05, "loss": 0.5582, "step": 22558 }, { "epoch": 0.6586377040086421, "grad_norm": 0.5373799342895887, "learning_rate": 1.8965125709651258e-05, "loss": 0.6303, "step": 22559 }, { "epoch": 0.6586669002364894, "grad_norm": 0.5095725059849332, "learning_rate": 1.8963503649635036e-05, "loss": 0.5039, "step": 22560 }, { "epoch": 0.6586960964643368, "grad_norm": 0.5460744013989357, "learning_rate": 1.8961881589618815e-05, "loss": 0.6587, "step": 22561 }, { "epoch": 0.6587252926921842, "grad_norm": 0.5211977850259056, "learning_rate": 1.8960259529602597e-05, "loss": 0.5751, "step": 22562 }, { "epoch": 0.6587544889200315, "grad_norm": 0.537658714112956, "learning_rate": 1.8958637469586375e-05, "loss": 0.6199, "step": 22563 }, { "epoch": 0.6587836851478789, "grad_norm": 0.5126465122782459, "learning_rate": 1.8957015409570157e-05, "loss": 0.5715, "step": 22564 }, { "epoch": 0.6588128813757262, "grad_norm": 0.5416799728021169, "learning_rate": 1.8955393349553935e-05, "loss": 0.674, "step": 22565 }, { "epoch": 0.6588420776035736, "grad_norm": 0.4889125027767383, "learning_rate": 1.8953771289537714e-05, "loss": 0.5417, "step": 22566 }, { "epoch": 0.658871273831421, "grad_norm": 0.5519257374739047, "learning_rate": 1.8952149229521492e-05, "loss": 0.629, "step": 22567 }, { "epoch": 0.6589004700592683, "grad_norm": 0.4940261328186994, "learning_rate": 1.8950527169505274e-05, "loss": 0.5344, "step": 22568 }, { "epoch": 0.6589296662871157, "grad_norm": 0.5160474673466144, "learning_rate": 1.8948905109489052e-05, "loss": 0.5662, "step": 22569 }, { "epoch": 0.658958862514963, "grad_norm": 0.4924787122665789, "learning_rate": 1.894728304947283e-05, "loss": 0.5503, "step": 22570 }, { "epoch": 0.6589880587428104, "grad_norm": 0.5536541057235993, "learning_rate": 1.894566098945661e-05, "loss": 0.6687, "step": 22571 }, { "epoch": 0.6590172549706578, "grad_norm": 0.5300515963296134, "learning_rate": 1.894403892944039e-05, "loss": 0.6427, "step": 22572 }, { "epoch": 0.6590464511985051, "grad_norm": 0.5853080160158068, "learning_rate": 1.894241686942417e-05, "loss": 0.6392, "step": 22573 }, { "epoch": 0.6590756474263525, "grad_norm": 0.49740149021870766, "learning_rate": 1.894079480940795e-05, "loss": 0.5627, "step": 22574 }, { "epoch": 0.6591048436541999, "grad_norm": 0.5316148584065188, "learning_rate": 1.893917274939173e-05, "loss": 0.5951, "step": 22575 }, { "epoch": 0.6591340398820472, "grad_norm": 0.5182031754426222, "learning_rate": 1.8937550689375507e-05, "loss": 0.5887, "step": 22576 }, { "epoch": 0.6591632361098946, "grad_norm": 0.5424457177331868, "learning_rate": 1.8935928629359286e-05, "loss": 0.6434, "step": 22577 }, { "epoch": 0.6591924323377419, "grad_norm": 0.5258994254836541, "learning_rate": 1.8934306569343068e-05, "loss": 0.6026, "step": 22578 }, { "epoch": 0.6592216285655893, "grad_norm": 0.5229325641044708, "learning_rate": 1.8932684509326846e-05, "loss": 0.5836, "step": 22579 }, { "epoch": 0.6592508247934367, "grad_norm": 0.5777492169892309, "learning_rate": 1.8931062449310624e-05, "loss": 0.61, "step": 22580 }, { "epoch": 0.659280021021284, "grad_norm": 0.5024092580810012, "learning_rate": 1.8929440389294406e-05, "loss": 0.6, "step": 22581 }, { "epoch": 0.6593092172491314, "grad_norm": 0.5524376217570867, "learning_rate": 1.8927818329278184e-05, "loss": 0.5633, "step": 22582 }, { "epoch": 0.6593384134769787, "grad_norm": 0.44512754400827187, "learning_rate": 1.8926196269261963e-05, "loss": 0.4342, "step": 22583 }, { "epoch": 0.6593676097048261, "grad_norm": 0.4736966984495838, "learning_rate": 1.8924574209245745e-05, "loss": 0.5005, "step": 22584 }, { "epoch": 0.6593968059326735, "grad_norm": 0.48965158751684706, "learning_rate": 1.8922952149229523e-05, "loss": 0.5444, "step": 22585 }, { "epoch": 0.6594260021605208, "grad_norm": 0.5508231311500286, "learning_rate": 1.89213300892133e-05, "loss": 0.6456, "step": 22586 }, { "epoch": 0.6594551983883682, "grad_norm": 0.5471861274952843, "learning_rate": 1.891970802919708e-05, "loss": 0.623, "step": 22587 }, { "epoch": 0.6594843946162156, "grad_norm": 0.5153873190950022, "learning_rate": 1.891808596918086e-05, "loss": 0.5944, "step": 22588 }, { "epoch": 0.6595135908440629, "grad_norm": 0.5412893348343564, "learning_rate": 1.891646390916464e-05, "loss": 0.6563, "step": 22589 }, { "epoch": 0.6595427870719103, "grad_norm": 0.5534515040391913, "learning_rate": 1.8914841849148418e-05, "loss": 0.7196, "step": 22590 }, { "epoch": 0.6595719832997576, "grad_norm": 0.4987140494408497, "learning_rate": 1.89132197891322e-05, "loss": 0.5568, "step": 22591 }, { "epoch": 0.659601179527605, "grad_norm": 0.5052073013815168, "learning_rate": 1.891159772911598e-05, "loss": 0.5452, "step": 22592 }, { "epoch": 0.6596303757554524, "grad_norm": 0.49647406481943496, "learning_rate": 1.8909975669099757e-05, "loss": 0.55, "step": 22593 }, { "epoch": 0.6596595719832997, "grad_norm": 0.5314735087145279, "learning_rate": 1.890835360908354e-05, "loss": 0.6313, "step": 22594 }, { "epoch": 0.6596887682111471, "grad_norm": 0.5052657741540815, "learning_rate": 1.8906731549067317e-05, "loss": 0.5863, "step": 22595 }, { "epoch": 0.6597179644389944, "grad_norm": 0.5131533811970044, "learning_rate": 1.8905109489051095e-05, "loss": 0.5911, "step": 22596 }, { "epoch": 0.6597471606668418, "grad_norm": 0.5509845380367878, "learning_rate": 1.8903487429034874e-05, "loss": 0.6205, "step": 22597 }, { "epoch": 0.6597763568946892, "grad_norm": 0.5066625110336286, "learning_rate": 1.8901865369018655e-05, "loss": 0.6063, "step": 22598 }, { "epoch": 0.6598055531225365, "grad_norm": 0.5568829325327399, "learning_rate": 1.8900243309002434e-05, "loss": 0.6525, "step": 22599 }, { "epoch": 0.6598347493503839, "grad_norm": 0.5206312671944074, "learning_rate": 1.8898621248986215e-05, "loss": 0.6081, "step": 22600 }, { "epoch": 0.6598639455782312, "grad_norm": 0.5251724999975238, "learning_rate": 1.8896999188969994e-05, "loss": 0.5956, "step": 22601 }, { "epoch": 0.6598931418060786, "grad_norm": 0.5262478999235758, "learning_rate": 1.8895377128953772e-05, "loss": 0.6062, "step": 22602 }, { "epoch": 0.659922338033926, "grad_norm": 0.5239205577753511, "learning_rate": 1.889375506893755e-05, "loss": 0.5776, "step": 22603 }, { "epoch": 0.6599515342617733, "grad_norm": 0.5649597830342632, "learning_rate": 1.8892133008921332e-05, "loss": 0.6563, "step": 22604 }, { "epoch": 0.6599807304896207, "grad_norm": 0.5283719229236995, "learning_rate": 1.889051094890511e-05, "loss": 0.5765, "step": 22605 }, { "epoch": 0.660009926717468, "grad_norm": 0.5349692493922035, "learning_rate": 1.888888888888889e-05, "loss": 0.6266, "step": 22606 }, { "epoch": 0.6600391229453154, "grad_norm": 0.5241458506030734, "learning_rate": 1.8887266828872667e-05, "loss": 0.599, "step": 22607 }, { "epoch": 0.6600683191731628, "grad_norm": 0.525507286400487, "learning_rate": 1.8885644768856446e-05, "loss": 0.6315, "step": 22608 }, { "epoch": 0.6600975154010101, "grad_norm": 0.5405419849232243, "learning_rate": 1.8884022708840228e-05, "loss": 0.6715, "step": 22609 }, { "epoch": 0.6601267116288575, "grad_norm": 0.5436204184184086, "learning_rate": 1.888240064882401e-05, "loss": 0.639, "step": 22610 }, { "epoch": 0.6601559078567049, "grad_norm": 0.48797797554468075, "learning_rate": 1.8880778588807788e-05, "loss": 0.5335, "step": 22611 }, { "epoch": 0.6601851040845522, "grad_norm": 0.4888797261631518, "learning_rate": 1.8879156528791566e-05, "loss": 0.5436, "step": 22612 }, { "epoch": 0.6602143003123997, "grad_norm": 0.5378523818989455, "learning_rate": 1.8877534468775345e-05, "loss": 0.6481, "step": 22613 }, { "epoch": 0.660243496540247, "grad_norm": 0.5475570498031509, "learning_rate": 1.8875912408759126e-05, "loss": 0.6172, "step": 22614 }, { "epoch": 0.6602726927680944, "grad_norm": 0.5400592016825377, "learning_rate": 1.8874290348742905e-05, "loss": 0.6383, "step": 22615 }, { "epoch": 0.6603018889959418, "grad_norm": 0.49783971660764714, "learning_rate": 1.8872668288726683e-05, "loss": 0.5141, "step": 22616 }, { "epoch": 0.6603310852237891, "grad_norm": 0.49559867934828283, "learning_rate": 1.887104622871046e-05, "loss": 0.5604, "step": 22617 }, { "epoch": 0.6603602814516365, "grad_norm": 0.49807906521046535, "learning_rate": 1.886942416869424e-05, "loss": 0.5589, "step": 22618 }, { "epoch": 0.6603894776794839, "grad_norm": 0.5267566869702611, "learning_rate": 1.8867802108678025e-05, "loss": 0.6154, "step": 22619 }, { "epoch": 0.6604186739073312, "grad_norm": 0.5570740308287095, "learning_rate": 1.8866180048661803e-05, "loss": 0.6492, "step": 22620 }, { "epoch": 0.6604478701351786, "grad_norm": 0.4990735379447498, "learning_rate": 1.886455798864558e-05, "loss": 0.5381, "step": 22621 }, { "epoch": 0.6604770663630259, "grad_norm": 0.5217542870605579, "learning_rate": 1.886293592862936e-05, "loss": 0.5787, "step": 22622 }, { "epoch": 0.6605062625908733, "grad_norm": 0.5143872055889253, "learning_rate": 1.886131386861314e-05, "loss": 0.5853, "step": 22623 }, { "epoch": 0.6605354588187207, "grad_norm": 0.5262581632979805, "learning_rate": 1.885969180859692e-05, "loss": 0.6195, "step": 22624 }, { "epoch": 0.660564655046568, "grad_norm": 0.5506338117809233, "learning_rate": 1.88580697485807e-05, "loss": 0.6217, "step": 22625 }, { "epoch": 0.6605938512744154, "grad_norm": 0.48298683835036493, "learning_rate": 1.8856447688564477e-05, "loss": 0.5443, "step": 22626 }, { "epoch": 0.6606230475022628, "grad_norm": 0.5512332824148582, "learning_rate": 1.8854825628548255e-05, "loss": 0.6206, "step": 22627 }, { "epoch": 0.6606522437301101, "grad_norm": 0.6432917012387703, "learning_rate": 1.8853203568532037e-05, "loss": 0.7067, "step": 22628 }, { "epoch": 0.6606814399579575, "grad_norm": 0.5958438965042544, "learning_rate": 1.8851581508515815e-05, "loss": 0.6874, "step": 22629 }, { "epoch": 0.6607106361858048, "grad_norm": 0.5233414262138437, "learning_rate": 1.8849959448499597e-05, "loss": 0.5631, "step": 22630 }, { "epoch": 0.6607398324136522, "grad_norm": 0.5157112492105972, "learning_rate": 1.8848337388483376e-05, "loss": 0.5988, "step": 22631 }, { "epoch": 0.6607690286414996, "grad_norm": 0.4950280467610531, "learning_rate": 1.8846715328467154e-05, "loss": 0.591, "step": 22632 }, { "epoch": 0.6607982248693469, "grad_norm": 0.5719986043200482, "learning_rate": 1.8845093268450932e-05, "loss": 0.6503, "step": 22633 }, { "epoch": 0.6608274210971943, "grad_norm": 0.5989972498911212, "learning_rate": 1.8843471208434714e-05, "loss": 0.6277, "step": 22634 }, { "epoch": 0.6608566173250416, "grad_norm": 0.5140014417953633, "learning_rate": 1.8841849148418492e-05, "loss": 0.5778, "step": 22635 }, { "epoch": 0.660885813552889, "grad_norm": 0.5467215806517168, "learning_rate": 1.884022708840227e-05, "loss": 0.6551, "step": 22636 }, { "epoch": 0.6609150097807364, "grad_norm": 0.4902848977969524, "learning_rate": 1.883860502838605e-05, "loss": 0.5167, "step": 22637 }, { "epoch": 0.6609442060085837, "grad_norm": 0.5012954454921313, "learning_rate": 1.883698296836983e-05, "loss": 0.5515, "step": 22638 }, { "epoch": 0.6609734022364311, "grad_norm": 0.5442002555419883, "learning_rate": 1.883536090835361e-05, "loss": 0.6614, "step": 22639 }, { "epoch": 0.6610025984642784, "grad_norm": 0.5605740534071262, "learning_rate": 1.883373884833739e-05, "loss": 0.642, "step": 22640 }, { "epoch": 0.6610317946921258, "grad_norm": 0.5164243898081073, "learning_rate": 1.883211678832117e-05, "loss": 0.5826, "step": 22641 }, { "epoch": 0.6610609909199732, "grad_norm": 0.5100231379981135, "learning_rate": 1.8830494728304948e-05, "loss": 0.5589, "step": 22642 }, { "epoch": 0.6610901871478205, "grad_norm": 0.5272376276856813, "learning_rate": 1.8828872668288726e-05, "loss": 0.5849, "step": 22643 }, { "epoch": 0.6611193833756679, "grad_norm": 0.530146387355181, "learning_rate": 1.8827250608272508e-05, "loss": 0.6097, "step": 22644 }, { "epoch": 0.6611485796035153, "grad_norm": 0.5227497006338987, "learning_rate": 1.8825628548256286e-05, "loss": 0.5876, "step": 22645 }, { "epoch": 0.6611777758313626, "grad_norm": 0.5621602929504931, "learning_rate": 1.8824006488240065e-05, "loss": 0.6483, "step": 22646 }, { "epoch": 0.66120697205921, "grad_norm": 0.5112939075835051, "learning_rate": 1.8822384428223846e-05, "loss": 0.5949, "step": 22647 }, { "epoch": 0.6612361682870573, "grad_norm": 0.4765470153617634, "learning_rate": 1.8820762368207625e-05, "loss": 0.5036, "step": 22648 }, { "epoch": 0.6612653645149047, "grad_norm": 0.5379320422156348, "learning_rate": 1.8819140308191403e-05, "loss": 0.5865, "step": 22649 }, { "epoch": 0.6612945607427521, "grad_norm": 0.518287327516227, "learning_rate": 1.8817518248175185e-05, "loss": 0.6211, "step": 22650 }, { "epoch": 0.6613237569705994, "grad_norm": 0.5374526761854017, "learning_rate": 1.8815896188158963e-05, "loss": 0.5981, "step": 22651 }, { "epoch": 0.6613529531984468, "grad_norm": 0.5123480859924995, "learning_rate": 1.8814274128142742e-05, "loss": 0.5602, "step": 22652 }, { "epoch": 0.6613821494262941, "grad_norm": 0.5030703623610345, "learning_rate": 1.881265206812652e-05, "loss": 0.5387, "step": 22653 }, { "epoch": 0.6614113456541415, "grad_norm": 0.5175867998003872, "learning_rate": 1.8811030008110302e-05, "loss": 0.6157, "step": 22654 }, { "epoch": 0.6614405418819889, "grad_norm": 0.5848388433311583, "learning_rate": 1.880940794809408e-05, "loss": 0.6851, "step": 22655 }, { "epoch": 0.6614697381098362, "grad_norm": 0.5094835761449436, "learning_rate": 1.880778588807786e-05, "loss": 0.6078, "step": 22656 }, { "epoch": 0.6614989343376836, "grad_norm": 0.5003820396506411, "learning_rate": 1.880616382806164e-05, "loss": 0.5554, "step": 22657 }, { "epoch": 0.661528130565531, "grad_norm": 0.5114446052189958, "learning_rate": 1.880454176804542e-05, "loss": 0.6198, "step": 22658 }, { "epoch": 0.6615573267933783, "grad_norm": 0.5036149042040656, "learning_rate": 1.8802919708029197e-05, "loss": 0.5935, "step": 22659 }, { "epoch": 0.6615865230212257, "grad_norm": 0.537094844348327, "learning_rate": 1.880129764801298e-05, "loss": 0.5925, "step": 22660 }, { "epoch": 0.661615719249073, "grad_norm": 0.5307021976227174, "learning_rate": 1.8799675587996757e-05, "loss": 0.6363, "step": 22661 }, { "epoch": 0.6616449154769204, "grad_norm": 0.5411267352046734, "learning_rate": 1.8798053527980536e-05, "loss": 0.6242, "step": 22662 }, { "epoch": 0.6616741117047678, "grad_norm": 0.5766177106812898, "learning_rate": 1.8796431467964314e-05, "loss": 0.6522, "step": 22663 }, { "epoch": 0.6617033079326151, "grad_norm": 0.5764513885585991, "learning_rate": 1.8794809407948092e-05, "loss": 0.7095, "step": 22664 }, { "epoch": 0.6617325041604625, "grad_norm": 0.5145264600223864, "learning_rate": 1.8793187347931874e-05, "loss": 0.5945, "step": 22665 }, { "epoch": 0.6617617003883098, "grad_norm": 0.5524239133359943, "learning_rate": 1.8791565287915656e-05, "loss": 0.6531, "step": 22666 }, { "epoch": 0.6617908966161572, "grad_norm": 0.5295675104315094, "learning_rate": 1.8789943227899434e-05, "loss": 0.6436, "step": 22667 }, { "epoch": 0.6618200928440046, "grad_norm": 0.5127528859295527, "learning_rate": 1.8788321167883213e-05, "loss": 0.5772, "step": 22668 }, { "epoch": 0.6618492890718519, "grad_norm": 0.4938048626083896, "learning_rate": 1.878669910786699e-05, "loss": 0.5526, "step": 22669 }, { "epoch": 0.6618784852996993, "grad_norm": 0.5163449838264629, "learning_rate": 1.8785077047850773e-05, "loss": 0.5405, "step": 22670 }, { "epoch": 0.6619076815275466, "grad_norm": 0.4517702840720998, "learning_rate": 1.878345498783455e-05, "loss": 0.4835, "step": 22671 }, { "epoch": 0.661936877755394, "grad_norm": 0.47373954752972164, "learning_rate": 1.878183292781833e-05, "loss": 0.5272, "step": 22672 }, { "epoch": 0.6619660739832414, "grad_norm": 0.5046368172008231, "learning_rate": 1.8780210867802108e-05, "loss": 0.5618, "step": 22673 }, { "epoch": 0.6619952702110887, "grad_norm": 0.5386049253451554, "learning_rate": 1.8778588807785886e-05, "loss": 0.6399, "step": 22674 }, { "epoch": 0.6620244664389361, "grad_norm": 0.5028169946206333, "learning_rate": 1.8776966747769668e-05, "loss": 0.5769, "step": 22675 }, { "epoch": 0.6620536626667834, "grad_norm": 0.5303860034658748, "learning_rate": 1.877534468775345e-05, "loss": 0.5804, "step": 22676 }, { "epoch": 0.6620828588946308, "grad_norm": 0.5166458823772979, "learning_rate": 1.8773722627737228e-05, "loss": 0.6245, "step": 22677 }, { "epoch": 0.6621120551224782, "grad_norm": 0.5322499373194219, "learning_rate": 1.8772100567721007e-05, "loss": 0.5837, "step": 22678 }, { "epoch": 0.6621412513503255, "grad_norm": 0.47887711123410254, "learning_rate": 1.8770478507704785e-05, "loss": 0.5172, "step": 22679 }, { "epoch": 0.6621704475781729, "grad_norm": 0.5374959008213878, "learning_rate": 1.8768856447688567e-05, "loss": 0.6428, "step": 22680 }, { "epoch": 0.6621996438060203, "grad_norm": 0.5286699750665428, "learning_rate": 1.8767234387672345e-05, "loss": 0.5911, "step": 22681 }, { "epoch": 0.6622288400338676, "grad_norm": 0.5298572797446474, "learning_rate": 1.8765612327656123e-05, "loss": 0.6796, "step": 22682 }, { "epoch": 0.662258036261715, "grad_norm": 0.5200068652905051, "learning_rate": 1.8763990267639902e-05, "loss": 0.5988, "step": 22683 }, { "epoch": 0.6622872324895623, "grad_norm": 0.4539686800003586, "learning_rate": 1.876236820762368e-05, "loss": 0.4816, "step": 22684 }, { "epoch": 0.6623164287174097, "grad_norm": 0.619131369142813, "learning_rate": 1.8760746147607462e-05, "loss": 0.7256, "step": 22685 }, { "epoch": 0.6623456249452571, "grad_norm": 0.4862851197320003, "learning_rate": 1.8759124087591244e-05, "loss": 0.5197, "step": 22686 }, { "epoch": 0.6623748211731044, "grad_norm": 0.5502792816877877, "learning_rate": 1.8757502027575022e-05, "loss": 0.6435, "step": 22687 }, { "epoch": 0.6624040174009518, "grad_norm": 0.542254840816881, "learning_rate": 1.87558799675588e-05, "loss": 0.5957, "step": 22688 }, { "epoch": 0.6624332136287991, "grad_norm": 0.5770625440884471, "learning_rate": 1.875425790754258e-05, "loss": 0.6767, "step": 22689 }, { "epoch": 0.6624624098566465, "grad_norm": 0.5430872634222, "learning_rate": 1.875263584752636e-05, "loss": 0.6137, "step": 22690 }, { "epoch": 0.6624916060844939, "grad_norm": 0.5273264200623617, "learning_rate": 1.875101378751014e-05, "loss": 0.6407, "step": 22691 }, { "epoch": 0.6625208023123412, "grad_norm": 0.5358162792512948, "learning_rate": 1.8749391727493917e-05, "loss": 0.5779, "step": 22692 }, { "epoch": 0.6625499985401886, "grad_norm": 0.5450524248616938, "learning_rate": 1.8747769667477696e-05, "loss": 0.6229, "step": 22693 }, { "epoch": 0.662579194768036, "grad_norm": 0.5381276065689132, "learning_rate": 1.8746147607461478e-05, "loss": 0.6183, "step": 22694 }, { "epoch": 0.6626083909958833, "grad_norm": 0.560530462996637, "learning_rate": 1.8744525547445256e-05, "loss": 0.6688, "step": 22695 }, { "epoch": 0.6626375872237307, "grad_norm": 0.530699534604464, "learning_rate": 1.8742903487429038e-05, "loss": 0.5627, "step": 22696 }, { "epoch": 0.662666783451578, "grad_norm": 0.5376118225202917, "learning_rate": 1.8741281427412816e-05, "loss": 0.6211, "step": 22697 }, { "epoch": 0.6626959796794254, "grad_norm": 0.5556270299921905, "learning_rate": 1.8739659367396594e-05, "loss": 0.6293, "step": 22698 }, { "epoch": 0.6627251759072728, "grad_norm": 0.5156464437946152, "learning_rate": 1.8738037307380373e-05, "loss": 0.6065, "step": 22699 }, { "epoch": 0.6627543721351201, "grad_norm": 0.5351579318394868, "learning_rate": 1.8736415247364155e-05, "loss": 0.6339, "step": 22700 }, { "epoch": 0.6627835683629675, "grad_norm": 0.5482906895011365, "learning_rate": 1.8734793187347933e-05, "loss": 0.6608, "step": 22701 }, { "epoch": 0.6628127645908148, "grad_norm": 0.5027341234617911, "learning_rate": 1.873317112733171e-05, "loss": 0.4988, "step": 22702 }, { "epoch": 0.6628419608186622, "grad_norm": 0.5229202274960834, "learning_rate": 1.873154906731549e-05, "loss": 0.5906, "step": 22703 }, { "epoch": 0.6628711570465096, "grad_norm": 0.5236150837003551, "learning_rate": 1.872992700729927e-05, "loss": 0.5584, "step": 22704 }, { "epoch": 0.6629003532743569, "grad_norm": 0.5171448969144742, "learning_rate": 1.872830494728305e-05, "loss": 0.6268, "step": 22705 }, { "epoch": 0.6629295495022043, "grad_norm": 0.5366161535893633, "learning_rate": 1.872668288726683e-05, "loss": 0.6278, "step": 22706 }, { "epoch": 0.6629587457300516, "grad_norm": 0.5207258071992158, "learning_rate": 1.872506082725061e-05, "loss": 0.6116, "step": 22707 }, { "epoch": 0.662987941957899, "grad_norm": 0.5960355363011823, "learning_rate": 1.8723438767234388e-05, "loss": 0.6755, "step": 22708 }, { "epoch": 0.6630171381857464, "grad_norm": 0.47482513131936355, "learning_rate": 1.8721816707218167e-05, "loss": 0.5019, "step": 22709 }, { "epoch": 0.6630463344135937, "grad_norm": 0.5304228407962776, "learning_rate": 1.872019464720195e-05, "loss": 0.6048, "step": 22710 }, { "epoch": 0.6630755306414411, "grad_norm": 0.5408252860114202, "learning_rate": 1.8718572587185727e-05, "loss": 0.6372, "step": 22711 }, { "epoch": 0.6631047268692885, "grad_norm": 0.5124619068952331, "learning_rate": 1.8716950527169505e-05, "loss": 0.5611, "step": 22712 }, { "epoch": 0.6631339230971358, "grad_norm": 0.5436504780571496, "learning_rate": 1.8715328467153287e-05, "loss": 0.6677, "step": 22713 }, { "epoch": 0.6631631193249832, "grad_norm": 0.532473569854396, "learning_rate": 1.8713706407137065e-05, "loss": 0.6168, "step": 22714 }, { "epoch": 0.6631923155528305, "grad_norm": 0.5231736601179078, "learning_rate": 1.8712084347120844e-05, "loss": 0.6213, "step": 22715 }, { "epoch": 0.6632215117806779, "grad_norm": 0.4989999964541057, "learning_rate": 1.8710462287104625e-05, "loss": 0.5575, "step": 22716 }, { "epoch": 0.6632507080085253, "grad_norm": 0.5282952413799689, "learning_rate": 1.8708840227088404e-05, "loss": 0.6117, "step": 22717 }, { "epoch": 0.6632799042363726, "grad_norm": 0.4853252301351741, "learning_rate": 1.8707218167072182e-05, "loss": 0.5054, "step": 22718 }, { "epoch": 0.66330910046422, "grad_norm": 0.5297847752965181, "learning_rate": 1.870559610705596e-05, "loss": 0.607, "step": 22719 }, { "epoch": 0.6633382966920673, "grad_norm": 0.559945734989715, "learning_rate": 1.8703974047039742e-05, "loss": 0.656, "step": 22720 }, { "epoch": 0.6633674929199147, "grad_norm": 0.5730237124586895, "learning_rate": 1.870235198702352e-05, "loss": 0.6011, "step": 22721 }, { "epoch": 0.6633966891477621, "grad_norm": 0.5205679328946476, "learning_rate": 1.87007299270073e-05, "loss": 0.5973, "step": 22722 }, { "epoch": 0.6634258853756094, "grad_norm": 0.5082064442816575, "learning_rate": 1.869910786699108e-05, "loss": 0.5845, "step": 22723 }, { "epoch": 0.6634550816034568, "grad_norm": 0.48032278142960766, "learning_rate": 1.869748580697486e-05, "loss": 0.5064, "step": 22724 }, { "epoch": 0.6634842778313041, "grad_norm": 0.4751046302258944, "learning_rate": 1.8695863746958638e-05, "loss": 0.5546, "step": 22725 }, { "epoch": 0.6635134740591515, "grad_norm": 0.5267189530831032, "learning_rate": 1.869424168694242e-05, "loss": 0.5926, "step": 22726 }, { "epoch": 0.6635426702869989, "grad_norm": 0.5209125875509446, "learning_rate": 1.8692619626926198e-05, "loss": 0.5644, "step": 22727 }, { "epoch": 0.6635718665148462, "grad_norm": 0.5155205378536941, "learning_rate": 1.8690997566909976e-05, "loss": 0.5737, "step": 22728 }, { "epoch": 0.6636010627426936, "grad_norm": 0.5610175099914705, "learning_rate": 1.8689375506893754e-05, "loss": 0.6212, "step": 22729 }, { "epoch": 0.663630258970541, "grad_norm": 0.5088084913133725, "learning_rate": 1.8687753446877533e-05, "loss": 0.5748, "step": 22730 }, { "epoch": 0.6636594551983883, "grad_norm": 0.5058672686676887, "learning_rate": 1.8686131386861315e-05, "loss": 0.5636, "step": 22731 }, { "epoch": 0.6636886514262357, "grad_norm": 0.5097010518192194, "learning_rate": 1.8684509326845096e-05, "loss": 0.5941, "step": 22732 }, { "epoch": 0.663717847654083, "grad_norm": 0.501473974696188, "learning_rate": 1.8682887266828875e-05, "loss": 0.5613, "step": 22733 }, { "epoch": 0.6637470438819305, "grad_norm": 0.5151457158483493, "learning_rate": 1.8681265206812653e-05, "loss": 0.5848, "step": 22734 }, { "epoch": 0.6637762401097779, "grad_norm": 0.5278049896159518, "learning_rate": 1.867964314679643e-05, "loss": 0.6482, "step": 22735 }, { "epoch": 0.6638054363376252, "grad_norm": 0.553134827833639, "learning_rate": 1.8678021086780213e-05, "loss": 0.6057, "step": 22736 }, { "epoch": 0.6638346325654726, "grad_norm": 0.48784754898183214, "learning_rate": 1.867639902676399e-05, "loss": 0.5745, "step": 22737 }, { "epoch": 0.66386382879332, "grad_norm": 0.5062457810973808, "learning_rate": 1.867477696674777e-05, "loss": 0.547, "step": 22738 }, { "epoch": 0.6638930250211673, "grad_norm": 0.5161398049064972, "learning_rate": 1.867315490673155e-05, "loss": 0.6072, "step": 22739 }, { "epoch": 0.6639222212490147, "grad_norm": 0.4712214939958192, "learning_rate": 1.8671532846715327e-05, "loss": 0.5132, "step": 22740 }, { "epoch": 0.663951417476862, "grad_norm": 0.4876550355202175, "learning_rate": 1.866991078669911e-05, "loss": 0.5424, "step": 22741 }, { "epoch": 0.6639806137047094, "grad_norm": 0.51741360864758, "learning_rate": 1.866828872668289e-05, "loss": 0.6078, "step": 22742 }, { "epoch": 0.6640098099325568, "grad_norm": 0.5402027227274044, "learning_rate": 1.866666666666667e-05, "loss": 0.6238, "step": 22743 }, { "epoch": 0.6640390061604041, "grad_norm": 0.5638540700613717, "learning_rate": 1.8665044606650447e-05, "loss": 0.7077, "step": 22744 }, { "epoch": 0.6640682023882515, "grad_norm": 0.5222820684059687, "learning_rate": 1.8663422546634225e-05, "loss": 0.5749, "step": 22745 }, { "epoch": 0.6640973986160988, "grad_norm": 0.5310678724599247, "learning_rate": 1.8661800486618007e-05, "loss": 0.6342, "step": 22746 }, { "epoch": 0.6641265948439462, "grad_norm": 0.5991325511903135, "learning_rate": 1.8660178426601786e-05, "loss": 0.6132, "step": 22747 }, { "epoch": 0.6641557910717936, "grad_norm": 0.608776213472024, "learning_rate": 1.8658556366585564e-05, "loss": 0.7117, "step": 22748 }, { "epoch": 0.6641849872996409, "grad_norm": 0.5071185774033701, "learning_rate": 1.8656934306569342e-05, "loss": 0.5679, "step": 22749 }, { "epoch": 0.6642141835274883, "grad_norm": 0.48782953552634645, "learning_rate": 1.865531224655312e-05, "loss": 0.527, "step": 22750 }, { "epoch": 0.6642433797553357, "grad_norm": 0.5286322890824297, "learning_rate": 1.8653690186536902e-05, "loss": 0.6209, "step": 22751 }, { "epoch": 0.664272575983183, "grad_norm": 0.5294147812329705, "learning_rate": 1.8652068126520684e-05, "loss": 0.6137, "step": 22752 }, { "epoch": 0.6643017722110304, "grad_norm": 0.5236742851932144, "learning_rate": 1.8650446066504463e-05, "loss": 0.6496, "step": 22753 }, { "epoch": 0.6643309684388777, "grad_norm": 0.5440356528584198, "learning_rate": 1.864882400648824e-05, "loss": 0.5949, "step": 22754 }, { "epoch": 0.6643601646667251, "grad_norm": 0.5116033898917396, "learning_rate": 1.864720194647202e-05, "loss": 0.5607, "step": 22755 }, { "epoch": 0.6643893608945725, "grad_norm": 0.5500504747306203, "learning_rate": 1.86455798864558e-05, "loss": 0.6336, "step": 22756 }, { "epoch": 0.6644185571224198, "grad_norm": 0.4995834443409402, "learning_rate": 1.864395782643958e-05, "loss": 0.5556, "step": 22757 }, { "epoch": 0.6644477533502672, "grad_norm": 0.5036237486705326, "learning_rate": 1.8642335766423358e-05, "loss": 0.5564, "step": 22758 }, { "epoch": 0.6644769495781145, "grad_norm": 0.4702989824145475, "learning_rate": 1.8640713706407136e-05, "loss": 0.5084, "step": 22759 }, { "epoch": 0.6645061458059619, "grad_norm": 0.5316289720770829, "learning_rate": 1.8639091646390915e-05, "loss": 0.6375, "step": 22760 }, { "epoch": 0.6645353420338093, "grad_norm": 0.5276237309902425, "learning_rate": 1.8637469586374696e-05, "loss": 0.6278, "step": 22761 }, { "epoch": 0.6645645382616566, "grad_norm": 0.46879662918021103, "learning_rate": 1.8635847526358478e-05, "loss": 0.5209, "step": 22762 }, { "epoch": 0.664593734489504, "grad_norm": 0.5409424776423815, "learning_rate": 1.8634225466342256e-05, "loss": 0.6467, "step": 22763 }, { "epoch": 0.6646229307173513, "grad_norm": 0.506117855315957, "learning_rate": 1.8632603406326035e-05, "loss": 0.5719, "step": 22764 }, { "epoch": 0.6646521269451987, "grad_norm": 0.5308376484090406, "learning_rate": 1.8630981346309813e-05, "loss": 0.663, "step": 22765 }, { "epoch": 0.6646813231730461, "grad_norm": 0.5220593810988747, "learning_rate": 1.8629359286293595e-05, "loss": 0.6111, "step": 22766 }, { "epoch": 0.6647105194008934, "grad_norm": 0.5866928378785113, "learning_rate": 1.8627737226277373e-05, "loss": 0.7457, "step": 22767 }, { "epoch": 0.6647397156287408, "grad_norm": 0.51802287753759, "learning_rate": 1.8626115166261152e-05, "loss": 0.6153, "step": 22768 }, { "epoch": 0.6647689118565882, "grad_norm": 0.5153376204090375, "learning_rate": 1.862449310624493e-05, "loss": 0.5572, "step": 22769 }, { "epoch": 0.6647981080844355, "grad_norm": 0.5515309629631471, "learning_rate": 1.8622871046228712e-05, "loss": 0.6448, "step": 22770 }, { "epoch": 0.6648273043122829, "grad_norm": 0.5349890791321265, "learning_rate": 1.862124898621249e-05, "loss": 0.6153, "step": 22771 }, { "epoch": 0.6648565005401302, "grad_norm": 0.49159066487689024, "learning_rate": 1.8619626926196272e-05, "loss": 0.5347, "step": 22772 }, { "epoch": 0.6648856967679776, "grad_norm": 0.5292511128925386, "learning_rate": 1.861800486618005e-05, "loss": 0.6141, "step": 22773 }, { "epoch": 0.664914892995825, "grad_norm": 0.5195147645988277, "learning_rate": 1.861638280616383e-05, "loss": 0.5835, "step": 22774 }, { "epoch": 0.6649440892236723, "grad_norm": 0.545161147038437, "learning_rate": 1.8614760746147607e-05, "loss": 0.658, "step": 22775 }, { "epoch": 0.6649732854515197, "grad_norm": 0.47677903233469887, "learning_rate": 1.861313868613139e-05, "loss": 0.5199, "step": 22776 }, { "epoch": 0.665002481679367, "grad_norm": 0.47566401168686767, "learning_rate": 1.8611516626115167e-05, "loss": 0.5151, "step": 22777 }, { "epoch": 0.6650316779072144, "grad_norm": 0.5032369336741996, "learning_rate": 1.8609894566098946e-05, "loss": 0.5641, "step": 22778 }, { "epoch": 0.6650608741350618, "grad_norm": 0.5314947001625984, "learning_rate": 1.8608272506082727e-05, "loss": 0.5332, "step": 22779 }, { "epoch": 0.6650900703629091, "grad_norm": 0.5107960211105436, "learning_rate": 1.8606650446066506e-05, "loss": 0.6109, "step": 22780 }, { "epoch": 0.6651192665907565, "grad_norm": 0.4592563821319671, "learning_rate": 1.8605028386050284e-05, "loss": 0.4905, "step": 22781 }, { "epoch": 0.6651484628186038, "grad_norm": 0.5115724122733231, "learning_rate": 1.8603406326034066e-05, "loss": 0.5989, "step": 22782 }, { "epoch": 0.6651776590464512, "grad_norm": 0.4807108938246661, "learning_rate": 1.8601784266017844e-05, "loss": 0.5792, "step": 22783 }, { "epoch": 0.6652068552742986, "grad_norm": 0.4818167604651155, "learning_rate": 1.8600162206001623e-05, "loss": 0.5034, "step": 22784 }, { "epoch": 0.6652360515021459, "grad_norm": 0.5370882861920143, "learning_rate": 1.85985401459854e-05, "loss": 0.6362, "step": 22785 }, { "epoch": 0.6652652477299933, "grad_norm": 0.5263262902672893, "learning_rate": 1.859691808596918e-05, "loss": 0.6343, "step": 22786 }, { "epoch": 0.6652944439578407, "grad_norm": 0.5255661557765114, "learning_rate": 1.859529602595296e-05, "loss": 0.5639, "step": 22787 }, { "epoch": 0.665323640185688, "grad_norm": 0.546468178306637, "learning_rate": 1.859367396593674e-05, "loss": 0.6465, "step": 22788 }, { "epoch": 0.6653528364135354, "grad_norm": 0.5304670882471274, "learning_rate": 1.859205190592052e-05, "loss": 0.6349, "step": 22789 }, { "epoch": 0.6653820326413827, "grad_norm": 0.48092785753654427, "learning_rate": 1.85904298459043e-05, "loss": 0.546, "step": 22790 }, { "epoch": 0.6654112288692301, "grad_norm": 0.48898405416359486, "learning_rate": 1.8588807785888078e-05, "loss": 0.5432, "step": 22791 }, { "epoch": 0.6654404250970775, "grad_norm": 0.48617492278106533, "learning_rate": 1.858718572587186e-05, "loss": 0.5575, "step": 22792 }, { "epoch": 0.6654696213249248, "grad_norm": 0.5337806806845548, "learning_rate": 1.8585563665855638e-05, "loss": 0.5811, "step": 22793 }, { "epoch": 0.6654988175527722, "grad_norm": 0.5001653759946713, "learning_rate": 1.8583941605839417e-05, "loss": 0.5777, "step": 22794 }, { "epoch": 0.6655280137806195, "grad_norm": 0.5037983253878003, "learning_rate": 1.8582319545823195e-05, "loss": 0.5835, "step": 22795 }, { "epoch": 0.6655572100084669, "grad_norm": 0.5037111076749909, "learning_rate": 1.8580697485806973e-05, "loss": 0.5484, "step": 22796 }, { "epoch": 0.6655864062363143, "grad_norm": 0.5288320375244973, "learning_rate": 1.8579075425790755e-05, "loss": 0.6347, "step": 22797 }, { "epoch": 0.6656156024641616, "grad_norm": 0.5336382610981041, "learning_rate": 1.8577453365774537e-05, "loss": 0.6339, "step": 22798 }, { "epoch": 0.665644798692009, "grad_norm": 0.48282169645491313, "learning_rate": 1.8575831305758315e-05, "loss": 0.5023, "step": 22799 }, { "epoch": 0.6656739949198563, "grad_norm": 0.4659247908828325, "learning_rate": 1.8574209245742094e-05, "loss": 0.4625, "step": 22800 }, { "epoch": 0.6657031911477037, "grad_norm": 0.5097399278402097, "learning_rate": 1.8572587185725872e-05, "loss": 0.6058, "step": 22801 }, { "epoch": 0.6657323873755511, "grad_norm": 0.4953424272611475, "learning_rate": 1.8570965125709654e-05, "loss": 0.5222, "step": 22802 }, { "epoch": 0.6657615836033984, "grad_norm": 0.5251636276791352, "learning_rate": 1.8569343065693432e-05, "loss": 0.6077, "step": 22803 }, { "epoch": 0.6657907798312458, "grad_norm": 0.5306314159934927, "learning_rate": 1.856772100567721e-05, "loss": 0.64, "step": 22804 }, { "epoch": 0.6658199760590932, "grad_norm": 0.5027523628311465, "learning_rate": 1.856609894566099e-05, "loss": 0.5847, "step": 22805 }, { "epoch": 0.6658491722869405, "grad_norm": 0.5181166025700167, "learning_rate": 1.8564476885644767e-05, "loss": 0.5821, "step": 22806 }, { "epoch": 0.6658783685147879, "grad_norm": 0.5180389495663729, "learning_rate": 1.856285482562855e-05, "loss": 0.6139, "step": 22807 }, { "epoch": 0.6659075647426352, "grad_norm": 0.4885889476951582, "learning_rate": 1.856123276561233e-05, "loss": 0.5188, "step": 22808 }, { "epoch": 0.6659367609704826, "grad_norm": 0.44146946280016763, "learning_rate": 1.855961070559611e-05, "loss": 0.4529, "step": 22809 }, { "epoch": 0.66596595719833, "grad_norm": 0.5051301491961564, "learning_rate": 1.8557988645579887e-05, "loss": 0.5728, "step": 22810 }, { "epoch": 0.6659951534261773, "grad_norm": 0.536159301301915, "learning_rate": 1.8556366585563666e-05, "loss": 0.6267, "step": 22811 }, { "epoch": 0.6660243496540247, "grad_norm": 0.5308888720905515, "learning_rate": 1.8554744525547448e-05, "loss": 0.621, "step": 22812 }, { "epoch": 0.666053545881872, "grad_norm": 0.5291844824380058, "learning_rate": 1.8553122465531226e-05, "loss": 0.6023, "step": 22813 }, { "epoch": 0.6660827421097194, "grad_norm": 0.485739646757131, "learning_rate": 1.8551500405515004e-05, "loss": 0.5588, "step": 22814 }, { "epoch": 0.6661119383375668, "grad_norm": 0.5176601495914787, "learning_rate": 1.8549878345498783e-05, "loss": 0.583, "step": 22815 }, { "epoch": 0.6661411345654141, "grad_norm": 0.4919735859892122, "learning_rate": 1.854825628548256e-05, "loss": 0.5801, "step": 22816 }, { "epoch": 0.6661703307932615, "grad_norm": 0.5357807131773462, "learning_rate": 1.8546634225466343e-05, "loss": 0.6241, "step": 22817 }, { "epoch": 0.6661995270211089, "grad_norm": 0.48472530617443055, "learning_rate": 1.8545012165450125e-05, "loss": 0.5294, "step": 22818 }, { "epoch": 0.6662287232489562, "grad_norm": 0.49686644236169647, "learning_rate": 1.8543390105433903e-05, "loss": 0.5177, "step": 22819 }, { "epoch": 0.6662579194768036, "grad_norm": 0.5063609056807069, "learning_rate": 1.854176804541768e-05, "loss": 0.5567, "step": 22820 }, { "epoch": 0.6662871157046509, "grad_norm": 0.5126056982560226, "learning_rate": 1.854014598540146e-05, "loss": 0.5543, "step": 22821 }, { "epoch": 0.6663163119324983, "grad_norm": 0.4753920244464603, "learning_rate": 1.853852392538524e-05, "loss": 0.489, "step": 22822 }, { "epoch": 0.6663455081603457, "grad_norm": 0.5464641080300807, "learning_rate": 1.853690186536902e-05, "loss": 0.6146, "step": 22823 }, { "epoch": 0.666374704388193, "grad_norm": 0.5671356374255718, "learning_rate": 1.8535279805352798e-05, "loss": 0.62, "step": 22824 }, { "epoch": 0.6664039006160404, "grad_norm": 0.5527203720977848, "learning_rate": 1.8533657745336577e-05, "loss": 0.6383, "step": 22825 }, { "epoch": 0.6664330968438877, "grad_norm": 0.4948212244505231, "learning_rate": 1.8532035685320355e-05, "loss": 0.5328, "step": 22826 }, { "epoch": 0.6664622930717351, "grad_norm": 0.5556725512341967, "learning_rate": 1.8530413625304137e-05, "loss": 0.6525, "step": 22827 }, { "epoch": 0.6664914892995825, "grad_norm": 0.5131844678480368, "learning_rate": 1.852879156528792e-05, "loss": 0.5902, "step": 22828 }, { "epoch": 0.6665206855274298, "grad_norm": 0.5273849920013363, "learning_rate": 1.8527169505271697e-05, "loss": 0.636, "step": 22829 }, { "epoch": 0.6665498817552772, "grad_norm": 0.4957247483704773, "learning_rate": 1.8525547445255475e-05, "loss": 0.5753, "step": 22830 }, { "epoch": 0.6665790779831245, "grad_norm": 0.557488020004594, "learning_rate": 1.8523925385239254e-05, "loss": 0.7028, "step": 22831 }, { "epoch": 0.6666082742109719, "grad_norm": 0.5240264193534334, "learning_rate": 1.8522303325223035e-05, "loss": 0.5912, "step": 22832 }, { "epoch": 0.6666374704388193, "grad_norm": 0.573453521804447, "learning_rate": 1.8520681265206814e-05, "loss": 0.6518, "step": 22833 }, { "epoch": 0.6666666666666666, "grad_norm": 0.49831894408426214, "learning_rate": 1.8519059205190592e-05, "loss": 0.5888, "step": 22834 }, { "epoch": 0.666695862894514, "grad_norm": 0.5517765852493411, "learning_rate": 1.851743714517437e-05, "loss": 0.6554, "step": 22835 }, { "epoch": 0.6667250591223614, "grad_norm": 0.5017571473669457, "learning_rate": 1.8515815085158152e-05, "loss": 0.5572, "step": 22836 }, { "epoch": 0.6667542553502087, "grad_norm": 0.47879329363974554, "learning_rate": 1.851419302514193e-05, "loss": 0.5214, "step": 22837 }, { "epoch": 0.6667834515780561, "grad_norm": 0.5138440256140244, "learning_rate": 1.8512570965125712e-05, "loss": 0.5231, "step": 22838 }, { "epoch": 0.6668126478059034, "grad_norm": 0.5329379345501333, "learning_rate": 1.851094890510949e-05, "loss": 0.613, "step": 22839 }, { "epoch": 0.6668418440337508, "grad_norm": 0.5199362579029086, "learning_rate": 1.850932684509327e-05, "loss": 0.5809, "step": 22840 }, { "epoch": 0.6668710402615982, "grad_norm": 0.537534886536408, "learning_rate": 1.8507704785077048e-05, "loss": 0.6198, "step": 22841 }, { "epoch": 0.6669002364894455, "grad_norm": 0.519217400628247, "learning_rate": 1.850608272506083e-05, "loss": 0.611, "step": 22842 }, { "epoch": 0.6669294327172929, "grad_norm": 0.5259818213661084, "learning_rate": 1.8504460665044608e-05, "loss": 0.5855, "step": 22843 }, { "epoch": 0.6669586289451402, "grad_norm": 0.561163299840953, "learning_rate": 1.8502838605028386e-05, "loss": 0.7104, "step": 22844 }, { "epoch": 0.6669878251729876, "grad_norm": 0.5489216201601329, "learning_rate": 1.8501216545012168e-05, "loss": 0.6226, "step": 22845 }, { "epoch": 0.667017021400835, "grad_norm": 0.5420593794616085, "learning_rate": 1.8499594484995946e-05, "loss": 0.6712, "step": 22846 }, { "epoch": 0.6670462176286823, "grad_norm": 0.5747092376479033, "learning_rate": 1.8497972424979725e-05, "loss": 0.7155, "step": 22847 }, { "epoch": 0.6670754138565297, "grad_norm": 0.5236644977993189, "learning_rate": 1.8496350364963506e-05, "loss": 0.6019, "step": 22848 }, { "epoch": 0.667104610084377, "grad_norm": 0.5049846102105923, "learning_rate": 1.8494728304947285e-05, "loss": 0.5945, "step": 22849 }, { "epoch": 0.6671338063122244, "grad_norm": 0.578145015412442, "learning_rate": 1.8493106244931063e-05, "loss": 0.7126, "step": 22850 }, { "epoch": 0.6671630025400718, "grad_norm": 0.4673652506998481, "learning_rate": 1.849148418491484e-05, "loss": 0.4826, "step": 22851 }, { "epoch": 0.6671921987679191, "grad_norm": 0.5516153799121895, "learning_rate": 1.848986212489862e-05, "loss": 0.649, "step": 22852 }, { "epoch": 0.6672213949957665, "grad_norm": 0.5115789432758501, "learning_rate": 1.84882400648824e-05, "loss": 0.5991, "step": 22853 }, { "epoch": 0.6672505912236139, "grad_norm": 0.5803500665805774, "learning_rate": 1.848661800486618e-05, "loss": 0.6512, "step": 22854 }, { "epoch": 0.6672797874514613, "grad_norm": 0.5094103261320769, "learning_rate": 1.8484995944849962e-05, "loss": 0.5801, "step": 22855 }, { "epoch": 0.6673089836793087, "grad_norm": 0.5105897396058745, "learning_rate": 1.848337388483374e-05, "loss": 0.6099, "step": 22856 }, { "epoch": 0.667338179907156, "grad_norm": 0.5515017263594015, "learning_rate": 1.848175182481752e-05, "loss": 0.6722, "step": 22857 }, { "epoch": 0.6673673761350034, "grad_norm": 0.5456496604869128, "learning_rate": 1.84801297648013e-05, "loss": 0.6002, "step": 22858 }, { "epoch": 0.6673965723628508, "grad_norm": 0.5366015486337136, "learning_rate": 1.847850770478508e-05, "loss": 0.6054, "step": 22859 }, { "epoch": 0.6674257685906981, "grad_norm": 0.5342293279151953, "learning_rate": 1.8476885644768857e-05, "loss": 0.6143, "step": 22860 }, { "epoch": 0.6674549648185455, "grad_norm": 0.5339823267731806, "learning_rate": 1.8475263584752635e-05, "loss": 0.6027, "step": 22861 }, { "epoch": 0.6674841610463929, "grad_norm": 0.498597204965469, "learning_rate": 1.8473641524736414e-05, "loss": 0.5445, "step": 22862 }, { "epoch": 0.6675133572742402, "grad_norm": 0.54323079651512, "learning_rate": 1.8472019464720195e-05, "loss": 0.6335, "step": 22863 }, { "epoch": 0.6675425535020876, "grad_norm": 0.5680042801165291, "learning_rate": 1.8470397404703977e-05, "loss": 0.6756, "step": 22864 }, { "epoch": 0.6675717497299349, "grad_norm": 0.47823878446679086, "learning_rate": 1.8468775344687756e-05, "loss": 0.5441, "step": 22865 }, { "epoch": 0.6676009459577823, "grad_norm": 0.5273673149596834, "learning_rate": 1.8467153284671534e-05, "loss": 0.5923, "step": 22866 }, { "epoch": 0.6676301421856297, "grad_norm": 0.51153804069003, "learning_rate": 1.8465531224655312e-05, "loss": 0.5826, "step": 22867 }, { "epoch": 0.667659338413477, "grad_norm": 0.5420410545474761, "learning_rate": 1.8463909164639094e-05, "loss": 0.5982, "step": 22868 }, { "epoch": 0.6676885346413244, "grad_norm": 0.5162029563351249, "learning_rate": 1.8462287104622873e-05, "loss": 0.6048, "step": 22869 }, { "epoch": 0.6677177308691717, "grad_norm": 0.49726295330059694, "learning_rate": 1.846066504460665e-05, "loss": 0.5614, "step": 22870 }, { "epoch": 0.6677469270970191, "grad_norm": 0.5291261506017938, "learning_rate": 1.845904298459043e-05, "loss": 0.585, "step": 22871 }, { "epoch": 0.6677761233248665, "grad_norm": 0.48952893237543843, "learning_rate": 1.8457420924574208e-05, "loss": 0.558, "step": 22872 }, { "epoch": 0.6678053195527138, "grad_norm": 0.5106320089980587, "learning_rate": 1.845579886455799e-05, "loss": 0.6164, "step": 22873 }, { "epoch": 0.6678345157805612, "grad_norm": 0.5370775099649909, "learning_rate": 1.845417680454177e-05, "loss": 0.6193, "step": 22874 }, { "epoch": 0.6678637120084085, "grad_norm": 0.5389935643940077, "learning_rate": 1.845255474452555e-05, "loss": 0.6286, "step": 22875 }, { "epoch": 0.6678929082362559, "grad_norm": 0.5230712882022014, "learning_rate": 1.8450932684509328e-05, "loss": 0.6351, "step": 22876 }, { "epoch": 0.6679221044641033, "grad_norm": 0.48300559481825894, "learning_rate": 1.8449310624493106e-05, "loss": 0.5429, "step": 22877 }, { "epoch": 0.6679513006919506, "grad_norm": 0.4963167157459999, "learning_rate": 1.8447688564476888e-05, "loss": 0.5267, "step": 22878 }, { "epoch": 0.667980496919798, "grad_norm": 0.4687956127151906, "learning_rate": 1.8446066504460666e-05, "loss": 0.4964, "step": 22879 }, { "epoch": 0.6680096931476454, "grad_norm": 0.5413915865610428, "learning_rate": 1.8444444444444445e-05, "loss": 0.59, "step": 22880 }, { "epoch": 0.6680388893754927, "grad_norm": 0.5032484713804044, "learning_rate": 1.8442822384428223e-05, "loss": 0.5761, "step": 22881 }, { "epoch": 0.6680680856033401, "grad_norm": 0.539797033351488, "learning_rate": 1.8441200324412e-05, "loss": 0.6619, "step": 22882 }, { "epoch": 0.6680972818311874, "grad_norm": 0.5041503198731675, "learning_rate": 1.8439578264395783e-05, "loss": 0.6068, "step": 22883 }, { "epoch": 0.6681264780590348, "grad_norm": 0.5190145119974744, "learning_rate": 1.8437956204379565e-05, "loss": 0.5815, "step": 22884 }, { "epoch": 0.6681556742868822, "grad_norm": 0.4964271845807271, "learning_rate": 1.8436334144363343e-05, "loss": 0.5627, "step": 22885 }, { "epoch": 0.6681848705147295, "grad_norm": 0.5131292884397245, "learning_rate": 1.8434712084347122e-05, "loss": 0.6108, "step": 22886 }, { "epoch": 0.6682140667425769, "grad_norm": 0.5571927369923481, "learning_rate": 1.84330900243309e-05, "loss": 0.6662, "step": 22887 }, { "epoch": 0.6682432629704242, "grad_norm": 0.5159226170658275, "learning_rate": 1.8431467964314682e-05, "loss": 0.6121, "step": 22888 }, { "epoch": 0.6682724591982716, "grad_norm": 0.5412019794913968, "learning_rate": 1.842984590429846e-05, "loss": 0.6183, "step": 22889 }, { "epoch": 0.668301655426119, "grad_norm": 0.526249931430776, "learning_rate": 1.842822384428224e-05, "loss": 0.5853, "step": 22890 }, { "epoch": 0.6683308516539663, "grad_norm": 0.5298893667737855, "learning_rate": 1.8426601784266017e-05, "loss": 0.6029, "step": 22891 }, { "epoch": 0.6683600478818137, "grad_norm": 0.5048378091959126, "learning_rate": 1.8424979724249795e-05, "loss": 0.607, "step": 22892 }, { "epoch": 0.668389244109661, "grad_norm": 0.4996140452265925, "learning_rate": 1.8423357664233577e-05, "loss": 0.5631, "step": 22893 }, { "epoch": 0.6684184403375084, "grad_norm": 0.5184421522596442, "learning_rate": 1.842173560421736e-05, "loss": 0.6086, "step": 22894 }, { "epoch": 0.6684476365653558, "grad_norm": 0.47173220300104135, "learning_rate": 1.8420113544201137e-05, "loss": 0.4871, "step": 22895 }, { "epoch": 0.6684768327932031, "grad_norm": 0.5066297299307376, "learning_rate": 1.8418491484184916e-05, "loss": 0.5523, "step": 22896 }, { "epoch": 0.6685060290210505, "grad_norm": 0.5218254755244562, "learning_rate": 1.8416869424168694e-05, "loss": 0.5907, "step": 22897 }, { "epoch": 0.6685352252488979, "grad_norm": 0.571976068832014, "learning_rate": 1.8415247364152476e-05, "loss": 0.6559, "step": 22898 }, { "epoch": 0.6685644214767452, "grad_norm": 0.4738086130542225, "learning_rate": 1.8413625304136254e-05, "loss": 0.5336, "step": 22899 }, { "epoch": 0.6685936177045926, "grad_norm": 0.5105919103462329, "learning_rate": 1.8412003244120033e-05, "loss": 0.5774, "step": 22900 }, { "epoch": 0.6686228139324399, "grad_norm": 0.5139987616391964, "learning_rate": 1.841038118410381e-05, "loss": 0.5691, "step": 22901 }, { "epoch": 0.6686520101602873, "grad_norm": 0.5204166072217433, "learning_rate": 1.8408759124087593e-05, "loss": 0.5343, "step": 22902 }, { "epoch": 0.6686812063881347, "grad_norm": 0.47659809136283227, "learning_rate": 1.840713706407137e-05, "loss": 0.4791, "step": 22903 }, { "epoch": 0.668710402615982, "grad_norm": 0.5284761670469285, "learning_rate": 1.8405515004055153e-05, "loss": 0.5838, "step": 22904 }, { "epoch": 0.6687395988438294, "grad_norm": 0.4827862235970305, "learning_rate": 1.840389294403893e-05, "loss": 0.5041, "step": 22905 }, { "epoch": 0.6687687950716767, "grad_norm": 0.5112138922622889, "learning_rate": 1.840227088402271e-05, "loss": 0.6097, "step": 22906 }, { "epoch": 0.6687979912995241, "grad_norm": 0.5585427427826054, "learning_rate": 1.8400648824006488e-05, "loss": 0.6305, "step": 22907 }, { "epoch": 0.6688271875273715, "grad_norm": 0.5383368789813878, "learning_rate": 1.8399026763990266e-05, "loss": 0.6336, "step": 22908 }, { "epoch": 0.6688563837552188, "grad_norm": 0.5576421642630928, "learning_rate": 1.8397404703974048e-05, "loss": 0.6003, "step": 22909 }, { "epoch": 0.6688855799830662, "grad_norm": 0.5076602643194071, "learning_rate": 1.8395782643957826e-05, "loss": 0.5565, "step": 22910 }, { "epoch": 0.6689147762109136, "grad_norm": 0.5583432660639897, "learning_rate": 1.8394160583941605e-05, "loss": 0.6429, "step": 22911 }, { "epoch": 0.6689439724387609, "grad_norm": 0.6145731368566759, "learning_rate": 1.8392538523925387e-05, "loss": 0.6796, "step": 22912 }, { "epoch": 0.6689731686666083, "grad_norm": 0.529035992930882, "learning_rate": 1.8390916463909165e-05, "loss": 0.554, "step": 22913 }, { "epoch": 0.6690023648944556, "grad_norm": 0.5132206979577911, "learning_rate": 1.8389294403892947e-05, "loss": 0.5717, "step": 22914 }, { "epoch": 0.669031561122303, "grad_norm": 0.5258083912833305, "learning_rate": 1.8387672343876725e-05, "loss": 0.5618, "step": 22915 }, { "epoch": 0.6690607573501504, "grad_norm": 0.54750279288419, "learning_rate": 1.8386050283860504e-05, "loss": 0.6296, "step": 22916 }, { "epoch": 0.6690899535779977, "grad_norm": 0.5132074802574719, "learning_rate": 1.8384428223844282e-05, "loss": 0.5599, "step": 22917 }, { "epoch": 0.6691191498058451, "grad_norm": 0.5555199648720338, "learning_rate": 1.838280616382806e-05, "loss": 0.6233, "step": 22918 }, { "epoch": 0.6691483460336924, "grad_norm": 0.5104333799026416, "learning_rate": 1.8381184103811842e-05, "loss": 0.6034, "step": 22919 }, { "epoch": 0.6691775422615398, "grad_norm": 0.48411217506997894, "learning_rate": 1.837956204379562e-05, "loss": 0.4966, "step": 22920 }, { "epoch": 0.6692067384893872, "grad_norm": 0.5104005132095328, "learning_rate": 1.8377939983779402e-05, "loss": 0.5792, "step": 22921 }, { "epoch": 0.6692359347172345, "grad_norm": 0.5268929248674971, "learning_rate": 1.837631792376318e-05, "loss": 0.5718, "step": 22922 }, { "epoch": 0.6692651309450819, "grad_norm": 0.533999904993855, "learning_rate": 1.837469586374696e-05, "loss": 0.6326, "step": 22923 }, { "epoch": 0.6692943271729292, "grad_norm": 0.5150271004675514, "learning_rate": 1.837307380373074e-05, "loss": 0.5789, "step": 22924 }, { "epoch": 0.6693235234007766, "grad_norm": 0.5328624194950109, "learning_rate": 1.837145174371452e-05, "loss": 0.6194, "step": 22925 }, { "epoch": 0.669352719628624, "grad_norm": 0.5573368066238963, "learning_rate": 1.8369829683698297e-05, "loss": 0.6635, "step": 22926 }, { "epoch": 0.6693819158564713, "grad_norm": 0.5411357973730289, "learning_rate": 1.8368207623682076e-05, "loss": 0.5516, "step": 22927 }, { "epoch": 0.6694111120843187, "grad_norm": 0.5115144955464609, "learning_rate": 1.8366585563665854e-05, "loss": 0.6397, "step": 22928 }, { "epoch": 0.669440308312166, "grad_norm": 0.496106287422112, "learning_rate": 1.8364963503649636e-05, "loss": 0.5271, "step": 22929 }, { "epoch": 0.6694695045400134, "grad_norm": 0.5037992974493289, "learning_rate": 1.8363341443633418e-05, "loss": 0.6065, "step": 22930 }, { "epoch": 0.6694987007678608, "grad_norm": 0.4972839637942673, "learning_rate": 1.8361719383617196e-05, "loss": 0.5603, "step": 22931 }, { "epoch": 0.6695278969957081, "grad_norm": 0.5008686281447607, "learning_rate": 1.8360097323600974e-05, "loss": 0.5771, "step": 22932 }, { "epoch": 0.6695570932235555, "grad_norm": 0.47874064056756405, "learning_rate": 1.8358475263584753e-05, "loss": 0.4841, "step": 22933 }, { "epoch": 0.6695862894514029, "grad_norm": 0.5242647883290638, "learning_rate": 1.8356853203568535e-05, "loss": 0.6166, "step": 22934 }, { "epoch": 0.6696154856792502, "grad_norm": 0.4676954663961918, "learning_rate": 1.8355231143552313e-05, "loss": 0.4746, "step": 22935 }, { "epoch": 0.6696446819070976, "grad_norm": 0.5097060708141217, "learning_rate": 1.835360908353609e-05, "loss": 0.5647, "step": 22936 }, { "epoch": 0.6696738781349449, "grad_norm": 0.5653198427069702, "learning_rate": 1.835198702351987e-05, "loss": 0.7229, "step": 22937 }, { "epoch": 0.6697030743627923, "grad_norm": 0.5549249195086405, "learning_rate": 1.8350364963503648e-05, "loss": 0.6796, "step": 22938 }, { "epoch": 0.6697322705906397, "grad_norm": 0.4710409100753787, "learning_rate": 1.834874290348743e-05, "loss": 0.495, "step": 22939 }, { "epoch": 0.669761466818487, "grad_norm": 0.517704959932192, "learning_rate": 1.834712084347121e-05, "loss": 0.5848, "step": 22940 }, { "epoch": 0.6697906630463344, "grad_norm": 0.5584144221094766, "learning_rate": 1.834549878345499e-05, "loss": 0.6348, "step": 22941 }, { "epoch": 0.6698198592741818, "grad_norm": 0.5238254642073868, "learning_rate": 1.834387672343877e-05, "loss": 0.5782, "step": 22942 }, { "epoch": 0.6698490555020291, "grad_norm": 0.5109098472073325, "learning_rate": 1.8342254663422547e-05, "loss": 0.5636, "step": 22943 }, { "epoch": 0.6698782517298765, "grad_norm": 0.4747799782191297, "learning_rate": 1.834063260340633e-05, "loss": 0.5212, "step": 22944 }, { "epoch": 0.6699074479577238, "grad_norm": 0.5434216452361406, "learning_rate": 1.8339010543390107e-05, "loss": 0.6794, "step": 22945 }, { "epoch": 0.6699366441855712, "grad_norm": 0.5264383578961276, "learning_rate": 1.8337388483373885e-05, "loss": 0.5947, "step": 22946 }, { "epoch": 0.6699658404134186, "grad_norm": 0.4856965266745398, "learning_rate": 1.8335766423357664e-05, "loss": 0.5028, "step": 22947 }, { "epoch": 0.6699950366412659, "grad_norm": 0.536058320894259, "learning_rate": 1.8334144363341442e-05, "loss": 0.6522, "step": 22948 }, { "epoch": 0.6700242328691133, "grad_norm": 0.5122986257888916, "learning_rate": 1.8332522303325224e-05, "loss": 0.5433, "step": 22949 }, { "epoch": 0.6700534290969606, "grad_norm": 0.48533942006077097, "learning_rate": 1.8330900243309005e-05, "loss": 0.5265, "step": 22950 }, { "epoch": 0.670082625324808, "grad_norm": 0.5070382821434614, "learning_rate": 1.8329278183292784e-05, "loss": 0.6013, "step": 22951 }, { "epoch": 0.6701118215526554, "grad_norm": 0.47853115803326435, "learning_rate": 1.8327656123276562e-05, "loss": 0.5367, "step": 22952 }, { "epoch": 0.6701410177805027, "grad_norm": 0.4719306259672335, "learning_rate": 1.832603406326034e-05, "loss": 0.5132, "step": 22953 }, { "epoch": 0.6701702140083501, "grad_norm": 0.5382871777758911, "learning_rate": 1.8324412003244122e-05, "loss": 0.6363, "step": 22954 }, { "epoch": 0.6701994102361974, "grad_norm": 0.5353209500543739, "learning_rate": 1.83227899432279e-05, "loss": 0.6257, "step": 22955 }, { "epoch": 0.6702286064640448, "grad_norm": 0.526560162496287, "learning_rate": 1.832116788321168e-05, "loss": 0.622, "step": 22956 }, { "epoch": 0.6702578026918922, "grad_norm": 0.5119230441437527, "learning_rate": 1.8319545823195458e-05, "loss": 0.5828, "step": 22957 }, { "epoch": 0.6702869989197395, "grad_norm": 0.5073171022651705, "learning_rate": 1.8317923763179236e-05, "loss": 0.5713, "step": 22958 }, { "epoch": 0.6703161951475869, "grad_norm": 0.5503548667716803, "learning_rate": 1.8316301703163018e-05, "loss": 0.6195, "step": 22959 }, { "epoch": 0.6703453913754343, "grad_norm": 0.5027338989687019, "learning_rate": 1.83146796431468e-05, "loss": 0.555, "step": 22960 }, { "epoch": 0.6703745876032816, "grad_norm": 0.49020959423780786, "learning_rate": 1.8313057583130578e-05, "loss": 0.5241, "step": 22961 }, { "epoch": 0.670403783831129, "grad_norm": 0.5163559638019602, "learning_rate": 1.8311435523114356e-05, "loss": 0.576, "step": 22962 }, { "epoch": 0.6704329800589763, "grad_norm": 0.5144140168686437, "learning_rate": 1.8309813463098135e-05, "loss": 0.5596, "step": 22963 }, { "epoch": 0.6704621762868237, "grad_norm": 0.5001321918602746, "learning_rate": 1.8308191403081916e-05, "loss": 0.5539, "step": 22964 }, { "epoch": 0.6704913725146711, "grad_norm": 0.5501550138467554, "learning_rate": 1.8306569343065695e-05, "loss": 0.6586, "step": 22965 }, { "epoch": 0.6705205687425184, "grad_norm": 0.5084686206975559, "learning_rate": 1.8304947283049473e-05, "loss": 0.6066, "step": 22966 }, { "epoch": 0.6705497649703658, "grad_norm": 0.5216565332391521, "learning_rate": 1.830332522303325e-05, "loss": 0.5468, "step": 22967 }, { "epoch": 0.6705789611982131, "grad_norm": 0.5330051623337861, "learning_rate": 1.8301703163017033e-05, "loss": 0.6141, "step": 22968 }, { "epoch": 0.6706081574260605, "grad_norm": 0.5458821929897206, "learning_rate": 1.830008110300081e-05, "loss": 0.5917, "step": 22969 }, { "epoch": 0.6706373536539079, "grad_norm": 0.45841137399761805, "learning_rate": 1.8298459042984593e-05, "loss": 0.5009, "step": 22970 }, { "epoch": 0.6706665498817552, "grad_norm": 0.5227720270887987, "learning_rate": 1.829683698296837e-05, "loss": 0.5735, "step": 22971 }, { "epoch": 0.6706957461096026, "grad_norm": 0.4922522380794449, "learning_rate": 1.829521492295215e-05, "loss": 0.5497, "step": 22972 }, { "epoch": 0.67072494233745, "grad_norm": 0.5094958143331865, "learning_rate": 1.829359286293593e-05, "loss": 0.5485, "step": 22973 }, { "epoch": 0.6707541385652973, "grad_norm": 0.5251792795825324, "learning_rate": 1.8291970802919707e-05, "loss": 0.6298, "step": 22974 }, { "epoch": 0.6707833347931448, "grad_norm": 0.5164844352116676, "learning_rate": 1.829034874290349e-05, "loss": 0.612, "step": 22975 }, { "epoch": 0.6708125310209921, "grad_norm": 0.555713903529476, "learning_rate": 1.8288726682887267e-05, "loss": 0.6402, "step": 22976 }, { "epoch": 0.6708417272488395, "grad_norm": 0.5546893756457392, "learning_rate": 1.8287104622871045e-05, "loss": 0.6735, "step": 22977 }, { "epoch": 0.6708709234766869, "grad_norm": 0.5315792236339784, "learning_rate": 1.8285482562854827e-05, "loss": 0.6237, "step": 22978 }, { "epoch": 0.6709001197045342, "grad_norm": 0.49382299635806226, "learning_rate": 1.8283860502838605e-05, "loss": 0.5231, "step": 22979 }, { "epoch": 0.6709293159323816, "grad_norm": 0.518610408828829, "learning_rate": 1.8282238442822387e-05, "loss": 0.5617, "step": 22980 }, { "epoch": 0.670958512160229, "grad_norm": 0.5197711004144872, "learning_rate": 1.8280616382806166e-05, "loss": 0.5886, "step": 22981 }, { "epoch": 0.6709877083880763, "grad_norm": 0.5763136734721056, "learning_rate": 1.8278994322789944e-05, "loss": 0.6791, "step": 22982 }, { "epoch": 0.6710169046159237, "grad_norm": 0.5372147771253591, "learning_rate": 1.8277372262773722e-05, "loss": 0.643, "step": 22983 }, { "epoch": 0.671046100843771, "grad_norm": 0.5217070598047681, "learning_rate": 1.82757502027575e-05, "loss": 0.6239, "step": 22984 }, { "epoch": 0.6710752970716184, "grad_norm": 0.5427483911577963, "learning_rate": 1.8274128142741282e-05, "loss": 0.6278, "step": 22985 }, { "epoch": 0.6711044932994658, "grad_norm": 0.5047357232154364, "learning_rate": 1.827250608272506e-05, "loss": 0.5529, "step": 22986 }, { "epoch": 0.6711336895273131, "grad_norm": 0.5205930869412656, "learning_rate": 1.8270884022708843e-05, "loss": 0.5938, "step": 22987 }, { "epoch": 0.6711628857551605, "grad_norm": 0.5052526785029474, "learning_rate": 1.826926196269262e-05, "loss": 0.5723, "step": 22988 }, { "epoch": 0.6711920819830078, "grad_norm": 0.5423215493614856, "learning_rate": 1.82676399026764e-05, "loss": 0.6121, "step": 22989 }, { "epoch": 0.6712212782108552, "grad_norm": 0.557838097293638, "learning_rate": 1.826601784266018e-05, "loss": 0.6429, "step": 22990 }, { "epoch": 0.6712504744387026, "grad_norm": 0.5502406557718071, "learning_rate": 1.826439578264396e-05, "loss": 0.6378, "step": 22991 }, { "epoch": 0.6712796706665499, "grad_norm": 0.5002806383552122, "learning_rate": 1.8262773722627738e-05, "loss": 0.5547, "step": 22992 }, { "epoch": 0.6713088668943973, "grad_norm": 0.5167105742731524, "learning_rate": 1.8261151662611516e-05, "loss": 0.6222, "step": 22993 }, { "epoch": 0.6713380631222446, "grad_norm": 0.5576115965617274, "learning_rate": 1.8259529602595295e-05, "loss": 0.5983, "step": 22994 }, { "epoch": 0.671367259350092, "grad_norm": 0.5108108003110577, "learning_rate": 1.8257907542579076e-05, "loss": 0.5787, "step": 22995 }, { "epoch": 0.6713964555779394, "grad_norm": 0.5089571126403362, "learning_rate": 1.8256285482562855e-05, "loss": 0.6043, "step": 22996 }, { "epoch": 0.6714256518057867, "grad_norm": 0.5270970076150183, "learning_rate": 1.8254663422546637e-05, "loss": 0.6453, "step": 22997 }, { "epoch": 0.6714548480336341, "grad_norm": 0.529641422908525, "learning_rate": 1.8253041362530415e-05, "loss": 0.6296, "step": 22998 }, { "epoch": 0.6714840442614814, "grad_norm": 0.4942049840726582, "learning_rate": 1.8251419302514193e-05, "loss": 0.5545, "step": 22999 }, { "epoch": 0.6715132404893288, "grad_norm": 0.5464249931835025, "learning_rate": 1.8249797242497975e-05, "loss": 0.6091, "step": 23000 }, { "epoch": 0.6715424367171762, "grad_norm": 0.5585336123571141, "learning_rate": 1.8248175182481753e-05, "loss": 0.5598, "step": 23001 }, { "epoch": 0.6715716329450235, "grad_norm": 0.5452872828688433, "learning_rate": 1.8246553122465532e-05, "loss": 0.6347, "step": 23002 }, { "epoch": 0.6716008291728709, "grad_norm": 0.5145104579413137, "learning_rate": 1.824493106244931e-05, "loss": 0.5829, "step": 23003 }, { "epoch": 0.6716300254007183, "grad_norm": 0.5189111907537546, "learning_rate": 1.824330900243309e-05, "loss": 0.5973, "step": 23004 }, { "epoch": 0.6716592216285656, "grad_norm": 0.5366943181257047, "learning_rate": 1.824168694241687e-05, "loss": 0.6107, "step": 23005 }, { "epoch": 0.671688417856413, "grad_norm": 0.50768490178222, "learning_rate": 1.8240064882400652e-05, "loss": 0.6049, "step": 23006 }, { "epoch": 0.6717176140842603, "grad_norm": 0.5244433290171964, "learning_rate": 1.823844282238443e-05, "loss": 0.622, "step": 23007 }, { "epoch": 0.6717468103121077, "grad_norm": 0.5296395768960419, "learning_rate": 1.823682076236821e-05, "loss": 0.6465, "step": 23008 }, { "epoch": 0.6717760065399551, "grad_norm": 0.5522437125086938, "learning_rate": 1.8235198702351987e-05, "loss": 0.6612, "step": 23009 }, { "epoch": 0.6718052027678024, "grad_norm": 0.44435890404036005, "learning_rate": 1.823357664233577e-05, "loss": 0.4457, "step": 23010 }, { "epoch": 0.6718343989956498, "grad_norm": 0.5565496558879958, "learning_rate": 1.8231954582319547e-05, "loss": 0.6104, "step": 23011 }, { "epoch": 0.6718635952234971, "grad_norm": 0.5576701673968111, "learning_rate": 1.8230332522303326e-05, "loss": 0.6632, "step": 23012 }, { "epoch": 0.6718927914513445, "grad_norm": 0.5729386961104904, "learning_rate": 1.8228710462287104e-05, "loss": 0.6941, "step": 23013 }, { "epoch": 0.6719219876791919, "grad_norm": 0.5441833215221362, "learning_rate": 1.8227088402270882e-05, "loss": 0.6407, "step": 23014 }, { "epoch": 0.6719511839070392, "grad_norm": 0.5168482473897775, "learning_rate": 1.8225466342254664e-05, "loss": 0.5857, "step": 23015 }, { "epoch": 0.6719803801348866, "grad_norm": 0.4962142088530991, "learning_rate": 1.8223844282238446e-05, "loss": 0.5464, "step": 23016 }, { "epoch": 0.672009576362734, "grad_norm": 0.48319931187425635, "learning_rate": 1.8222222222222224e-05, "loss": 0.5316, "step": 23017 }, { "epoch": 0.6720387725905813, "grad_norm": 0.5243699891650011, "learning_rate": 1.8220600162206003e-05, "loss": 0.5581, "step": 23018 }, { "epoch": 0.6720679688184287, "grad_norm": 0.5409539539965003, "learning_rate": 1.821897810218978e-05, "loss": 0.6409, "step": 23019 }, { "epoch": 0.672097165046276, "grad_norm": 0.5329288122684452, "learning_rate": 1.8217356042173563e-05, "loss": 0.6019, "step": 23020 }, { "epoch": 0.6721263612741234, "grad_norm": 0.5358033965595586, "learning_rate": 1.821573398215734e-05, "loss": 0.6249, "step": 23021 }, { "epoch": 0.6721555575019708, "grad_norm": 0.517536381406693, "learning_rate": 1.821411192214112e-05, "loss": 0.5621, "step": 23022 }, { "epoch": 0.6721847537298181, "grad_norm": 0.5019442767645151, "learning_rate": 1.8212489862124898e-05, "loss": 0.5296, "step": 23023 }, { "epoch": 0.6722139499576655, "grad_norm": 0.5011565700135255, "learning_rate": 1.8210867802108676e-05, "loss": 0.5746, "step": 23024 }, { "epoch": 0.6722431461855128, "grad_norm": 0.5236487865961014, "learning_rate": 1.8209245742092458e-05, "loss": 0.6119, "step": 23025 }, { "epoch": 0.6722723424133602, "grad_norm": 0.4896652708942532, "learning_rate": 1.820762368207624e-05, "loss": 0.5385, "step": 23026 }, { "epoch": 0.6723015386412076, "grad_norm": 0.5020931178000806, "learning_rate": 1.8206001622060018e-05, "loss": 0.5643, "step": 23027 }, { "epoch": 0.6723307348690549, "grad_norm": 0.5057292018449308, "learning_rate": 1.8204379562043797e-05, "loss": 0.575, "step": 23028 }, { "epoch": 0.6723599310969023, "grad_norm": 0.5195841050393797, "learning_rate": 1.8202757502027575e-05, "loss": 0.6152, "step": 23029 }, { "epoch": 0.6723891273247496, "grad_norm": 0.5082795336113866, "learning_rate": 1.8201135442011353e-05, "loss": 0.5977, "step": 23030 }, { "epoch": 0.672418323552597, "grad_norm": 0.5489657743145817, "learning_rate": 1.8199513381995135e-05, "loss": 0.6209, "step": 23031 }, { "epoch": 0.6724475197804444, "grad_norm": 0.4838757971179643, "learning_rate": 1.8197891321978913e-05, "loss": 0.5149, "step": 23032 }, { "epoch": 0.6724767160082917, "grad_norm": 0.5323000383436493, "learning_rate": 1.8196269261962692e-05, "loss": 0.6353, "step": 23033 }, { "epoch": 0.6725059122361391, "grad_norm": 0.4700674970994327, "learning_rate": 1.8194647201946474e-05, "loss": 0.5194, "step": 23034 }, { "epoch": 0.6725351084639865, "grad_norm": 0.5369376602505886, "learning_rate": 1.8193025141930252e-05, "loss": 0.6458, "step": 23035 }, { "epoch": 0.6725643046918338, "grad_norm": 0.5169518402675287, "learning_rate": 1.8191403081914034e-05, "loss": 0.5984, "step": 23036 }, { "epoch": 0.6725935009196812, "grad_norm": 0.5095967048623181, "learning_rate": 1.8189781021897812e-05, "loss": 0.5689, "step": 23037 }, { "epoch": 0.6726226971475285, "grad_norm": 0.5178843140632239, "learning_rate": 1.818815896188159e-05, "loss": 0.5933, "step": 23038 }, { "epoch": 0.6726518933753759, "grad_norm": 0.49313272497081945, "learning_rate": 1.818653690186537e-05, "loss": 0.5653, "step": 23039 }, { "epoch": 0.6726810896032233, "grad_norm": 0.5136645915972317, "learning_rate": 1.8184914841849147e-05, "loss": 0.5667, "step": 23040 }, { "epoch": 0.6727102858310706, "grad_norm": 0.5530222421483219, "learning_rate": 1.818329278183293e-05, "loss": 0.6557, "step": 23041 }, { "epoch": 0.672739482058918, "grad_norm": 0.5152665663229278, "learning_rate": 1.8181670721816707e-05, "loss": 0.5598, "step": 23042 }, { "epoch": 0.6727686782867653, "grad_norm": 0.4658403296382303, "learning_rate": 1.8180048661800486e-05, "loss": 0.496, "step": 23043 }, { "epoch": 0.6727978745146127, "grad_norm": 0.5435620839203416, "learning_rate": 1.8178426601784268e-05, "loss": 0.6344, "step": 23044 }, { "epoch": 0.6728270707424601, "grad_norm": 0.5527167586623348, "learning_rate": 1.8176804541768046e-05, "loss": 0.654, "step": 23045 }, { "epoch": 0.6728562669703074, "grad_norm": 0.49622890534116615, "learning_rate": 1.8175182481751828e-05, "loss": 0.5126, "step": 23046 }, { "epoch": 0.6728854631981548, "grad_norm": 0.47487264456633094, "learning_rate": 1.8173560421735606e-05, "loss": 0.5136, "step": 23047 }, { "epoch": 0.6729146594260021, "grad_norm": 0.5088207582510509, "learning_rate": 1.8171938361719384e-05, "loss": 0.5811, "step": 23048 }, { "epoch": 0.6729438556538495, "grad_norm": 0.5149620470863608, "learning_rate": 1.8170316301703163e-05, "loss": 0.6165, "step": 23049 }, { "epoch": 0.6729730518816969, "grad_norm": 0.5020841398703753, "learning_rate": 1.816869424168694e-05, "loss": 0.5681, "step": 23050 }, { "epoch": 0.6730022481095442, "grad_norm": 0.511134362980988, "learning_rate": 1.8167072181670723e-05, "loss": 0.6038, "step": 23051 }, { "epoch": 0.6730314443373916, "grad_norm": 0.5339914887973889, "learning_rate": 1.81654501216545e-05, "loss": 0.599, "step": 23052 }, { "epoch": 0.673060640565239, "grad_norm": 0.5042253616432169, "learning_rate": 1.8163828061638283e-05, "loss": 0.5695, "step": 23053 }, { "epoch": 0.6730898367930863, "grad_norm": 0.4987404745428374, "learning_rate": 1.816220600162206e-05, "loss": 0.5868, "step": 23054 }, { "epoch": 0.6731190330209337, "grad_norm": 0.5574912132860607, "learning_rate": 1.816058394160584e-05, "loss": 0.6208, "step": 23055 }, { "epoch": 0.673148229248781, "grad_norm": 0.5099664939376377, "learning_rate": 1.815896188158962e-05, "loss": 0.5863, "step": 23056 }, { "epoch": 0.6731774254766284, "grad_norm": 0.5102664283610168, "learning_rate": 1.81573398215734e-05, "loss": 0.5549, "step": 23057 }, { "epoch": 0.6732066217044758, "grad_norm": 0.5179496302370696, "learning_rate": 1.8155717761557178e-05, "loss": 0.5861, "step": 23058 }, { "epoch": 0.6732358179323231, "grad_norm": 0.5252077808628145, "learning_rate": 1.8154095701540957e-05, "loss": 0.5995, "step": 23059 }, { "epoch": 0.6732650141601705, "grad_norm": 0.49247768055861785, "learning_rate": 1.8152473641524735e-05, "loss": 0.5603, "step": 23060 }, { "epoch": 0.6732942103880178, "grad_norm": 0.4619063916345802, "learning_rate": 1.8150851581508517e-05, "loss": 0.478, "step": 23061 }, { "epoch": 0.6733234066158652, "grad_norm": 0.55319043947011, "learning_rate": 1.8149229521492295e-05, "loss": 0.6431, "step": 23062 }, { "epoch": 0.6733526028437126, "grad_norm": 0.5203291568269086, "learning_rate": 1.8147607461476077e-05, "loss": 0.5713, "step": 23063 }, { "epoch": 0.6733817990715599, "grad_norm": 0.4940978731376965, "learning_rate": 1.8145985401459855e-05, "loss": 0.5634, "step": 23064 }, { "epoch": 0.6734109952994073, "grad_norm": 0.5018980499705207, "learning_rate": 1.8144363341443634e-05, "loss": 0.5468, "step": 23065 }, { "epoch": 0.6734401915272547, "grad_norm": 0.5104546222191263, "learning_rate": 1.8142741281427415e-05, "loss": 0.5993, "step": 23066 }, { "epoch": 0.673469387755102, "grad_norm": 0.4808638800369219, "learning_rate": 1.8141119221411194e-05, "loss": 0.4961, "step": 23067 }, { "epoch": 0.6734985839829494, "grad_norm": 0.5365671645838721, "learning_rate": 1.8139497161394972e-05, "loss": 0.5876, "step": 23068 }, { "epoch": 0.6735277802107967, "grad_norm": 0.5083789565010951, "learning_rate": 1.813787510137875e-05, "loss": 0.5439, "step": 23069 }, { "epoch": 0.6735569764386441, "grad_norm": 0.5799217646562884, "learning_rate": 1.813625304136253e-05, "loss": 0.7002, "step": 23070 }, { "epoch": 0.6735861726664915, "grad_norm": 0.5518306961182695, "learning_rate": 1.813463098134631e-05, "loss": 0.6414, "step": 23071 }, { "epoch": 0.6736153688943388, "grad_norm": 0.5074266493473097, "learning_rate": 1.8133008921330092e-05, "loss": 0.5574, "step": 23072 }, { "epoch": 0.6736445651221862, "grad_norm": 0.5241498898372795, "learning_rate": 1.813138686131387e-05, "loss": 0.5823, "step": 23073 }, { "epoch": 0.6736737613500335, "grad_norm": 0.513341199927585, "learning_rate": 1.812976480129765e-05, "loss": 0.5616, "step": 23074 }, { "epoch": 0.6737029575778809, "grad_norm": 0.5239507197182763, "learning_rate": 1.8128142741281428e-05, "loss": 0.5812, "step": 23075 }, { "epoch": 0.6737321538057283, "grad_norm": 0.5101556971073101, "learning_rate": 1.812652068126521e-05, "loss": 0.5667, "step": 23076 }, { "epoch": 0.6737613500335756, "grad_norm": 0.4993902296977515, "learning_rate": 1.8124898621248988e-05, "loss": 0.5677, "step": 23077 }, { "epoch": 0.673790546261423, "grad_norm": 0.46479082358753643, "learning_rate": 1.8123276561232766e-05, "loss": 0.5022, "step": 23078 }, { "epoch": 0.6738197424892703, "grad_norm": 0.5547020206550077, "learning_rate": 1.8121654501216544e-05, "loss": 0.6541, "step": 23079 }, { "epoch": 0.6738489387171177, "grad_norm": 0.4947736089757717, "learning_rate": 1.8120032441200323e-05, "loss": 0.5616, "step": 23080 }, { "epoch": 0.6738781349449651, "grad_norm": 0.532459858672565, "learning_rate": 1.8118410381184105e-05, "loss": 0.5925, "step": 23081 }, { "epoch": 0.6739073311728124, "grad_norm": 0.5378164292106552, "learning_rate": 1.8116788321167886e-05, "loss": 0.5953, "step": 23082 }, { "epoch": 0.6739365274006598, "grad_norm": 0.4997008689904721, "learning_rate": 1.8115166261151665e-05, "loss": 0.5378, "step": 23083 }, { "epoch": 0.6739657236285072, "grad_norm": 0.7384328142762829, "learning_rate": 1.8113544201135443e-05, "loss": 0.5666, "step": 23084 }, { "epoch": 0.6739949198563545, "grad_norm": 0.5344857194354072, "learning_rate": 1.811192214111922e-05, "loss": 0.6015, "step": 23085 }, { "epoch": 0.6740241160842019, "grad_norm": 0.5466230724592029, "learning_rate": 1.8110300081103003e-05, "loss": 0.6111, "step": 23086 }, { "epoch": 0.6740533123120492, "grad_norm": 0.5460132765665027, "learning_rate": 1.810867802108678e-05, "loss": 0.6449, "step": 23087 }, { "epoch": 0.6740825085398966, "grad_norm": 0.5191719748781372, "learning_rate": 1.810705596107056e-05, "loss": 0.5706, "step": 23088 }, { "epoch": 0.674111704767744, "grad_norm": 0.48555060608556144, "learning_rate": 1.810543390105434e-05, "loss": 0.4935, "step": 23089 }, { "epoch": 0.6741409009955913, "grad_norm": 0.5227949044597129, "learning_rate": 1.8103811841038117e-05, "loss": 0.6066, "step": 23090 }, { "epoch": 0.6741700972234387, "grad_norm": 0.5843224696295538, "learning_rate": 1.81021897810219e-05, "loss": 0.6396, "step": 23091 }, { "epoch": 0.674199293451286, "grad_norm": 0.5020336790722113, "learning_rate": 1.810056772100568e-05, "loss": 0.5653, "step": 23092 }, { "epoch": 0.6742284896791334, "grad_norm": 0.5502329754729046, "learning_rate": 1.809894566098946e-05, "loss": 0.6836, "step": 23093 }, { "epoch": 0.6742576859069808, "grad_norm": 0.539887150962257, "learning_rate": 1.8097323600973237e-05, "loss": 0.6427, "step": 23094 }, { "epoch": 0.6742868821348281, "grad_norm": 0.48643380663376196, "learning_rate": 1.8095701540957015e-05, "loss": 0.5078, "step": 23095 }, { "epoch": 0.6743160783626756, "grad_norm": 0.5255898962084503, "learning_rate": 1.8094079480940794e-05, "loss": 0.6034, "step": 23096 }, { "epoch": 0.674345274590523, "grad_norm": 0.538518828318809, "learning_rate": 1.8092457420924576e-05, "loss": 0.6249, "step": 23097 }, { "epoch": 0.6743744708183703, "grad_norm": 0.5053324283318077, "learning_rate": 1.8090835360908354e-05, "loss": 0.5419, "step": 23098 }, { "epoch": 0.6744036670462177, "grad_norm": 0.5170102703717918, "learning_rate": 1.8089213300892132e-05, "loss": 0.5735, "step": 23099 }, { "epoch": 0.674432863274065, "grad_norm": 0.5280328636320367, "learning_rate": 1.8087591240875914e-05, "loss": 0.6176, "step": 23100 }, { "epoch": 0.6744620595019124, "grad_norm": 0.5077599487618853, "learning_rate": 1.8085969180859692e-05, "loss": 0.5904, "step": 23101 }, { "epoch": 0.6744912557297598, "grad_norm": 0.547799751322932, "learning_rate": 1.8084347120843474e-05, "loss": 0.6398, "step": 23102 }, { "epoch": 0.6745204519576071, "grad_norm": 0.5468387472299872, "learning_rate": 1.8082725060827253e-05, "loss": 0.6364, "step": 23103 }, { "epoch": 0.6745496481854545, "grad_norm": 0.5358471124051727, "learning_rate": 1.808110300081103e-05, "loss": 0.6365, "step": 23104 }, { "epoch": 0.6745788444133018, "grad_norm": 0.5255915650524762, "learning_rate": 1.807948094079481e-05, "loss": 0.6143, "step": 23105 }, { "epoch": 0.6746080406411492, "grad_norm": 0.49915745664137157, "learning_rate": 1.8077858880778588e-05, "loss": 0.5077, "step": 23106 }, { "epoch": 0.6746372368689966, "grad_norm": 0.546355333546863, "learning_rate": 1.807623682076237e-05, "loss": 0.6433, "step": 23107 }, { "epoch": 0.6746664330968439, "grad_norm": 0.5039577200791457, "learning_rate": 1.8074614760746148e-05, "loss": 0.5695, "step": 23108 }, { "epoch": 0.6746956293246913, "grad_norm": 0.5533102797725147, "learning_rate": 1.8072992700729926e-05, "loss": 0.6656, "step": 23109 }, { "epoch": 0.6747248255525387, "grad_norm": 0.4737507471576406, "learning_rate": 1.8071370640713708e-05, "loss": 0.522, "step": 23110 }, { "epoch": 0.674754021780386, "grad_norm": 0.5209477649711669, "learning_rate": 1.8069748580697486e-05, "loss": 0.5504, "step": 23111 }, { "epoch": 0.6747832180082334, "grad_norm": 0.5164990855637571, "learning_rate": 1.8068126520681268e-05, "loss": 0.5993, "step": 23112 }, { "epoch": 0.6748124142360807, "grad_norm": 0.5098466553034605, "learning_rate": 1.8066504460665046e-05, "loss": 0.5292, "step": 23113 }, { "epoch": 0.6748416104639281, "grad_norm": 0.4969224519579003, "learning_rate": 1.8064882400648825e-05, "loss": 0.5769, "step": 23114 }, { "epoch": 0.6748708066917755, "grad_norm": 0.5260790893753279, "learning_rate": 1.8063260340632603e-05, "loss": 0.6272, "step": 23115 }, { "epoch": 0.6749000029196228, "grad_norm": 0.5066520249974663, "learning_rate": 1.806163828061638e-05, "loss": 0.5341, "step": 23116 }, { "epoch": 0.6749291991474702, "grad_norm": 0.5181851545094152, "learning_rate": 1.8060016220600163e-05, "loss": 0.6016, "step": 23117 }, { "epoch": 0.6749583953753175, "grad_norm": 0.5097027889182195, "learning_rate": 1.8058394160583942e-05, "loss": 0.5622, "step": 23118 }, { "epoch": 0.6749875916031649, "grad_norm": 0.5442927806089407, "learning_rate": 1.8056772100567723e-05, "loss": 0.6316, "step": 23119 }, { "epoch": 0.6750167878310123, "grad_norm": 0.5055757853910782, "learning_rate": 1.8055150040551502e-05, "loss": 0.5257, "step": 23120 }, { "epoch": 0.6750459840588596, "grad_norm": 0.49317079335218167, "learning_rate": 1.805352798053528e-05, "loss": 0.5037, "step": 23121 }, { "epoch": 0.675075180286707, "grad_norm": 0.581087938853253, "learning_rate": 1.8051905920519062e-05, "loss": 0.6512, "step": 23122 }, { "epoch": 0.6751043765145543, "grad_norm": 0.5289835048941908, "learning_rate": 1.805028386050284e-05, "loss": 0.6001, "step": 23123 }, { "epoch": 0.6751335727424017, "grad_norm": 0.49005382299650063, "learning_rate": 1.804866180048662e-05, "loss": 0.5436, "step": 23124 }, { "epoch": 0.6751627689702491, "grad_norm": 0.5077985223018303, "learning_rate": 1.8047039740470397e-05, "loss": 0.6003, "step": 23125 }, { "epoch": 0.6751919651980964, "grad_norm": 0.5510836240512955, "learning_rate": 1.8045417680454175e-05, "loss": 0.6497, "step": 23126 }, { "epoch": 0.6752211614259438, "grad_norm": 0.5352992479954469, "learning_rate": 1.8043795620437957e-05, "loss": 0.6007, "step": 23127 }, { "epoch": 0.6752503576537912, "grad_norm": 0.5777803529940405, "learning_rate": 1.8042173560421736e-05, "loss": 0.7043, "step": 23128 }, { "epoch": 0.6752795538816385, "grad_norm": 0.52193277398134, "learning_rate": 1.8040551500405517e-05, "loss": 0.6025, "step": 23129 }, { "epoch": 0.6753087501094859, "grad_norm": 0.49873663608241064, "learning_rate": 1.8038929440389296e-05, "loss": 0.5488, "step": 23130 }, { "epoch": 0.6753379463373332, "grad_norm": 0.5153537098726106, "learning_rate": 1.8037307380373074e-05, "loss": 0.5477, "step": 23131 }, { "epoch": 0.6753671425651806, "grad_norm": 0.5219989328510413, "learning_rate": 1.8035685320356856e-05, "loss": 0.5509, "step": 23132 }, { "epoch": 0.675396338793028, "grad_norm": 0.5025936854389471, "learning_rate": 1.8034063260340634e-05, "loss": 0.5567, "step": 23133 }, { "epoch": 0.6754255350208753, "grad_norm": 0.5059507767939919, "learning_rate": 1.8032441200324413e-05, "loss": 0.5777, "step": 23134 }, { "epoch": 0.6754547312487227, "grad_norm": 0.4923322762321413, "learning_rate": 1.803081914030819e-05, "loss": 0.5025, "step": 23135 }, { "epoch": 0.67548392747657, "grad_norm": 0.5522554767904673, "learning_rate": 1.802919708029197e-05, "loss": 0.6578, "step": 23136 }, { "epoch": 0.6755131237044174, "grad_norm": 0.49235361764378177, "learning_rate": 1.802757502027575e-05, "loss": 0.5411, "step": 23137 }, { "epoch": 0.6755423199322648, "grad_norm": 0.4856837582606408, "learning_rate": 1.8025952960259533e-05, "loss": 0.4988, "step": 23138 }, { "epoch": 0.6755715161601121, "grad_norm": 0.4999829309895394, "learning_rate": 1.802433090024331e-05, "loss": 0.587, "step": 23139 }, { "epoch": 0.6756007123879595, "grad_norm": 0.5247553416115974, "learning_rate": 1.802270884022709e-05, "loss": 0.5979, "step": 23140 }, { "epoch": 0.6756299086158069, "grad_norm": 0.48981478294613945, "learning_rate": 1.8021086780210868e-05, "loss": 0.5304, "step": 23141 }, { "epoch": 0.6756591048436542, "grad_norm": 0.5011247841070593, "learning_rate": 1.801946472019465e-05, "loss": 0.5877, "step": 23142 }, { "epoch": 0.6756883010715016, "grad_norm": 0.560169598039216, "learning_rate": 1.8017842660178428e-05, "loss": 0.6419, "step": 23143 }, { "epoch": 0.6757174972993489, "grad_norm": 0.5271136337862992, "learning_rate": 1.8016220600162207e-05, "loss": 0.5823, "step": 23144 }, { "epoch": 0.6757466935271963, "grad_norm": 0.5346444146566721, "learning_rate": 1.8014598540145985e-05, "loss": 0.6006, "step": 23145 }, { "epoch": 0.6757758897550437, "grad_norm": 0.5414318620227525, "learning_rate": 1.8012976480129763e-05, "loss": 0.6437, "step": 23146 }, { "epoch": 0.675805085982891, "grad_norm": 0.535860012178104, "learning_rate": 1.8011354420113545e-05, "loss": 0.5532, "step": 23147 }, { "epoch": 0.6758342822107384, "grad_norm": 0.5229007163420895, "learning_rate": 1.8009732360097327e-05, "loss": 0.5931, "step": 23148 }, { "epoch": 0.6758634784385857, "grad_norm": 0.4961312502403833, "learning_rate": 1.8008110300081105e-05, "loss": 0.5383, "step": 23149 }, { "epoch": 0.6758926746664331, "grad_norm": 0.4858524430775045, "learning_rate": 1.8006488240064884e-05, "loss": 0.5207, "step": 23150 }, { "epoch": 0.6759218708942805, "grad_norm": 0.5000285647417533, "learning_rate": 1.8004866180048662e-05, "loss": 0.5789, "step": 23151 }, { "epoch": 0.6759510671221278, "grad_norm": 0.5160838151993876, "learning_rate": 1.800324412003244e-05, "loss": 0.5772, "step": 23152 }, { "epoch": 0.6759802633499752, "grad_norm": 0.5262807677470863, "learning_rate": 1.8001622060016222e-05, "loss": 0.5806, "step": 23153 }, { "epoch": 0.6760094595778225, "grad_norm": 0.5702584398621693, "learning_rate": 1.8e-05, "loss": 0.7094, "step": 23154 }, { "epoch": 0.6760386558056699, "grad_norm": 0.5425475579789627, "learning_rate": 1.799837793998378e-05, "loss": 0.6288, "step": 23155 }, { "epoch": 0.6760678520335173, "grad_norm": 0.5121886338433433, "learning_rate": 1.7996755879967557e-05, "loss": 0.5394, "step": 23156 }, { "epoch": 0.6760970482613646, "grad_norm": 0.5246584680346151, "learning_rate": 1.799513381995134e-05, "loss": 0.5937, "step": 23157 }, { "epoch": 0.676126244489212, "grad_norm": 0.51307535463434, "learning_rate": 1.799351175993512e-05, "loss": 0.6022, "step": 23158 }, { "epoch": 0.6761554407170594, "grad_norm": 0.4905692123335035, "learning_rate": 1.79918896999189e-05, "loss": 0.5429, "step": 23159 }, { "epoch": 0.6761846369449067, "grad_norm": 0.5118962631924981, "learning_rate": 1.7990267639902677e-05, "loss": 0.5748, "step": 23160 }, { "epoch": 0.6762138331727541, "grad_norm": 0.5315657267062968, "learning_rate": 1.7988645579886456e-05, "loss": 0.6231, "step": 23161 }, { "epoch": 0.6762430294006014, "grad_norm": 0.5474581327287376, "learning_rate": 1.7987023519870234e-05, "loss": 0.6368, "step": 23162 }, { "epoch": 0.6762722256284488, "grad_norm": 0.5310100138762194, "learning_rate": 1.7985401459854016e-05, "loss": 0.6077, "step": 23163 }, { "epoch": 0.6763014218562962, "grad_norm": 0.5133501884064555, "learning_rate": 1.7983779399837794e-05, "loss": 0.5657, "step": 23164 }, { "epoch": 0.6763306180841435, "grad_norm": 0.522150946564635, "learning_rate": 1.7982157339821573e-05, "loss": 0.5911, "step": 23165 }, { "epoch": 0.6763598143119909, "grad_norm": 0.5426505563953475, "learning_rate": 1.7980535279805354e-05, "loss": 0.644, "step": 23166 }, { "epoch": 0.6763890105398382, "grad_norm": 0.5337051765455504, "learning_rate": 1.7978913219789133e-05, "loss": 0.6407, "step": 23167 }, { "epoch": 0.6764182067676856, "grad_norm": 0.5631456990500486, "learning_rate": 1.7977291159772915e-05, "loss": 0.6389, "step": 23168 }, { "epoch": 0.676447402995533, "grad_norm": 0.5044251114343616, "learning_rate": 1.7975669099756693e-05, "loss": 0.5843, "step": 23169 }, { "epoch": 0.6764765992233803, "grad_norm": 0.47826715567466976, "learning_rate": 1.797404703974047e-05, "loss": 0.5044, "step": 23170 }, { "epoch": 0.6765057954512277, "grad_norm": 0.555915027169013, "learning_rate": 1.797242497972425e-05, "loss": 0.6083, "step": 23171 }, { "epoch": 0.676534991679075, "grad_norm": 0.5108103655710274, "learning_rate": 1.7970802919708028e-05, "loss": 0.5523, "step": 23172 }, { "epoch": 0.6765641879069224, "grad_norm": 0.5507767765204191, "learning_rate": 1.796918085969181e-05, "loss": 0.6283, "step": 23173 }, { "epoch": 0.6765933841347698, "grad_norm": 0.5457273669807331, "learning_rate": 1.7967558799675588e-05, "loss": 0.6012, "step": 23174 }, { "epoch": 0.6766225803626171, "grad_norm": 0.5176508992916422, "learning_rate": 1.7965936739659367e-05, "loss": 0.594, "step": 23175 }, { "epoch": 0.6766517765904645, "grad_norm": 0.5788551910536922, "learning_rate": 1.796431467964315e-05, "loss": 0.65, "step": 23176 }, { "epoch": 0.6766809728183119, "grad_norm": 0.5222465349154689, "learning_rate": 1.7962692619626927e-05, "loss": 0.5806, "step": 23177 }, { "epoch": 0.6767101690461592, "grad_norm": 0.4766420575098879, "learning_rate": 1.796107055961071e-05, "loss": 0.5018, "step": 23178 }, { "epoch": 0.6767393652740066, "grad_norm": 0.5163758707664964, "learning_rate": 1.7959448499594487e-05, "loss": 0.5637, "step": 23179 }, { "epoch": 0.6767685615018539, "grad_norm": 0.510949678472483, "learning_rate": 1.7957826439578265e-05, "loss": 0.5895, "step": 23180 }, { "epoch": 0.6767977577297013, "grad_norm": 0.5300829656888835, "learning_rate": 1.7956204379562044e-05, "loss": 0.5864, "step": 23181 }, { "epoch": 0.6768269539575487, "grad_norm": 0.5274125285282115, "learning_rate": 1.7954582319545822e-05, "loss": 0.5857, "step": 23182 }, { "epoch": 0.676856150185396, "grad_norm": 0.5957468215136943, "learning_rate": 1.7952960259529604e-05, "loss": 0.7279, "step": 23183 }, { "epoch": 0.6768853464132434, "grad_norm": 0.5010900681923499, "learning_rate": 1.7951338199513382e-05, "loss": 0.5236, "step": 23184 }, { "epoch": 0.6769145426410907, "grad_norm": 0.47524336098589587, "learning_rate": 1.7949716139497164e-05, "loss": 0.529, "step": 23185 }, { "epoch": 0.6769437388689381, "grad_norm": 0.5179733262849145, "learning_rate": 1.7948094079480942e-05, "loss": 0.5832, "step": 23186 }, { "epoch": 0.6769729350967855, "grad_norm": 0.50644399600374, "learning_rate": 1.794647201946472e-05, "loss": 0.4932, "step": 23187 }, { "epoch": 0.6770021313246328, "grad_norm": 0.47840869760706195, "learning_rate": 1.7944849959448502e-05, "loss": 0.5157, "step": 23188 }, { "epoch": 0.6770313275524802, "grad_norm": 0.5417004415618866, "learning_rate": 1.794322789943228e-05, "loss": 0.6494, "step": 23189 }, { "epoch": 0.6770605237803276, "grad_norm": 0.5096324270066898, "learning_rate": 1.794160583941606e-05, "loss": 0.5697, "step": 23190 }, { "epoch": 0.6770897200081749, "grad_norm": 0.4993626150545745, "learning_rate": 1.7939983779399838e-05, "loss": 0.5425, "step": 23191 }, { "epoch": 0.6771189162360223, "grad_norm": 0.5104451005168045, "learning_rate": 1.7938361719383616e-05, "loss": 0.5871, "step": 23192 }, { "epoch": 0.6771481124638696, "grad_norm": 0.5434262974967333, "learning_rate": 1.7936739659367398e-05, "loss": 0.6703, "step": 23193 }, { "epoch": 0.677177308691717, "grad_norm": 0.522251475095559, "learning_rate": 1.7935117599351176e-05, "loss": 0.6007, "step": 23194 }, { "epoch": 0.6772065049195644, "grad_norm": 0.5108383937069468, "learning_rate": 1.7933495539334958e-05, "loss": 0.5959, "step": 23195 }, { "epoch": 0.6772357011474117, "grad_norm": 0.5129487795550036, "learning_rate": 1.7931873479318736e-05, "loss": 0.5956, "step": 23196 }, { "epoch": 0.6772648973752591, "grad_norm": 0.5251444405209365, "learning_rate": 1.7930251419302515e-05, "loss": 0.6147, "step": 23197 }, { "epoch": 0.6772940936031064, "grad_norm": 0.5627290119029751, "learning_rate": 1.7928629359286296e-05, "loss": 0.6739, "step": 23198 }, { "epoch": 0.6773232898309538, "grad_norm": 0.5123975957565914, "learning_rate": 1.7927007299270075e-05, "loss": 0.6042, "step": 23199 }, { "epoch": 0.6773524860588012, "grad_norm": 0.5247169962779281, "learning_rate": 1.7925385239253853e-05, "loss": 0.6496, "step": 23200 }, { "epoch": 0.6773816822866485, "grad_norm": 0.5482565603220192, "learning_rate": 1.792376317923763e-05, "loss": 0.6193, "step": 23201 }, { "epoch": 0.6774108785144959, "grad_norm": 0.5426030747720743, "learning_rate": 1.792214111922141e-05, "loss": 0.6137, "step": 23202 }, { "epoch": 0.6774400747423432, "grad_norm": 0.4828007398121312, "learning_rate": 1.792051905920519e-05, "loss": 0.5267, "step": 23203 }, { "epoch": 0.6774692709701906, "grad_norm": 0.5150604964460328, "learning_rate": 1.7918896999188973e-05, "loss": 0.5424, "step": 23204 }, { "epoch": 0.677498467198038, "grad_norm": 0.48702059336461523, "learning_rate": 1.7917274939172752e-05, "loss": 0.5507, "step": 23205 }, { "epoch": 0.6775276634258853, "grad_norm": 0.4981456444272619, "learning_rate": 1.791565287915653e-05, "loss": 0.5266, "step": 23206 }, { "epoch": 0.6775568596537327, "grad_norm": 0.524124703314456, "learning_rate": 1.791403081914031e-05, "loss": 0.6219, "step": 23207 }, { "epoch": 0.67758605588158, "grad_norm": 0.5187480996618017, "learning_rate": 1.791240875912409e-05, "loss": 0.5845, "step": 23208 }, { "epoch": 0.6776152521094274, "grad_norm": 0.5052863371566678, "learning_rate": 1.791078669910787e-05, "loss": 0.5563, "step": 23209 }, { "epoch": 0.6776444483372748, "grad_norm": 0.5142584380572213, "learning_rate": 1.7909164639091647e-05, "loss": 0.587, "step": 23210 }, { "epoch": 0.6776736445651221, "grad_norm": 0.5611045092516709, "learning_rate": 1.7907542579075425e-05, "loss": 0.6489, "step": 23211 }, { "epoch": 0.6777028407929695, "grad_norm": 0.5299130157582288, "learning_rate": 1.7905920519059204e-05, "loss": 0.5532, "step": 23212 }, { "epoch": 0.6777320370208169, "grad_norm": 0.46750143549361156, "learning_rate": 1.7904298459042986e-05, "loss": 0.5138, "step": 23213 }, { "epoch": 0.6777612332486642, "grad_norm": 0.5076057801874837, "learning_rate": 1.7902676399026767e-05, "loss": 0.5549, "step": 23214 }, { "epoch": 0.6777904294765116, "grad_norm": 0.48325216883675776, "learning_rate": 1.7901054339010546e-05, "loss": 0.5174, "step": 23215 }, { "epoch": 0.677819625704359, "grad_norm": 0.5258374155312332, "learning_rate": 1.7899432278994324e-05, "loss": 0.6235, "step": 23216 }, { "epoch": 0.6778488219322064, "grad_norm": 0.49985615601647504, "learning_rate": 1.7897810218978102e-05, "loss": 0.5786, "step": 23217 }, { "epoch": 0.6778780181600538, "grad_norm": 0.5084493197953355, "learning_rate": 1.789618815896188e-05, "loss": 0.5849, "step": 23218 }, { "epoch": 0.6779072143879011, "grad_norm": 0.5315544644912548, "learning_rate": 1.7894566098945663e-05, "loss": 0.6069, "step": 23219 }, { "epoch": 0.6779364106157485, "grad_norm": 0.5262337143480641, "learning_rate": 1.789294403892944e-05, "loss": 0.6507, "step": 23220 }, { "epoch": 0.6779656068435959, "grad_norm": 0.47803351526568916, "learning_rate": 1.789132197891322e-05, "loss": 0.5316, "step": 23221 }, { "epoch": 0.6779948030714432, "grad_norm": 0.5011677062311476, "learning_rate": 1.7889699918896998e-05, "loss": 0.5742, "step": 23222 }, { "epoch": 0.6780239992992906, "grad_norm": 0.6154896369232663, "learning_rate": 1.788807785888078e-05, "loss": 0.6558, "step": 23223 }, { "epoch": 0.6780531955271379, "grad_norm": 0.5026398018712502, "learning_rate": 1.788645579886456e-05, "loss": 0.5525, "step": 23224 }, { "epoch": 0.6780823917549853, "grad_norm": 0.532370900255494, "learning_rate": 1.788483373884834e-05, "loss": 0.6129, "step": 23225 }, { "epoch": 0.6781115879828327, "grad_norm": 0.5418605398691244, "learning_rate": 1.7883211678832118e-05, "loss": 0.6891, "step": 23226 }, { "epoch": 0.67814078421068, "grad_norm": 0.5368013915642537, "learning_rate": 1.7881589618815896e-05, "loss": 0.6293, "step": 23227 }, { "epoch": 0.6781699804385274, "grad_norm": 0.5027756054232181, "learning_rate": 1.7879967558799675e-05, "loss": 0.5658, "step": 23228 }, { "epoch": 0.6781991766663747, "grad_norm": 0.5404612297666501, "learning_rate": 1.7878345498783456e-05, "loss": 0.6264, "step": 23229 }, { "epoch": 0.6782283728942221, "grad_norm": 0.4946440739950982, "learning_rate": 1.7876723438767235e-05, "loss": 0.5662, "step": 23230 }, { "epoch": 0.6782575691220695, "grad_norm": 0.5175032380169783, "learning_rate": 1.7875101378751013e-05, "loss": 0.6085, "step": 23231 }, { "epoch": 0.6782867653499168, "grad_norm": 0.5061546255318785, "learning_rate": 1.7873479318734795e-05, "loss": 0.6013, "step": 23232 }, { "epoch": 0.6783159615777642, "grad_norm": 0.5471709089090392, "learning_rate": 1.7871857258718573e-05, "loss": 0.6365, "step": 23233 }, { "epoch": 0.6783451578056116, "grad_norm": 0.5381706054749983, "learning_rate": 1.7870235198702355e-05, "loss": 0.6669, "step": 23234 }, { "epoch": 0.6783743540334589, "grad_norm": 0.5148946472517894, "learning_rate": 1.7868613138686133e-05, "loss": 0.5983, "step": 23235 }, { "epoch": 0.6784035502613063, "grad_norm": 0.481711467003769, "learning_rate": 1.7866991078669912e-05, "loss": 0.5409, "step": 23236 }, { "epoch": 0.6784327464891536, "grad_norm": 0.5298114480342034, "learning_rate": 1.786536901865369e-05, "loss": 0.6327, "step": 23237 }, { "epoch": 0.678461942717001, "grad_norm": 0.4944725817795151, "learning_rate": 1.786374695863747e-05, "loss": 0.5682, "step": 23238 }, { "epoch": 0.6784911389448484, "grad_norm": 0.5087831860837253, "learning_rate": 1.786212489862125e-05, "loss": 0.5862, "step": 23239 }, { "epoch": 0.6785203351726957, "grad_norm": 0.5450967994531911, "learning_rate": 1.786050283860503e-05, "loss": 0.6529, "step": 23240 }, { "epoch": 0.6785495314005431, "grad_norm": 0.47085236082847126, "learning_rate": 1.7858880778588807e-05, "loss": 0.5234, "step": 23241 }, { "epoch": 0.6785787276283904, "grad_norm": 0.5062090335363624, "learning_rate": 1.785725871857259e-05, "loss": 0.5823, "step": 23242 }, { "epoch": 0.6786079238562378, "grad_norm": 0.4925781951046308, "learning_rate": 1.7855636658556367e-05, "loss": 0.5599, "step": 23243 }, { "epoch": 0.6786371200840852, "grad_norm": 0.5233679402537312, "learning_rate": 1.785401459854015e-05, "loss": 0.5743, "step": 23244 }, { "epoch": 0.6786663163119325, "grad_norm": 0.5470142285806879, "learning_rate": 1.7852392538523927e-05, "loss": 0.6835, "step": 23245 }, { "epoch": 0.6786955125397799, "grad_norm": 0.5069957430518476, "learning_rate": 1.7850770478507706e-05, "loss": 0.5464, "step": 23246 }, { "epoch": 0.6787247087676272, "grad_norm": 0.47464032005639856, "learning_rate": 1.7849148418491484e-05, "loss": 0.4936, "step": 23247 }, { "epoch": 0.6787539049954746, "grad_norm": 0.5345700983911745, "learning_rate": 1.7847526358475262e-05, "loss": 0.5875, "step": 23248 }, { "epoch": 0.678783101223322, "grad_norm": 0.5254232194982584, "learning_rate": 1.7845904298459044e-05, "loss": 0.5782, "step": 23249 }, { "epoch": 0.6788122974511693, "grad_norm": 0.5489793825419576, "learning_rate": 1.7844282238442823e-05, "loss": 0.6493, "step": 23250 }, { "epoch": 0.6788414936790167, "grad_norm": 0.5439983350495001, "learning_rate": 1.7842660178426604e-05, "loss": 0.6484, "step": 23251 }, { "epoch": 0.678870689906864, "grad_norm": 0.5072448201379004, "learning_rate": 1.7841038118410383e-05, "loss": 0.5698, "step": 23252 }, { "epoch": 0.6788998861347114, "grad_norm": 0.5242843178341899, "learning_rate": 1.783941605839416e-05, "loss": 0.6089, "step": 23253 }, { "epoch": 0.6789290823625588, "grad_norm": 0.5243368645110924, "learning_rate": 1.7837793998377943e-05, "loss": 0.5752, "step": 23254 }, { "epoch": 0.6789582785904061, "grad_norm": 0.5563092417161586, "learning_rate": 1.783617193836172e-05, "loss": 0.6143, "step": 23255 }, { "epoch": 0.6789874748182535, "grad_norm": 0.5268338178828647, "learning_rate": 1.78345498783455e-05, "loss": 0.606, "step": 23256 }, { "epoch": 0.6790166710461009, "grad_norm": 0.5014170339402865, "learning_rate": 1.7832927818329278e-05, "loss": 0.6076, "step": 23257 }, { "epoch": 0.6790458672739482, "grad_norm": 0.5442660641301802, "learning_rate": 1.7831305758313056e-05, "loss": 0.6655, "step": 23258 }, { "epoch": 0.6790750635017956, "grad_norm": 0.5048766582414422, "learning_rate": 1.7829683698296838e-05, "loss": 0.5834, "step": 23259 }, { "epoch": 0.679104259729643, "grad_norm": 0.545544923157532, "learning_rate": 1.7828061638280617e-05, "loss": 0.616, "step": 23260 }, { "epoch": 0.6791334559574903, "grad_norm": 0.5457699156246452, "learning_rate": 1.7826439578264398e-05, "loss": 0.6297, "step": 23261 }, { "epoch": 0.6791626521853377, "grad_norm": 0.5058066177606982, "learning_rate": 1.7824817518248177e-05, "loss": 0.6029, "step": 23262 }, { "epoch": 0.679191848413185, "grad_norm": 0.5080242352072465, "learning_rate": 1.7823195458231955e-05, "loss": 0.5569, "step": 23263 }, { "epoch": 0.6792210446410324, "grad_norm": 0.5199132529295567, "learning_rate": 1.7821573398215737e-05, "loss": 0.5692, "step": 23264 }, { "epoch": 0.6792502408688798, "grad_norm": 0.48166750825961596, "learning_rate": 1.7819951338199515e-05, "loss": 0.544, "step": 23265 }, { "epoch": 0.6792794370967271, "grad_norm": 0.5233743808074319, "learning_rate": 1.7818329278183294e-05, "loss": 0.5936, "step": 23266 }, { "epoch": 0.6793086333245745, "grad_norm": 0.5101497360021559, "learning_rate": 1.7816707218167072e-05, "loss": 0.5791, "step": 23267 }, { "epoch": 0.6793378295524218, "grad_norm": 0.5614631207617102, "learning_rate": 1.781508515815085e-05, "loss": 0.698, "step": 23268 }, { "epoch": 0.6793670257802692, "grad_norm": 0.5330158116078123, "learning_rate": 1.7813463098134632e-05, "loss": 0.6215, "step": 23269 }, { "epoch": 0.6793962220081166, "grad_norm": 0.5282265472075242, "learning_rate": 1.7811841038118414e-05, "loss": 0.6069, "step": 23270 }, { "epoch": 0.6794254182359639, "grad_norm": 0.507699908468085, "learning_rate": 1.7810218978102192e-05, "loss": 0.6154, "step": 23271 }, { "epoch": 0.6794546144638113, "grad_norm": 0.5264115995310351, "learning_rate": 1.780859691808597e-05, "loss": 0.582, "step": 23272 }, { "epoch": 0.6794838106916586, "grad_norm": 0.5207348369101187, "learning_rate": 1.780697485806975e-05, "loss": 0.6025, "step": 23273 }, { "epoch": 0.679513006919506, "grad_norm": 0.5152855018906627, "learning_rate": 1.7805352798053527e-05, "loss": 0.636, "step": 23274 }, { "epoch": 0.6795422031473534, "grad_norm": 0.52276615088786, "learning_rate": 1.780373073803731e-05, "loss": 0.5915, "step": 23275 }, { "epoch": 0.6795713993752007, "grad_norm": 0.6156528329276512, "learning_rate": 1.7802108678021087e-05, "loss": 0.6257, "step": 23276 }, { "epoch": 0.6796005956030481, "grad_norm": 0.5098276361259854, "learning_rate": 1.7800486618004866e-05, "loss": 0.5844, "step": 23277 }, { "epoch": 0.6796297918308954, "grad_norm": 0.5054348579687097, "learning_rate": 1.7798864557988644e-05, "loss": 0.581, "step": 23278 }, { "epoch": 0.6796589880587428, "grad_norm": 0.5170443957764878, "learning_rate": 1.7797242497972426e-05, "loss": 0.6366, "step": 23279 }, { "epoch": 0.6796881842865902, "grad_norm": 0.5422645313305684, "learning_rate": 1.7795620437956208e-05, "loss": 0.6518, "step": 23280 }, { "epoch": 0.6797173805144375, "grad_norm": 0.5085144529704866, "learning_rate": 1.7793998377939986e-05, "loss": 0.5806, "step": 23281 }, { "epoch": 0.6797465767422849, "grad_norm": 0.5258204448732352, "learning_rate": 1.7792376317923764e-05, "loss": 0.6278, "step": 23282 }, { "epoch": 0.6797757729701323, "grad_norm": 0.5467935121078178, "learning_rate": 1.7790754257907543e-05, "loss": 0.5937, "step": 23283 }, { "epoch": 0.6798049691979796, "grad_norm": 0.5059917301269214, "learning_rate": 1.778913219789132e-05, "loss": 0.5517, "step": 23284 }, { "epoch": 0.679834165425827, "grad_norm": 0.4492150175684999, "learning_rate": 1.7787510137875103e-05, "loss": 0.4619, "step": 23285 }, { "epoch": 0.6798633616536743, "grad_norm": 0.5126741885619093, "learning_rate": 1.778588807785888e-05, "loss": 0.547, "step": 23286 }, { "epoch": 0.6798925578815217, "grad_norm": 0.49924893396244996, "learning_rate": 1.778426601784266e-05, "loss": 0.5467, "step": 23287 }, { "epoch": 0.6799217541093691, "grad_norm": 0.47940263948991124, "learning_rate": 1.7782643957826438e-05, "loss": 0.5111, "step": 23288 }, { "epoch": 0.6799509503372164, "grad_norm": 0.5382679667168749, "learning_rate": 1.778102189781022e-05, "loss": 0.6008, "step": 23289 }, { "epoch": 0.6799801465650638, "grad_norm": 0.507909185841827, "learning_rate": 1.7779399837794e-05, "loss": 0.5776, "step": 23290 }, { "epoch": 0.6800093427929111, "grad_norm": 0.533437410787404, "learning_rate": 1.777777777777778e-05, "loss": 0.6354, "step": 23291 }, { "epoch": 0.6800385390207585, "grad_norm": 0.5258442285243685, "learning_rate": 1.777615571776156e-05, "loss": 0.6074, "step": 23292 }, { "epoch": 0.6800677352486059, "grad_norm": 0.5206585901213235, "learning_rate": 1.7774533657745337e-05, "loss": 0.5954, "step": 23293 }, { "epoch": 0.6800969314764532, "grad_norm": 0.548412365753272, "learning_rate": 1.7772911597729115e-05, "loss": 0.6098, "step": 23294 }, { "epoch": 0.6801261277043006, "grad_norm": 0.4924172438223522, "learning_rate": 1.7771289537712897e-05, "loss": 0.5505, "step": 23295 }, { "epoch": 0.680155323932148, "grad_norm": 0.5482695180632778, "learning_rate": 1.7769667477696675e-05, "loss": 0.6332, "step": 23296 }, { "epoch": 0.6801845201599953, "grad_norm": 0.5161487177357982, "learning_rate": 1.7768045417680454e-05, "loss": 0.5596, "step": 23297 }, { "epoch": 0.6802137163878427, "grad_norm": 0.4945385282214998, "learning_rate": 1.7766423357664232e-05, "loss": 0.5309, "step": 23298 }, { "epoch": 0.68024291261569, "grad_norm": 0.5237000029274758, "learning_rate": 1.7764801297648014e-05, "loss": 0.5819, "step": 23299 }, { "epoch": 0.6802721088435374, "grad_norm": 0.5806574034974686, "learning_rate": 1.7763179237631796e-05, "loss": 0.7483, "step": 23300 }, { "epoch": 0.6803013050713848, "grad_norm": 0.52061692002058, "learning_rate": 1.7761557177615574e-05, "loss": 0.5906, "step": 23301 }, { "epoch": 0.6803305012992321, "grad_norm": 0.5207829238751258, "learning_rate": 1.7759935117599352e-05, "loss": 0.6006, "step": 23302 }, { "epoch": 0.6803596975270795, "grad_norm": 0.5347421650206338, "learning_rate": 1.775831305758313e-05, "loss": 0.6064, "step": 23303 }, { "epoch": 0.6803888937549268, "grad_norm": 0.5116076171923528, "learning_rate": 1.775669099756691e-05, "loss": 0.575, "step": 23304 }, { "epoch": 0.6804180899827742, "grad_norm": 0.5872495924521766, "learning_rate": 1.775506893755069e-05, "loss": 0.7259, "step": 23305 }, { "epoch": 0.6804472862106216, "grad_norm": 0.4998442697216877, "learning_rate": 1.775344687753447e-05, "loss": 0.5465, "step": 23306 }, { "epoch": 0.6804764824384689, "grad_norm": 0.5565084152428391, "learning_rate": 1.7751824817518248e-05, "loss": 0.6719, "step": 23307 }, { "epoch": 0.6805056786663163, "grad_norm": 0.5278078827384727, "learning_rate": 1.775020275750203e-05, "loss": 0.5847, "step": 23308 }, { "epoch": 0.6805348748941636, "grad_norm": 0.5096491307619003, "learning_rate": 1.7748580697485808e-05, "loss": 0.593, "step": 23309 }, { "epoch": 0.680564071122011, "grad_norm": 0.5069810278951133, "learning_rate": 1.774695863746959e-05, "loss": 0.5998, "step": 23310 }, { "epoch": 0.6805932673498584, "grad_norm": 0.5016313952889337, "learning_rate": 1.7745336577453368e-05, "loss": 0.602, "step": 23311 }, { "epoch": 0.6806224635777057, "grad_norm": 0.5084559335903003, "learning_rate": 1.7743714517437146e-05, "loss": 0.5705, "step": 23312 }, { "epoch": 0.6806516598055531, "grad_norm": 0.4811975206335835, "learning_rate": 1.7742092457420925e-05, "loss": 0.5305, "step": 23313 }, { "epoch": 0.6806808560334004, "grad_norm": 0.47646425428305095, "learning_rate": 1.7740470397404703e-05, "loss": 0.5402, "step": 23314 }, { "epoch": 0.6807100522612478, "grad_norm": 0.49346483817003334, "learning_rate": 1.7738848337388485e-05, "loss": 0.5485, "step": 23315 }, { "epoch": 0.6807392484890952, "grad_norm": 0.4994151346236652, "learning_rate": 1.7737226277372263e-05, "loss": 0.5447, "step": 23316 }, { "epoch": 0.6807684447169425, "grad_norm": 0.5375273024173945, "learning_rate": 1.7735604217356045e-05, "loss": 0.6159, "step": 23317 }, { "epoch": 0.6807976409447899, "grad_norm": 0.5317466738858351, "learning_rate": 1.7733982157339823e-05, "loss": 0.624, "step": 23318 }, { "epoch": 0.6808268371726373, "grad_norm": 0.5099496449019202, "learning_rate": 1.77323600973236e-05, "loss": 0.6222, "step": 23319 }, { "epoch": 0.6808560334004846, "grad_norm": 0.5636392249038099, "learning_rate": 1.7730738037307383e-05, "loss": 0.6449, "step": 23320 }, { "epoch": 0.680885229628332, "grad_norm": 0.5003380985841838, "learning_rate": 1.7729115977291162e-05, "loss": 0.565, "step": 23321 }, { "epoch": 0.6809144258561793, "grad_norm": 0.5544282268477233, "learning_rate": 1.772749391727494e-05, "loss": 0.6976, "step": 23322 }, { "epoch": 0.6809436220840267, "grad_norm": 0.46899209787865065, "learning_rate": 1.772587185725872e-05, "loss": 0.549, "step": 23323 }, { "epoch": 0.6809728183118741, "grad_norm": 0.5128668629492272, "learning_rate": 1.7724249797242497e-05, "loss": 0.6051, "step": 23324 }, { "epoch": 0.6810020145397214, "grad_norm": 0.47202072707686865, "learning_rate": 1.772262773722628e-05, "loss": 0.5182, "step": 23325 }, { "epoch": 0.6810312107675688, "grad_norm": 0.5183559866746212, "learning_rate": 1.7721005677210057e-05, "loss": 0.597, "step": 23326 }, { "epoch": 0.6810604069954161, "grad_norm": 0.5657789795907968, "learning_rate": 1.771938361719384e-05, "loss": 0.6144, "step": 23327 }, { "epoch": 0.6810896032232635, "grad_norm": 0.5424510480926761, "learning_rate": 1.7717761557177617e-05, "loss": 0.6451, "step": 23328 }, { "epoch": 0.6811187994511109, "grad_norm": 0.5265950430481026, "learning_rate": 1.7716139497161395e-05, "loss": 0.598, "step": 23329 }, { "epoch": 0.6811479956789582, "grad_norm": 0.5363487443336414, "learning_rate": 1.7714517437145177e-05, "loss": 0.5911, "step": 23330 }, { "epoch": 0.6811771919068056, "grad_norm": 0.5276861068811077, "learning_rate": 1.7712895377128956e-05, "loss": 0.6143, "step": 23331 }, { "epoch": 0.681206388134653, "grad_norm": 0.5200380875025187, "learning_rate": 1.7711273317112734e-05, "loss": 0.6082, "step": 23332 }, { "epoch": 0.6812355843625003, "grad_norm": 0.5458287104799199, "learning_rate": 1.7709651257096512e-05, "loss": 0.6326, "step": 23333 }, { "epoch": 0.6812647805903477, "grad_norm": 0.5057519919480633, "learning_rate": 1.770802919708029e-05, "loss": 0.5964, "step": 23334 }, { "epoch": 0.681293976818195, "grad_norm": 0.5448827726752612, "learning_rate": 1.7706407137064072e-05, "loss": 0.6567, "step": 23335 }, { "epoch": 0.6813231730460424, "grad_norm": 0.5234540220694228, "learning_rate": 1.7704785077047854e-05, "loss": 0.5701, "step": 23336 }, { "epoch": 0.6813523692738899, "grad_norm": 0.5053620992643569, "learning_rate": 1.7703163017031633e-05, "loss": 0.5679, "step": 23337 }, { "epoch": 0.6813815655017372, "grad_norm": 0.5002785905399426, "learning_rate": 1.770154095701541e-05, "loss": 0.5311, "step": 23338 }, { "epoch": 0.6814107617295846, "grad_norm": 0.516745556881859, "learning_rate": 1.769991889699919e-05, "loss": 0.6201, "step": 23339 }, { "epoch": 0.681439957957432, "grad_norm": 0.5303118919973626, "learning_rate": 1.7698296836982968e-05, "loss": 0.5751, "step": 23340 }, { "epoch": 0.6814691541852793, "grad_norm": 0.508281266713417, "learning_rate": 1.769667477696675e-05, "loss": 0.5431, "step": 23341 }, { "epoch": 0.6814983504131267, "grad_norm": 0.5386790428625167, "learning_rate": 1.7695052716950528e-05, "loss": 0.5917, "step": 23342 }, { "epoch": 0.681527546640974, "grad_norm": 0.5542571418144591, "learning_rate": 1.7693430656934306e-05, "loss": 0.6744, "step": 23343 }, { "epoch": 0.6815567428688214, "grad_norm": 0.5273438389787363, "learning_rate": 1.7691808596918085e-05, "loss": 0.5891, "step": 23344 }, { "epoch": 0.6815859390966688, "grad_norm": 0.522351227313786, "learning_rate": 1.7690186536901866e-05, "loss": 0.659, "step": 23345 }, { "epoch": 0.6816151353245161, "grad_norm": 0.5166626515299995, "learning_rate": 1.7688564476885648e-05, "loss": 0.5624, "step": 23346 }, { "epoch": 0.6816443315523635, "grad_norm": 0.5335712406848965, "learning_rate": 1.7686942416869427e-05, "loss": 0.5864, "step": 23347 }, { "epoch": 0.6816735277802108, "grad_norm": 0.49304967239284014, "learning_rate": 1.7685320356853205e-05, "loss": 0.5842, "step": 23348 }, { "epoch": 0.6817027240080582, "grad_norm": 0.4952504024098668, "learning_rate": 1.7683698296836983e-05, "loss": 0.5674, "step": 23349 }, { "epoch": 0.6817319202359056, "grad_norm": 0.5563742090459519, "learning_rate": 1.768207623682076e-05, "loss": 0.6881, "step": 23350 }, { "epoch": 0.6817611164637529, "grad_norm": 0.546017214759847, "learning_rate": 1.7680454176804543e-05, "loss": 0.7054, "step": 23351 }, { "epoch": 0.6817903126916003, "grad_norm": 0.5458325847891534, "learning_rate": 1.7678832116788322e-05, "loss": 0.697, "step": 23352 }, { "epoch": 0.6818195089194476, "grad_norm": 0.5038912280006981, "learning_rate": 1.76772100567721e-05, "loss": 0.5877, "step": 23353 }, { "epoch": 0.681848705147295, "grad_norm": 0.5403072412798727, "learning_rate": 1.767558799675588e-05, "loss": 0.6401, "step": 23354 }, { "epoch": 0.6818779013751424, "grad_norm": 0.4980828366700741, "learning_rate": 1.767396593673966e-05, "loss": 0.5462, "step": 23355 }, { "epoch": 0.6819070976029897, "grad_norm": 0.527664327771301, "learning_rate": 1.7672343876723442e-05, "loss": 0.6259, "step": 23356 }, { "epoch": 0.6819362938308371, "grad_norm": 0.48154043043008277, "learning_rate": 1.767072181670722e-05, "loss": 0.5341, "step": 23357 }, { "epoch": 0.6819654900586845, "grad_norm": 0.5547209178999198, "learning_rate": 1.7669099756691e-05, "loss": 0.6419, "step": 23358 }, { "epoch": 0.6819946862865318, "grad_norm": 0.47392776751886534, "learning_rate": 1.7667477696674777e-05, "loss": 0.5017, "step": 23359 }, { "epoch": 0.6820238825143792, "grad_norm": 0.522979626789299, "learning_rate": 1.7665855636658556e-05, "loss": 0.5464, "step": 23360 }, { "epoch": 0.6820530787422265, "grad_norm": 0.5261225390205618, "learning_rate": 1.7664233576642337e-05, "loss": 0.617, "step": 23361 }, { "epoch": 0.6820822749700739, "grad_norm": 0.4942246898384508, "learning_rate": 1.7662611516626116e-05, "loss": 0.5596, "step": 23362 }, { "epoch": 0.6821114711979213, "grad_norm": 0.5107297033219989, "learning_rate": 1.7660989456609894e-05, "loss": 0.5687, "step": 23363 }, { "epoch": 0.6821406674257686, "grad_norm": 0.5239270784550543, "learning_rate": 1.7659367396593672e-05, "loss": 0.5932, "step": 23364 }, { "epoch": 0.682169863653616, "grad_norm": 0.5113905625890988, "learning_rate": 1.7657745336577454e-05, "loss": 0.5943, "step": 23365 }, { "epoch": 0.6821990598814633, "grad_norm": 0.5091362036174221, "learning_rate": 1.7656123276561236e-05, "loss": 0.5643, "step": 23366 }, { "epoch": 0.6822282561093107, "grad_norm": 0.4905192115356489, "learning_rate": 1.7654501216545014e-05, "loss": 0.5015, "step": 23367 }, { "epoch": 0.6822574523371581, "grad_norm": 0.5208233194932845, "learning_rate": 1.7652879156528793e-05, "loss": 0.6175, "step": 23368 }, { "epoch": 0.6822866485650054, "grad_norm": 0.553094919977493, "learning_rate": 1.765125709651257e-05, "loss": 0.5969, "step": 23369 }, { "epoch": 0.6823158447928528, "grad_norm": 0.5369948192947861, "learning_rate": 1.764963503649635e-05, "loss": 0.6151, "step": 23370 }, { "epoch": 0.6823450410207001, "grad_norm": 0.49125730752430197, "learning_rate": 1.764801297648013e-05, "loss": 0.5507, "step": 23371 }, { "epoch": 0.6823742372485475, "grad_norm": 0.5574741441934769, "learning_rate": 1.764639091646391e-05, "loss": 0.7188, "step": 23372 }, { "epoch": 0.6824034334763949, "grad_norm": 0.5681845309601796, "learning_rate": 1.7644768856447688e-05, "loss": 0.6687, "step": 23373 }, { "epoch": 0.6824326297042422, "grad_norm": 0.5146779660797942, "learning_rate": 1.764314679643147e-05, "loss": 0.5741, "step": 23374 }, { "epoch": 0.6824618259320896, "grad_norm": 0.5417346665178515, "learning_rate": 1.7641524736415248e-05, "loss": 0.6593, "step": 23375 }, { "epoch": 0.682491022159937, "grad_norm": 0.5260074352045238, "learning_rate": 1.763990267639903e-05, "loss": 0.5918, "step": 23376 }, { "epoch": 0.6825202183877843, "grad_norm": 0.5227992457656617, "learning_rate": 1.7638280616382808e-05, "loss": 0.5774, "step": 23377 }, { "epoch": 0.6825494146156317, "grad_norm": 0.5257215922307397, "learning_rate": 1.7636658556366587e-05, "loss": 0.5774, "step": 23378 }, { "epoch": 0.682578610843479, "grad_norm": 0.5361744100606642, "learning_rate": 1.7635036496350365e-05, "loss": 0.6299, "step": 23379 }, { "epoch": 0.6826078070713264, "grad_norm": 0.5136909115222767, "learning_rate": 1.7633414436334143e-05, "loss": 0.6089, "step": 23380 }, { "epoch": 0.6826370032991738, "grad_norm": 0.5337523908663219, "learning_rate": 1.7631792376317925e-05, "loss": 0.6223, "step": 23381 }, { "epoch": 0.6826661995270211, "grad_norm": 0.5068272061744167, "learning_rate": 1.7630170316301703e-05, "loss": 0.5265, "step": 23382 }, { "epoch": 0.6826953957548685, "grad_norm": 0.5188706462905315, "learning_rate": 1.7628548256285485e-05, "loss": 0.5563, "step": 23383 }, { "epoch": 0.6827245919827158, "grad_norm": 0.48069194198330656, "learning_rate": 1.7626926196269264e-05, "loss": 0.5277, "step": 23384 }, { "epoch": 0.6827537882105632, "grad_norm": 0.5118801003986755, "learning_rate": 1.7625304136253042e-05, "loss": 0.5572, "step": 23385 }, { "epoch": 0.6827829844384106, "grad_norm": 0.5169492067083383, "learning_rate": 1.7623682076236824e-05, "loss": 0.5858, "step": 23386 }, { "epoch": 0.6828121806662579, "grad_norm": 0.5242421950667099, "learning_rate": 1.7622060016220602e-05, "loss": 0.6185, "step": 23387 }, { "epoch": 0.6828413768941053, "grad_norm": 0.4999382688162057, "learning_rate": 1.762043795620438e-05, "loss": 0.5956, "step": 23388 }, { "epoch": 0.6828705731219527, "grad_norm": 0.5142247931306755, "learning_rate": 1.761881589618816e-05, "loss": 0.5764, "step": 23389 }, { "epoch": 0.6828997693498, "grad_norm": 0.5517128669229245, "learning_rate": 1.7617193836171937e-05, "loss": 0.6814, "step": 23390 }, { "epoch": 0.6829289655776474, "grad_norm": 0.5288829113662241, "learning_rate": 1.761557177615572e-05, "loss": 0.623, "step": 23391 }, { "epoch": 0.6829581618054947, "grad_norm": 0.5471302931109516, "learning_rate": 1.7613949716139497e-05, "loss": 0.6288, "step": 23392 }, { "epoch": 0.6829873580333421, "grad_norm": 0.5613719324847595, "learning_rate": 1.761232765612328e-05, "loss": 0.7108, "step": 23393 }, { "epoch": 0.6830165542611895, "grad_norm": 0.5227585751379938, "learning_rate": 1.7610705596107058e-05, "loss": 0.604, "step": 23394 }, { "epoch": 0.6830457504890368, "grad_norm": 0.47922521075399105, "learning_rate": 1.7609083536090836e-05, "loss": 0.4993, "step": 23395 }, { "epoch": 0.6830749467168842, "grad_norm": 0.4793432157337712, "learning_rate": 1.7607461476074614e-05, "loss": 0.5311, "step": 23396 }, { "epoch": 0.6831041429447315, "grad_norm": 0.4988121745370974, "learning_rate": 1.7605839416058396e-05, "loss": 0.57, "step": 23397 }, { "epoch": 0.6831333391725789, "grad_norm": 0.5568909844173252, "learning_rate": 1.7604217356042174e-05, "loss": 0.7102, "step": 23398 }, { "epoch": 0.6831625354004263, "grad_norm": 0.5244260837322128, "learning_rate": 1.7602595296025953e-05, "loss": 0.5389, "step": 23399 }, { "epoch": 0.6831917316282736, "grad_norm": 0.6084076215570502, "learning_rate": 1.760097323600973e-05, "loss": 0.5773, "step": 23400 }, { "epoch": 0.683220927856121, "grad_norm": 0.5196402231953643, "learning_rate": 1.7599351175993513e-05, "loss": 0.593, "step": 23401 }, { "epoch": 0.6832501240839683, "grad_norm": 0.5536086066963353, "learning_rate": 1.7597729115977295e-05, "loss": 0.6015, "step": 23402 }, { "epoch": 0.6832793203118157, "grad_norm": 0.5611464921627911, "learning_rate": 1.7596107055961073e-05, "loss": 0.622, "step": 23403 }, { "epoch": 0.6833085165396631, "grad_norm": 0.49964501120412647, "learning_rate": 1.759448499594485e-05, "loss": 0.5858, "step": 23404 }, { "epoch": 0.6833377127675104, "grad_norm": 0.5206941181225486, "learning_rate": 1.759286293592863e-05, "loss": 0.6217, "step": 23405 }, { "epoch": 0.6833669089953578, "grad_norm": 0.5360349459980468, "learning_rate": 1.7591240875912408e-05, "loss": 0.6524, "step": 23406 }, { "epoch": 0.6833961052232052, "grad_norm": 0.5448773525918233, "learning_rate": 1.758961881589619e-05, "loss": 0.6228, "step": 23407 }, { "epoch": 0.6834253014510525, "grad_norm": 0.5389043815736766, "learning_rate": 1.758799675587997e-05, "loss": 0.6375, "step": 23408 }, { "epoch": 0.6834544976788999, "grad_norm": 0.5232453623620017, "learning_rate": 1.7586374695863747e-05, "loss": 0.5811, "step": 23409 }, { "epoch": 0.6834836939067472, "grad_norm": 0.4877932741912799, "learning_rate": 1.7584752635847525e-05, "loss": 0.5417, "step": 23410 }, { "epoch": 0.6835128901345946, "grad_norm": 0.5461652685410959, "learning_rate": 1.7583130575831307e-05, "loss": 0.703, "step": 23411 }, { "epoch": 0.683542086362442, "grad_norm": 0.5115641983201744, "learning_rate": 1.758150851581509e-05, "loss": 0.5942, "step": 23412 }, { "epoch": 0.6835712825902893, "grad_norm": 0.5042667958360116, "learning_rate": 1.7579886455798867e-05, "loss": 0.5642, "step": 23413 }, { "epoch": 0.6836004788181367, "grad_norm": 0.5414105431552683, "learning_rate": 1.7578264395782645e-05, "loss": 0.623, "step": 23414 }, { "epoch": 0.683629675045984, "grad_norm": 0.5381139932831915, "learning_rate": 1.7576642335766424e-05, "loss": 0.6109, "step": 23415 }, { "epoch": 0.6836588712738314, "grad_norm": 0.5546041186279411, "learning_rate": 1.7575020275750202e-05, "loss": 0.7037, "step": 23416 }, { "epoch": 0.6836880675016788, "grad_norm": 0.5180253586013686, "learning_rate": 1.7573398215733984e-05, "loss": 0.6049, "step": 23417 }, { "epoch": 0.6837172637295261, "grad_norm": 0.49186490776039155, "learning_rate": 1.7571776155717762e-05, "loss": 0.5222, "step": 23418 }, { "epoch": 0.6837464599573735, "grad_norm": 0.49039790134211847, "learning_rate": 1.757015409570154e-05, "loss": 0.5451, "step": 23419 }, { "epoch": 0.6837756561852208, "grad_norm": 0.5511996871595298, "learning_rate": 1.756853203568532e-05, "loss": 0.6296, "step": 23420 }, { "epoch": 0.6838048524130682, "grad_norm": 0.5391830914236085, "learning_rate": 1.75669099756691e-05, "loss": 0.5803, "step": 23421 }, { "epoch": 0.6838340486409156, "grad_norm": 0.5251569040265792, "learning_rate": 1.7565287915652882e-05, "loss": 0.6318, "step": 23422 }, { "epoch": 0.6838632448687629, "grad_norm": 0.4891718094278087, "learning_rate": 1.756366585563666e-05, "loss": 0.5383, "step": 23423 }, { "epoch": 0.6838924410966103, "grad_norm": 0.5197101066272743, "learning_rate": 1.756204379562044e-05, "loss": 0.6209, "step": 23424 }, { "epoch": 0.6839216373244577, "grad_norm": 0.526277573589232, "learning_rate": 1.7560421735604218e-05, "loss": 0.6241, "step": 23425 }, { "epoch": 0.683950833552305, "grad_norm": 0.5410352279926521, "learning_rate": 1.7558799675587996e-05, "loss": 0.6469, "step": 23426 }, { "epoch": 0.6839800297801524, "grad_norm": 0.4914183818022286, "learning_rate": 1.7557177615571778e-05, "loss": 0.5443, "step": 23427 }, { "epoch": 0.6840092260079997, "grad_norm": 0.5401872948342558, "learning_rate": 1.7555555555555556e-05, "loss": 0.6837, "step": 23428 }, { "epoch": 0.6840384222358471, "grad_norm": 0.5429094898047597, "learning_rate": 1.7553933495539334e-05, "loss": 0.6542, "step": 23429 }, { "epoch": 0.6840676184636945, "grad_norm": 0.503877933069503, "learning_rate": 1.7552311435523113e-05, "loss": 0.5764, "step": 23430 }, { "epoch": 0.6840968146915418, "grad_norm": 0.5158186787033544, "learning_rate": 1.7550689375506895e-05, "loss": 0.571, "step": 23431 }, { "epoch": 0.6841260109193892, "grad_norm": 0.4912251657715427, "learning_rate": 1.7549067315490676e-05, "loss": 0.5412, "step": 23432 }, { "epoch": 0.6841552071472365, "grad_norm": 0.5194314062579835, "learning_rate": 1.7547445255474455e-05, "loss": 0.6322, "step": 23433 }, { "epoch": 0.6841844033750839, "grad_norm": 0.47151068636358073, "learning_rate": 1.7545823195458233e-05, "loss": 0.5343, "step": 23434 }, { "epoch": 0.6842135996029313, "grad_norm": 0.4899378031201914, "learning_rate": 1.754420113544201e-05, "loss": 0.5466, "step": 23435 }, { "epoch": 0.6842427958307786, "grad_norm": 0.49361841725860606, "learning_rate": 1.754257907542579e-05, "loss": 0.5589, "step": 23436 }, { "epoch": 0.684271992058626, "grad_norm": 0.5120832845516645, "learning_rate": 1.754095701540957e-05, "loss": 0.5828, "step": 23437 }, { "epoch": 0.6843011882864733, "grad_norm": 0.514179959119176, "learning_rate": 1.753933495539335e-05, "loss": 0.5536, "step": 23438 }, { "epoch": 0.6843303845143207, "grad_norm": 0.629720091943774, "learning_rate": 1.753771289537713e-05, "loss": 0.7643, "step": 23439 }, { "epoch": 0.6843595807421681, "grad_norm": 0.5371772412738931, "learning_rate": 1.753609083536091e-05, "loss": 0.6553, "step": 23440 }, { "epoch": 0.6843887769700154, "grad_norm": 0.5238670272964626, "learning_rate": 1.753446877534469e-05, "loss": 0.5816, "step": 23441 }, { "epoch": 0.6844179731978628, "grad_norm": 0.511634768332348, "learning_rate": 1.753284671532847e-05, "loss": 0.5929, "step": 23442 }, { "epoch": 0.6844471694257102, "grad_norm": 0.48905891114087435, "learning_rate": 1.753122465531225e-05, "loss": 0.5108, "step": 23443 }, { "epoch": 0.6844763656535575, "grad_norm": 0.49525252426853883, "learning_rate": 1.7529602595296027e-05, "loss": 0.5468, "step": 23444 }, { "epoch": 0.6845055618814049, "grad_norm": 0.5247461456224489, "learning_rate": 1.7527980535279805e-05, "loss": 0.5658, "step": 23445 }, { "epoch": 0.6845347581092522, "grad_norm": 0.5394408583764555, "learning_rate": 1.7526358475263584e-05, "loss": 0.6078, "step": 23446 }, { "epoch": 0.6845639543370996, "grad_norm": 0.5406095878931143, "learning_rate": 1.7524736415247366e-05, "loss": 0.6296, "step": 23447 }, { "epoch": 0.684593150564947, "grad_norm": 0.547176485715485, "learning_rate": 1.7523114355231144e-05, "loss": 0.6307, "step": 23448 }, { "epoch": 0.6846223467927943, "grad_norm": 0.49668706869963314, "learning_rate": 1.7521492295214922e-05, "loss": 0.5463, "step": 23449 }, { "epoch": 0.6846515430206417, "grad_norm": 0.5175380903832342, "learning_rate": 1.7519870235198704e-05, "loss": 0.588, "step": 23450 }, { "epoch": 0.684680739248489, "grad_norm": 0.5096325056577092, "learning_rate": 1.7518248175182482e-05, "loss": 0.5973, "step": 23451 }, { "epoch": 0.6847099354763364, "grad_norm": 0.5517218462684849, "learning_rate": 1.751662611516626e-05, "loss": 0.6361, "step": 23452 }, { "epoch": 0.6847391317041838, "grad_norm": 0.4818162105510765, "learning_rate": 1.7515004055150043e-05, "loss": 0.5339, "step": 23453 }, { "epoch": 0.6847683279320311, "grad_norm": 0.55685395365164, "learning_rate": 1.751338199513382e-05, "loss": 0.6727, "step": 23454 }, { "epoch": 0.6847975241598785, "grad_norm": 0.49288521944533187, "learning_rate": 1.75117599351176e-05, "loss": 0.5724, "step": 23455 }, { "epoch": 0.6848267203877259, "grad_norm": 0.5300673251710608, "learning_rate": 1.7510137875101378e-05, "loss": 0.6048, "step": 23456 }, { "epoch": 0.6848559166155732, "grad_norm": 0.5270182428111332, "learning_rate": 1.750851581508516e-05, "loss": 0.6311, "step": 23457 }, { "epoch": 0.6848851128434207, "grad_norm": 0.5126289652420885, "learning_rate": 1.7506893755068938e-05, "loss": 0.5606, "step": 23458 }, { "epoch": 0.684914309071268, "grad_norm": 0.5294206790248569, "learning_rate": 1.750527169505272e-05, "loss": 0.6012, "step": 23459 }, { "epoch": 0.6849435052991154, "grad_norm": 0.5357401375799288, "learning_rate": 1.7503649635036498e-05, "loss": 0.6153, "step": 23460 }, { "epoch": 0.6849727015269628, "grad_norm": 0.5166757770014727, "learning_rate": 1.7502027575020276e-05, "loss": 0.5525, "step": 23461 }, { "epoch": 0.6850018977548101, "grad_norm": 0.5628790349823171, "learning_rate": 1.7500405515004055e-05, "loss": 0.6519, "step": 23462 }, { "epoch": 0.6850310939826575, "grad_norm": 0.5286878064420534, "learning_rate": 1.7498783454987836e-05, "loss": 0.612, "step": 23463 }, { "epoch": 0.6850602902105049, "grad_norm": 0.5431852053638235, "learning_rate": 1.7497161394971615e-05, "loss": 0.6234, "step": 23464 }, { "epoch": 0.6850894864383522, "grad_norm": 0.5260815601336124, "learning_rate": 1.7495539334955393e-05, "loss": 0.6268, "step": 23465 }, { "epoch": 0.6851186826661996, "grad_norm": 0.5153664228598184, "learning_rate": 1.749391727493917e-05, "loss": 0.5619, "step": 23466 }, { "epoch": 0.6851478788940469, "grad_norm": 0.5368260775837053, "learning_rate": 1.7492295214922953e-05, "loss": 0.6399, "step": 23467 }, { "epoch": 0.6851770751218943, "grad_norm": 0.49959085947077825, "learning_rate": 1.7490673154906735e-05, "loss": 0.6017, "step": 23468 }, { "epoch": 0.6852062713497417, "grad_norm": 0.5278743599501866, "learning_rate": 1.7489051094890514e-05, "loss": 0.591, "step": 23469 }, { "epoch": 0.685235467577589, "grad_norm": 0.5409015294484169, "learning_rate": 1.7487429034874292e-05, "loss": 0.6144, "step": 23470 }, { "epoch": 0.6852646638054364, "grad_norm": 0.5311892582053849, "learning_rate": 1.748580697485807e-05, "loss": 0.6422, "step": 23471 }, { "epoch": 0.6852938600332837, "grad_norm": 0.5471297697382368, "learning_rate": 1.748418491484185e-05, "loss": 0.7015, "step": 23472 }, { "epoch": 0.6853230562611311, "grad_norm": 0.5316891735125409, "learning_rate": 1.748256285482563e-05, "loss": 0.6004, "step": 23473 }, { "epoch": 0.6853522524889785, "grad_norm": 0.5191560447651146, "learning_rate": 1.748094079480941e-05, "loss": 0.5641, "step": 23474 }, { "epoch": 0.6853814487168258, "grad_norm": 0.47370842117127476, "learning_rate": 1.7479318734793187e-05, "loss": 0.5145, "step": 23475 }, { "epoch": 0.6854106449446732, "grad_norm": 0.525694262424119, "learning_rate": 1.7477696674776966e-05, "loss": 0.5914, "step": 23476 }, { "epoch": 0.6854398411725205, "grad_norm": 0.5320606988416644, "learning_rate": 1.7476074614760747e-05, "loss": 0.5955, "step": 23477 }, { "epoch": 0.6854690374003679, "grad_norm": 0.5393475613996247, "learning_rate": 1.747445255474453e-05, "loss": 0.648, "step": 23478 }, { "epoch": 0.6854982336282153, "grad_norm": 0.4923536927417667, "learning_rate": 1.7472830494728307e-05, "loss": 0.5495, "step": 23479 }, { "epoch": 0.6855274298560626, "grad_norm": 0.5071900438313337, "learning_rate": 1.7471208434712086e-05, "loss": 0.5495, "step": 23480 }, { "epoch": 0.68555662608391, "grad_norm": 0.5046092211639872, "learning_rate": 1.7469586374695864e-05, "loss": 0.5421, "step": 23481 }, { "epoch": 0.6855858223117574, "grad_norm": 0.539256943620597, "learning_rate": 1.7467964314679643e-05, "loss": 0.6339, "step": 23482 }, { "epoch": 0.6856150185396047, "grad_norm": 0.5553378138772767, "learning_rate": 1.7466342254663424e-05, "loss": 0.6464, "step": 23483 }, { "epoch": 0.6856442147674521, "grad_norm": 0.48856895568095854, "learning_rate": 1.7464720194647203e-05, "loss": 0.5548, "step": 23484 }, { "epoch": 0.6856734109952994, "grad_norm": 0.49763080074625315, "learning_rate": 1.746309813463098e-05, "loss": 0.5527, "step": 23485 }, { "epoch": 0.6857026072231468, "grad_norm": 0.4960470535165491, "learning_rate": 1.746147607461476e-05, "loss": 0.53, "step": 23486 }, { "epoch": 0.6857318034509942, "grad_norm": 0.5106713706620094, "learning_rate": 1.745985401459854e-05, "loss": 0.5517, "step": 23487 }, { "epoch": 0.6857609996788415, "grad_norm": 0.5457443246352778, "learning_rate": 1.7458231954582323e-05, "loss": 0.6269, "step": 23488 }, { "epoch": 0.6857901959066889, "grad_norm": 0.5522903017355301, "learning_rate": 1.74566098945661e-05, "loss": 0.6051, "step": 23489 }, { "epoch": 0.6858193921345362, "grad_norm": 0.49450899904634604, "learning_rate": 1.745498783454988e-05, "loss": 0.549, "step": 23490 }, { "epoch": 0.6858485883623836, "grad_norm": 0.5304854813670328, "learning_rate": 1.7453365774533658e-05, "loss": 0.5797, "step": 23491 }, { "epoch": 0.685877784590231, "grad_norm": 0.5160014509914677, "learning_rate": 1.7451743714517436e-05, "loss": 0.5968, "step": 23492 }, { "epoch": 0.6859069808180783, "grad_norm": 0.5062142521579326, "learning_rate": 1.7450121654501218e-05, "loss": 0.6011, "step": 23493 }, { "epoch": 0.6859361770459257, "grad_norm": 0.5221482083350444, "learning_rate": 1.7448499594484997e-05, "loss": 0.6157, "step": 23494 }, { "epoch": 0.685965373273773, "grad_norm": 0.5034465508308342, "learning_rate": 1.7446877534468775e-05, "loss": 0.5564, "step": 23495 }, { "epoch": 0.6859945695016204, "grad_norm": 0.5283333247381515, "learning_rate": 1.7445255474452553e-05, "loss": 0.5931, "step": 23496 }, { "epoch": 0.6860237657294678, "grad_norm": 0.5402986416560327, "learning_rate": 1.7443633414436335e-05, "loss": 0.6143, "step": 23497 }, { "epoch": 0.6860529619573151, "grad_norm": 0.4916312541460488, "learning_rate": 1.7442011354420117e-05, "loss": 0.5337, "step": 23498 }, { "epoch": 0.6860821581851625, "grad_norm": 0.5638468004610088, "learning_rate": 1.7440389294403895e-05, "loss": 0.653, "step": 23499 }, { "epoch": 0.6861113544130099, "grad_norm": 0.519436805482454, "learning_rate": 1.7438767234387674e-05, "loss": 0.5658, "step": 23500 }, { "epoch": 0.6861405506408572, "grad_norm": 0.5166562746692238, "learning_rate": 1.7437145174371452e-05, "loss": 0.574, "step": 23501 }, { "epoch": 0.6861697468687046, "grad_norm": 0.5395134310558044, "learning_rate": 1.743552311435523e-05, "loss": 0.5955, "step": 23502 }, { "epoch": 0.6861989430965519, "grad_norm": 0.46956224252032364, "learning_rate": 1.7433901054339012e-05, "loss": 0.4673, "step": 23503 }, { "epoch": 0.6862281393243993, "grad_norm": 0.5242817571491608, "learning_rate": 1.743227899432279e-05, "loss": 0.5946, "step": 23504 }, { "epoch": 0.6862573355522467, "grad_norm": 0.565539868709441, "learning_rate": 1.743065693430657e-05, "loss": 0.6573, "step": 23505 }, { "epoch": 0.686286531780094, "grad_norm": 0.5440074054609729, "learning_rate": 1.742903487429035e-05, "loss": 0.6506, "step": 23506 }, { "epoch": 0.6863157280079414, "grad_norm": 0.5458441611687059, "learning_rate": 1.742741281427413e-05, "loss": 0.6257, "step": 23507 }, { "epoch": 0.6863449242357887, "grad_norm": 0.5465147243619319, "learning_rate": 1.742579075425791e-05, "loss": 0.6507, "step": 23508 }, { "epoch": 0.6863741204636361, "grad_norm": 0.5305669544362739, "learning_rate": 1.742416869424169e-05, "loss": 0.6379, "step": 23509 }, { "epoch": 0.6864033166914835, "grad_norm": 0.5079510352949382, "learning_rate": 1.7422546634225467e-05, "loss": 0.5749, "step": 23510 }, { "epoch": 0.6864325129193308, "grad_norm": 0.5175160341209201, "learning_rate": 1.7420924574209246e-05, "loss": 0.5823, "step": 23511 }, { "epoch": 0.6864617091471782, "grad_norm": 0.5195818280097037, "learning_rate": 1.7419302514193024e-05, "loss": 0.6029, "step": 23512 }, { "epoch": 0.6864909053750256, "grad_norm": 0.5094163657761377, "learning_rate": 1.7417680454176806e-05, "loss": 0.5522, "step": 23513 }, { "epoch": 0.6865201016028729, "grad_norm": 0.4967761982001339, "learning_rate": 1.7416058394160584e-05, "loss": 0.5501, "step": 23514 }, { "epoch": 0.6865492978307203, "grad_norm": 0.5280373636733319, "learning_rate": 1.7414436334144363e-05, "loss": 0.5755, "step": 23515 }, { "epoch": 0.6865784940585676, "grad_norm": 0.5113015981306914, "learning_rate": 1.7412814274128145e-05, "loss": 0.5354, "step": 23516 }, { "epoch": 0.686607690286415, "grad_norm": 0.569328917924927, "learning_rate": 1.7411192214111923e-05, "loss": 0.705, "step": 23517 }, { "epoch": 0.6866368865142624, "grad_norm": 0.4951137437456837, "learning_rate": 1.74095701540957e-05, "loss": 0.6075, "step": 23518 }, { "epoch": 0.6866660827421097, "grad_norm": 0.5187006629795674, "learning_rate": 1.7407948094079483e-05, "loss": 0.5798, "step": 23519 }, { "epoch": 0.6866952789699571, "grad_norm": 0.5074663538170016, "learning_rate": 1.740632603406326e-05, "loss": 0.5638, "step": 23520 }, { "epoch": 0.6867244751978044, "grad_norm": 0.5517130252574154, "learning_rate": 1.740470397404704e-05, "loss": 0.6353, "step": 23521 }, { "epoch": 0.6867536714256518, "grad_norm": 0.48918953312284813, "learning_rate": 1.7403081914030818e-05, "loss": 0.5284, "step": 23522 }, { "epoch": 0.6867828676534992, "grad_norm": 0.5519020863410169, "learning_rate": 1.74014598540146e-05, "loss": 0.676, "step": 23523 }, { "epoch": 0.6868120638813465, "grad_norm": 0.5455721845009521, "learning_rate": 1.7399837793998378e-05, "loss": 0.6471, "step": 23524 }, { "epoch": 0.6868412601091939, "grad_norm": 0.5242612336924971, "learning_rate": 1.739821573398216e-05, "loss": 0.6325, "step": 23525 }, { "epoch": 0.6868704563370412, "grad_norm": 0.5483095287923111, "learning_rate": 1.739659367396594e-05, "loss": 0.6321, "step": 23526 }, { "epoch": 0.6868996525648886, "grad_norm": 0.5448500523633807, "learning_rate": 1.7394971613949717e-05, "loss": 0.6011, "step": 23527 }, { "epoch": 0.686928848792736, "grad_norm": 0.5280832602982939, "learning_rate": 1.7393349553933495e-05, "loss": 0.6246, "step": 23528 }, { "epoch": 0.6869580450205833, "grad_norm": 0.5103377777260577, "learning_rate": 1.7391727493917277e-05, "loss": 0.577, "step": 23529 }, { "epoch": 0.6869872412484307, "grad_norm": 0.5351913226941146, "learning_rate": 1.7390105433901055e-05, "loss": 0.6039, "step": 23530 }, { "epoch": 0.687016437476278, "grad_norm": 0.5192102073407743, "learning_rate": 1.7388483373884834e-05, "loss": 0.5559, "step": 23531 }, { "epoch": 0.6870456337041254, "grad_norm": 0.4973910291224419, "learning_rate": 1.7386861313868612e-05, "loss": 0.551, "step": 23532 }, { "epoch": 0.6870748299319728, "grad_norm": 0.5224499223049325, "learning_rate": 1.7385239253852394e-05, "loss": 0.6219, "step": 23533 }, { "epoch": 0.6871040261598201, "grad_norm": 0.4996301830343175, "learning_rate": 1.7383617193836176e-05, "loss": 0.5685, "step": 23534 }, { "epoch": 0.6871332223876675, "grad_norm": 0.49903031717083707, "learning_rate": 1.7381995133819954e-05, "loss": 0.5332, "step": 23535 }, { "epoch": 0.6871624186155149, "grad_norm": 0.5448349802636578, "learning_rate": 1.7380373073803732e-05, "loss": 0.6189, "step": 23536 }, { "epoch": 0.6871916148433622, "grad_norm": 0.5565428573343827, "learning_rate": 1.737875101378751e-05, "loss": 0.6753, "step": 23537 }, { "epoch": 0.6872208110712096, "grad_norm": 0.4982532677066761, "learning_rate": 1.737712895377129e-05, "loss": 0.5349, "step": 23538 }, { "epoch": 0.6872500072990569, "grad_norm": 0.560219120426507, "learning_rate": 1.737550689375507e-05, "loss": 0.679, "step": 23539 }, { "epoch": 0.6872792035269043, "grad_norm": 0.48292193564448577, "learning_rate": 1.737388483373885e-05, "loss": 0.5708, "step": 23540 }, { "epoch": 0.6873083997547517, "grad_norm": 0.5057262701668266, "learning_rate": 1.7372262773722628e-05, "loss": 0.5747, "step": 23541 }, { "epoch": 0.687337595982599, "grad_norm": 0.5329609041737459, "learning_rate": 1.7370640713706406e-05, "loss": 0.5961, "step": 23542 }, { "epoch": 0.6873667922104464, "grad_norm": 0.5127441331140987, "learning_rate": 1.7369018653690188e-05, "loss": 0.6115, "step": 23543 }, { "epoch": 0.6873959884382937, "grad_norm": 0.5238379290003409, "learning_rate": 1.736739659367397e-05, "loss": 0.6095, "step": 23544 }, { "epoch": 0.6874251846661411, "grad_norm": 0.5570249331548576, "learning_rate": 1.7365774533657748e-05, "loss": 0.6557, "step": 23545 }, { "epoch": 0.6874543808939885, "grad_norm": 0.5187066562478915, "learning_rate": 1.7364152473641526e-05, "loss": 0.5779, "step": 23546 }, { "epoch": 0.6874835771218358, "grad_norm": 0.5139559608863795, "learning_rate": 1.7362530413625305e-05, "loss": 0.5988, "step": 23547 }, { "epoch": 0.6875127733496832, "grad_norm": 0.5158412215834794, "learning_rate": 1.7360908353609083e-05, "loss": 0.6048, "step": 23548 }, { "epoch": 0.6875419695775306, "grad_norm": 0.5005124490291122, "learning_rate": 1.7359286293592865e-05, "loss": 0.5631, "step": 23549 }, { "epoch": 0.6875711658053779, "grad_norm": 0.5291105050140408, "learning_rate": 1.7357664233576643e-05, "loss": 0.5795, "step": 23550 }, { "epoch": 0.6876003620332253, "grad_norm": 0.5151100656274347, "learning_rate": 1.735604217356042e-05, "loss": 0.5874, "step": 23551 }, { "epoch": 0.6876295582610726, "grad_norm": 0.5109857656244801, "learning_rate": 1.73544201135442e-05, "loss": 0.5871, "step": 23552 }, { "epoch": 0.68765875448892, "grad_norm": 0.545064071319582, "learning_rate": 1.735279805352798e-05, "loss": 0.6237, "step": 23553 }, { "epoch": 0.6876879507167674, "grad_norm": 0.5120921099400986, "learning_rate": 1.7351175993511763e-05, "loss": 0.5557, "step": 23554 }, { "epoch": 0.6877171469446147, "grad_norm": 0.4774749399398686, "learning_rate": 1.7349553933495542e-05, "loss": 0.551, "step": 23555 }, { "epoch": 0.6877463431724621, "grad_norm": 0.4973825952167816, "learning_rate": 1.734793187347932e-05, "loss": 0.5711, "step": 23556 }, { "epoch": 0.6877755394003094, "grad_norm": 0.5430871645986624, "learning_rate": 1.73463098134631e-05, "loss": 0.624, "step": 23557 }, { "epoch": 0.6878047356281568, "grad_norm": 0.550540738491528, "learning_rate": 1.7344687753446877e-05, "loss": 0.6726, "step": 23558 }, { "epoch": 0.6878339318560042, "grad_norm": 0.5307688833230662, "learning_rate": 1.734306569343066e-05, "loss": 0.6091, "step": 23559 }, { "epoch": 0.6878631280838515, "grad_norm": 0.5340540407437947, "learning_rate": 1.7341443633414437e-05, "loss": 0.5838, "step": 23560 }, { "epoch": 0.6878923243116989, "grad_norm": 0.5286890138683995, "learning_rate": 1.7339821573398215e-05, "loss": 0.6392, "step": 23561 }, { "epoch": 0.6879215205395462, "grad_norm": 0.5516347733385394, "learning_rate": 1.7338199513381994e-05, "loss": 0.6537, "step": 23562 }, { "epoch": 0.6879507167673936, "grad_norm": 0.5061381806884112, "learning_rate": 1.7336577453365776e-05, "loss": 0.5404, "step": 23563 }, { "epoch": 0.687979912995241, "grad_norm": 0.5282841424879027, "learning_rate": 1.7334955393349557e-05, "loss": 0.5829, "step": 23564 }, { "epoch": 0.6880091092230883, "grad_norm": 0.4862839433249167, "learning_rate": 1.7333333333333336e-05, "loss": 0.5723, "step": 23565 }, { "epoch": 0.6880383054509357, "grad_norm": 0.5204929699151447, "learning_rate": 1.7331711273317114e-05, "loss": 0.5749, "step": 23566 }, { "epoch": 0.6880675016787831, "grad_norm": 0.530424323536019, "learning_rate": 1.7330089213300892e-05, "loss": 0.5984, "step": 23567 }, { "epoch": 0.6880966979066304, "grad_norm": 0.5153905831353307, "learning_rate": 1.732846715328467e-05, "loss": 0.606, "step": 23568 }, { "epoch": 0.6881258941344778, "grad_norm": 0.5620435161614771, "learning_rate": 1.7326845093268453e-05, "loss": 0.7041, "step": 23569 }, { "epoch": 0.6881550903623251, "grad_norm": 0.5312709337254603, "learning_rate": 1.732522303325223e-05, "loss": 0.629, "step": 23570 }, { "epoch": 0.6881842865901725, "grad_norm": 0.5069411020551607, "learning_rate": 1.732360097323601e-05, "loss": 0.5551, "step": 23571 }, { "epoch": 0.6882134828180199, "grad_norm": 0.5372864005791482, "learning_rate": 1.732197891321979e-05, "loss": 0.6157, "step": 23572 }, { "epoch": 0.6882426790458672, "grad_norm": 0.5088790777508204, "learning_rate": 1.732035685320357e-05, "loss": 0.5983, "step": 23573 }, { "epoch": 0.6882718752737146, "grad_norm": 0.5517172122939561, "learning_rate": 1.7318734793187348e-05, "loss": 0.5988, "step": 23574 }, { "epoch": 0.688301071501562, "grad_norm": 0.5517094145918285, "learning_rate": 1.731711273317113e-05, "loss": 0.6208, "step": 23575 }, { "epoch": 0.6883302677294093, "grad_norm": 0.5275724292947922, "learning_rate": 1.7315490673154908e-05, "loss": 0.6134, "step": 23576 }, { "epoch": 0.6883594639572567, "grad_norm": 0.5199938429840573, "learning_rate": 1.7313868613138686e-05, "loss": 0.6408, "step": 23577 }, { "epoch": 0.6883886601851041, "grad_norm": 0.5275268068026981, "learning_rate": 1.7312246553122465e-05, "loss": 0.6047, "step": 23578 }, { "epoch": 0.6884178564129515, "grad_norm": 0.5356662396058264, "learning_rate": 1.7310624493106246e-05, "loss": 0.5765, "step": 23579 }, { "epoch": 0.6884470526407989, "grad_norm": 0.5388127670901123, "learning_rate": 1.7309002433090025e-05, "loss": 0.6284, "step": 23580 }, { "epoch": 0.6884762488686462, "grad_norm": 0.5126040426360394, "learning_rate": 1.7307380373073803e-05, "loss": 0.5834, "step": 23581 }, { "epoch": 0.6885054450964936, "grad_norm": 0.5314535937437154, "learning_rate": 1.7305758313057585e-05, "loss": 0.6225, "step": 23582 }, { "epoch": 0.688534641324341, "grad_norm": 0.5199217185004895, "learning_rate": 1.7304136253041363e-05, "loss": 0.5846, "step": 23583 }, { "epoch": 0.6885638375521883, "grad_norm": 0.5740078588746768, "learning_rate": 1.7302514193025142e-05, "loss": 0.6191, "step": 23584 }, { "epoch": 0.6885930337800357, "grad_norm": 0.5585578630453402, "learning_rate": 1.7300892133008923e-05, "loss": 0.6794, "step": 23585 }, { "epoch": 0.688622230007883, "grad_norm": 0.4954770061886625, "learning_rate": 1.7299270072992702e-05, "loss": 0.5318, "step": 23586 }, { "epoch": 0.6886514262357304, "grad_norm": 0.5525993700528695, "learning_rate": 1.729764801297648e-05, "loss": 0.6934, "step": 23587 }, { "epoch": 0.6886806224635778, "grad_norm": 0.5142877587374638, "learning_rate": 1.729602595296026e-05, "loss": 0.5867, "step": 23588 }, { "epoch": 0.6887098186914251, "grad_norm": 0.5285166396798353, "learning_rate": 1.729440389294404e-05, "loss": 0.5957, "step": 23589 }, { "epoch": 0.6887390149192725, "grad_norm": 0.5280248643094959, "learning_rate": 1.729278183292782e-05, "loss": 0.5596, "step": 23590 }, { "epoch": 0.6887682111471198, "grad_norm": 0.5615124441953319, "learning_rate": 1.72911597729116e-05, "loss": 0.6606, "step": 23591 }, { "epoch": 0.6887974073749672, "grad_norm": 0.5601143507664353, "learning_rate": 1.728953771289538e-05, "loss": 0.6537, "step": 23592 }, { "epoch": 0.6888266036028146, "grad_norm": 0.5074344601702453, "learning_rate": 1.7287915652879157e-05, "loss": 0.5611, "step": 23593 }, { "epoch": 0.6888557998306619, "grad_norm": 0.5166671384308649, "learning_rate": 1.7286293592862936e-05, "loss": 0.5476, "step": 23594 }, { "epoch": 0.6888849960585093, "grad_norm": 0.5424666031438723, "learning_rate": 1.7284671532846717e-05, "loss": 0.5974, "step": 23595 }, { "epoch": 0.6889141922863566, "grad_norm": 0.5246258876818202, "learning_rate": 1.7283049472830496e-05, "loss": 0.5678, "step": 23596 }, { "epoch": 0.688943388514204, "grad_norm": 0.4526646844835631, "learning_rate": 1.7281427412814274e-05, "loss": 0.4307, "step": 23597 }, { "epoch": 0.6889725847420514, "grad_norm": 0.5091173752156488, "learning_rate": 1.7279805352798052e-05, "loss": 0.5527, "step": 23598 }, { "epoch": 0.6890017809698987, "grad_norm": 0.5218349641417105, "learning_rate": 1.7278183292781834e-05, "loss": 0.5828, "step": 23599 }, { "epoch": 0.6890309771977461, "grad_norm": 0.5371279262102715, "learning_rate": 1.7276561232765613e-05, "loss": 0.6432, "step": 23600 }, { "epoch": 0.6890601734255934, "grad_norm": 0.5489987630631288, "learning_rate": 1.7274939172749394e-05, "loss": 0.6687, "step": 23601 }, { "epoch": 0.6890893696534408, "grad_norm": 0.46014305923769266, "learning_rate": 1.7273317112733173e-05, "loss": 0.4837, "step": 23602 }, { "epoch": 0.6891185658812882, "grad_norm": 0.545237528129163, "learning_rate": 1.727169505271695e-05, "loss": 0.6089, "step": 23603 }, { "epoch": 0.6891477621091355, "grad_norm": 0.546398328977393, "learning_rate": 1.727007299270073e-05, "loss": 0.6607, "step": 23604 }, { "epoch": 0.6891769583369829, "grad_norm": 0.5102567897592868, "learning_rate": 1.726845093268451e-05, "loss": 0.6227, "step": 23605 }, { "epoch": 0.6892061545648303, "grad_norm": 0.5214732117403634, "learning_rate": 1.726682887266829e-05, "loss": 0.5642, "step": 23606 }, { "epoch": 0.6892353507926776, "grad_norm": 0.5382793050302872, "learning_rate": 1.7265206812652068e-05, "loss": 0.6146, "step": 23607 }, { "epoch": 0.689264547020525, "grad_norm": 0.480060845333099, "learning_rate": 1.7263584752635846e-05, "loss": 0.5017, "step": 23608 }, { "epoch": 0.6892937432483723, "grad_norm": 0.523149768039643, "learning_rate": 1.7261962692619628e-05, "loss": 0.5977, "step": 23609 }, { "epoch": 0.6893229394762197, "grad_norm": 0.5535291730732728, "learning_rate": 1.726034063260341e-05, "loss": 0.6498, "step": 23610 }, { "epoch": 0.6893521357040671, "grad_norm": 0.46580599674750106, "learning_rate": 1.7258718572587188e-05, "loss": 0.4687, "step": 23611 }, { "epoch": 0.6893813319319144, "grad_norm": 0.6246410082254292, "learning_rate": 1.7257096512570967e-05, "loss": 0.5939, "step": 23612 }, { "epoch": 0.6894105281597618, "grad_norm": 0.5159343786938163, "learning_rate": 1.7255474452554745e-05, "loss": 0.5926, "step": 23613 }, { "epoch": 0.6894397243876091, "grad_norm": 0.52309339047787, "learning_rate": 1.7253852392538523e-05, "loss": 0.5857, "step": 23614 }, { "epoch": 0.6894689206154565, "grad_norm": 0.5195646518270246, "learning_rate": 1.7252230332522305e-05, "loss": 0.599, "step": 23615 }, { "epoch": 0.6894981168433039, "grad_norm": 0.6190732493992959, "learning_rate": 1.7250608272506084e-05, "loss": 0.5729, "step": 23616 }, { "epoch": 0.6895273130711512, "grad_norm": 0.5134322410258375, "learning_rate": 1.7248986212489862e-05, "loss": 0.6071, "step": 23617 }, { "epoch": 0.6895565092989986, "grad_norm": 0.5596342114755224, "learning_rate": 1.724736415247364e-05, "loss": 0.6442, "step": 23618 }, { "epoch": 0.689585705526846, "grad_norm": 0.5518464623881514, "learning_rate": 1.7245742092457422e-05, "loss": 0.5791, "step": 23619 }, { "epoch": 0.6896149017546933, "grad_norm": 0.48989885572216835, "learning_rate": 1.7244120032441204e-05, "loss": 0.4976, "step": 23620 }, { "epoch": 0.6896440979825407, "grad_norm": 0.4976318792696211, "learning_rate": 1.7242497972424982e-05, "loss": 0.5709, "step": 23621 }, { "epoch": 0.689673294210388, "grad_norm": 0.5596430754834171, "learning_rate": 1.724087591240876e-05, "loss": 0.62, "step": 23622 }, { "epoch": 0.6897024904382354, "grad_norm": 0.5285672298621388, "learning_rate": 1.723925385239254e-05, "loss": 0.5808, "step": 23623 }, { "epoch": 0.6897316866660828, "grad_norm": 0.5482753330085082, "learning_rate": 1.7237631792376317e-05, "loss": 0.6492, "step": 23624 }, { "epoch": 0.6897608828939301, "grad_norm": 0.5066164789882368, "learning_rate": 1.72360097323601e-05, "loss": 0.577, "step": 23625 }, { "epoch": 0.6897900791217775, "grad_norm": 0.5462167100419604, "learning_rate": 1.7234387672343877e-05, "loss": 0.6505, "step": 23626 }, { "epoch": 0.6898192753496248, "grad_norm": 0.5325465107795244, "learning_rate": 1.7232765612327656e-05, "loss": 0.6491, "step": 23627 }, { "epoch": 0.6898484715774722, "grad_norm": 0.4847691305056907, "learning_rate": 1.7231143552311434e-05, "loss": 0.5345, "step": 23628 }, { "epoch": 0.6898776678053196, "grad_norm": 0.5691181405241776, "learning_rate": 1.7229521492295216e-05, "loss": 0.6636, "step": 23629 }, { "epoch": 0.6899068640331669, "grad_norm": 0.5461702988689345, "learning_rate": 1.7227899432278998e-05, "loss": 0.6361, "step": 23630 }, { "epoch": 0.6899360602610143, "grad_norm": 0.5135426021893174, "learning_rate": 1.7226277372262776e-05, "loss": 0.6172, "step": 23631 }, { "epoch": 0.6899652564888616, "grad_norm": 0.5579822405640753, "learning_rate": 1.7224655312246554e-05, "loss": 0.6716, "step": 23632 }, { "epoch": 0.689994452716709, "grad_norm": 0.5786930031970619, "learning_rate": 1.7223033252230333e-05, "loss": 0.7085, "step": 23633 }, { "epoch": 0.6900236489445564, "grad_norm": 0.5082462465885527, "learning_rate": 1.722141119221411e-05, "loss": 0.5873, "step": 23634 }, { "epoch": 0.6900528451724037, "grad_norm": 0.5084384003944316, "learning_rate": 1.7219789132197893e-05, "loss": 0.583, "step": 23635 }, { "epoch": 0.6900820414002511, "grad_norm": 0.5345794943365335, "learning_rate": 1.721816707218167e-05, "loss": 0.598, "step": 23636 }, { "epoch": 0.6901112376280985, "grad_norm": 0.5327630488746602, "learning_rate": 1.721654501216545e-05, "loss": 0.6185, "step": 23637 }, { "epoch": 0.6901404338559458, "grad_norm": 0.45530191420045324, "learning_rate": 1.721492295214923e-05, "loss": 0.4639, "step": 23638 }, { "epoch": 0.6901696300837932, "grad_norm": 0.5374646167059337, "learning_rate": 1.721330089213301e-05, "loss": 0.6309, "step": 23639 }, { "epoch": 0.6901988263116405, "grad_norm": 0.5376872851646046, "learning_rate": 1.7211678832116788e-05, "loss": 0.6301, "step": 23640 }, { "epoch": 0.6902280225394879, "grad_norm": 0.4954714515903167, "learning_rate": 1.721005677210057e-05, "loss": 0.5296, "step": 23641 }, { "epoch": 0.6902572187673353, "grad_norm": 0.5307966647806912, "learning_rate": 1.720843471208435e-05, "loss": 0.5892, "step": 23642 }, { "epoch": 0.6902864149951826, "grad_norm": 0.5031119237647046, "learning_rate": 1.7206812652068127e-05, "loss": 0.5848, "step": 23643 }, { "epoch": 0.69031561122303, "grad_norm": 0.5341158975730262, "learning_rate": 1.7205190592051905e-05, "loss": 0.6096, "step": 23644 }, { "epoch": 0.6903448074508773, "grad_norm": 0.5310329715860332, "learning_rate": 1.7203568532035687e-05, "loss": 0.622, "step": 23645 }, { "epoch": 0.6903740036787247, "grad_norm": 0.502828817278192, "learning_rate": 1.7201946472019465e-05, "loss": 0.5503, "step": 23646 }, { "epoch": 0.6904031999065721, "grad_norm": 0.5238054971225907, "learning_rate": 1.7200324412003244e-05, "loss": 0.6475, "step": 23647 }, { "epoch": 0.6904323961344194, "grad_norm": 0.5367126799092701, "learning_rate": 1.7198702351987025e-05, "loss": 0.5874, "step": 23648 }, { "epoch": 0.6904615923622668, "grad_norm": 0.47280513687133674, "learning_rate": 1.7197080291970804e-05, "loss": 0.5165, "step": 23649 }, { "epoch": 0.6904907885901141, "grad_norm": 0.5572661040938952, "learning_rate": 1.7195458231954582e-05, "loss": 0.6698, "step": 23650 }, { "epoch": 0.6905199848179615, "grad_norm": 0.5514082970941807, "learning_rate": 1.7193836171938364e-05, "loss": 0.5808, "step": 23651 }, { "epoch": 0.6905491810458089, "grad_norm": 0.5027596032987566, "learning_rate": 1.7192214111922142e-05, "loss": 0.5613, "step": 23652 }, { "epoch": 0.6905783772736562, "grad_norm": 0.5661581667786582, "learning_rate": 1.719059205190592e-05, "loss": 0.5973, "step": 23653 }, { "epoch": 0.6906075735015036, "grad_norm": 0.4963956753365871, "learning_rate": 1.71889699918897e-05, "loss": 0.526, "step": 23654 }, { "epoch": 0.690636769729351, "grad_norm": 0.5372458473816207, "learning_rate": 1.718734793187348e-05, "loss": 0.6149, "step": 23655 }, { "epoch": 0.6906659659571983, "grad_norm": 0.5309420995664729, "learning_rate": 1.718572587185726e-05, "loss": 0.6208, "step": 23656 }, { "epoch": 0.6906951621850457, "grad_norm": 0.5057845924943877, "learning_rate": 1.718410381184104e-05, "loss": 0.5677, "step": 23657 }, { "epoch": 0.690724358412893, "grad_norm": 0.5403068472003105, "learning_rate": 1.718248175182482e-05, "loss": 0.5983, "step": 23658 }, { "epoch": 0.6907535546407404, "grad_norm": 0.5176603552643886, "learning_rate": 1.7180859691808598e-05, "loss": 0.6025, "step": 23659 }, { "epoch": 0.6907827508685878, "grad_norm": 0.5011623475474127, "learning_rate": 1.7179237631792376e-05, "loss": 0.5933, "step": 23660 }, { "epoch": 0.6908119470964351, "grad_norm": 0.5346634098768441, "learning_rate": 1.7177615571776158e-05, "loss": 0.5768, "step": 23661 }, { "epoch": 0.6908411433242825, "grad_norm": 0.519712527944142, "learning_rate": 1.7175993511759936e-05, "loss": 0.5943, "step": 23662 }, { "epoch": 0.6908703395521298, "grad_norm": 0.5419739739810117, "learning_rate": 1.7174371451743715e-05, "loss": 0.6785, "step": 23663 }, { "epoch": 0.6908995357799772, "grad_norm": 0.5383380554248638, "learning_rate": 1.7172749391727493e-05, "loss": 0.624, "step": 23664 }, { "epoch": 0.6909287320078246, "grad_norm": 0.4920133018472504, "learning_rate": 1.7171127331711275e-05, "loss": 0.5347, "step": 23665 }, { "epoch": 0.6909579282356719, "grad_norm": 0.4975303153312352, "learning_rate": 1.7169505271695053e-05, "loss": 0.5481, "step": 23666 }, { "epoch": 0.6909871244635193, "grad_norm": 0.5206139408853383, "learning_rate": 1.7167883211678835e-05, "loss": 0.5957, "step": 23667 }, { "epoch": 0.6910163206913666, "grad_norm": 0.5225662182532838, "learning_rate": 1.7166261151662613e-05, "loss": 0.5849, "step": 23668 }, { "epoch": 0.691045516919214, "grad_norm": 0.5054044776030998, "learning_rate": 1.716463909164639e-05, "loss": 0.5353, "step": 23669 }, { "epoch": 0.6910747131470614, "grad_norm": 0.487855538583697, "learning_rate": 1.716301703163017e-05, "loss": 0.5511, "step": 23670 }, { "epoch": 0.6911039093749087, "grad_norm": 0.5148140680474236, "learning_rate": 1.7161394971613952e-05, "loss": 0.5563, "step": 23671 }, { "epoch": 0.6911331056027561, "grad_norm": 0.5207054887360297, "learning_rate": 1.715977291159773e-05, "loss": 0.5923, "step": 23672 }, { "epoch": 0.6911623018306035, "grad_norm": 0.5430152707202333, "learning_rate": 1.715815085158151e-05, "loss": 0.6248, "step": 23673 }, { "epoch": 0.6911914980584508, "grad_norm": 0.48497093576903155, "learning_rate": 1.7156528791565287e-05, "loss": 0.5295, "step": 23674 }, { "epoch": 0.6912206942862982, "grad_norm": 0.5018874266905073, "learning_rate": 1.715490673154907e-05, "loss": 0.5739, "step": 23675 }, { "epoch": 0.6912498905141455, "grad_norm": 0.5381937360413974, "learning_rate": 1.715328467153285e-05, "loss": 0.6353, "step": 23676 }, { "epoch": 0.6912790867419929, "grad_norm": 0.5444513340272611, "learning_rate": 1.715166261151663e-05, "loss": 0.6521, "step": 23677 }, { "epoch": 0.6913082829698403, "grad_norm": 0.5164300419369008, "learning_rate": 1.7150040551500407e-05, "loss": 0.6173, "step": 23678 }, { "epoch": 0.6913374791976876, "grad_norm": 0.5117721282413696, "learning_rate": 1.7148418491484185e-05, "loss": 0.5868, "step": 23679 }, { "epoch": 0.691366675425535, "grad_norm": 0.5311757949797611, "learning_rate": 1.7146796431467964e-05, "loss": 0.589, "step": 23680 }, { "epoch": 0.6913958716533823, "grad_norm": 0.510038469853161, "learning_rate": 1.7145174371451746e-05, "loss": 0.5766, "step": 23681 }, { "epoch": 0.6914250678812297, "grad_norm": 0.5231565204244358, "learning_rate": 1.7143552311435524e-05, "loss": 0.5762, "step": 23682 }, { "epoch": 0.6914542641090771, "grad_norm": 0.5084645439320679, "learning_rate": 1.7141930251419302e-05, "loss": 0.5846, "step": 23683 }, { "epoch": 0.6914834603369244, "grad_norm": 0.4876409446616704, "learning_rate": 1.714030819140308e-05, "loss": 0.5715, "step": 23684 }, { "epoch": 0.6915126565647718, "grad_norm": 0.5449273627801983, "learning_rate": 1.7138686131386862e-05, "loss": 0.5755, "step": 23685 }, { "epoch": 0.6915418527926191, "grad_norm": 0.4923208224845296, "learning_rate": 1.7137064071370644e-05, "loss": 0.5236, "step": 23686 }, { "epoch": 0.6915710490204665, "grad_norm": 13.081897124020612, "learning_rate": 1.7135442011354423e-05, "loss": 1.1441, "step": 23687 }, { "epoch": 0.6916002452483139, "grad_norm": 0.515043470430959, "learning_rate": 1.71338199513382e-05, "loss": 0.5884, "step": 23688 }, { "epoch": 0.6916294414761612, "grad_norm": 0.5150962374461404, "learning_rate": 1.713219789132198e-05, "loss": 0.5306, "step": 23689 }, { "epoch": 0.6916586377040086, "grad_norm": 0.520313220623294, "learning_rate": 1.7130575831305758e-05, "loss": 0.5516, "step": 23690 }, { "epoch": 0.691687833931856, "grad_norm": 0.5010849067223583, "learning_rate": 1.712895377128954e-05, "loss": 0.512, "step": 23691 }, { "epoch": 0.6917170301597033, "grad_norm": 0.5613121814093052, "learning_rate": 1.7127331711273318e-05, "loss": 0.7051, "step": 23692 }, { "epoch": 0.6917462263875507, "grad_norm": 0.534796506268883, "learning_rate": 1.7125709651257096e-05, "loss": 0.5969, "step": 23693 }, { "epoch": 0.691775422615398, "grad_norm": 0.5020889259747936, "learning_rate": 1.7124087591240875e-05, "loss": 0.5546, "step": 23694 }, { "epoch": 0.6918046188432454, "grad_norm": 0.5254898415823915, "learning_rate": 1.7122465531224656e-05, "loss": 0.5904, "step": 23695 }, { "epoch": 0.6918338150710928, "grad_norm": 0.533577343260752, "learning_rate": 1.7120843471208435e-05, "loss": 0.6141, "step": 23696 }, { "epoch": 0.6918630112989401, "grad_norm": 0.5120697007774, "learning_rate": 1.7119221411192217e-05, "loss": 0.5448, "step": 23697 }, { "epoch": 0.6918922075267875, "grad_norm": 0.49416203369900374, "learning_rate": 1.7117599351175995e-05, "loss": 0.5483, "step": 23698 }, { "epoch": 0.691921403754635, "grad_norm": 0.5090682580904385, "learning_rate": 1.7115977291159773e-05, "loss": 0.5602, "step": 23699 }, { "epoch": 0.6919505999824823, "grad_norm": 0.5069658142486193, "learning_rate": 1.711435523114355e-05, "loss": 0.5569, "step": 23700 }, { "epoch": 0.6919797962103297, "grad_norm": 0.5049536978301027, "learning_rate": 1.7112733171127333e-05, "loss": 0.5541, "step": 23701 }, { "epoch": 0.692008992438177, "grad_norm": 0.575419432559398, "learning_rate": 1.7111111111111112e-05, "loss": 0.6015, "step": 23702 }, { "epoch": 0.6920381886660244, "grad_norm": 0.4859199628060639, "learning_rate": 1.710948905109489e-05, "loss": 0.539, "step": 23703 }, { "epoch": 0.6920673848938718, "grad_norm": 0.5570491677645666, "learning_rate": 1.7107866991078672e-05, "loss": 0.6308, "step": 23704 }, { "epoch": 0.6920965811217191, "grad_norm": 0.5331762948889691, "learning_rate": 1.710624493106245e-05, "loss": 0.5751, "step": 23705 }, { "epoch": 0.6921257773495665, "grad_norm": 0.49500716819666835, "learning_rate": 1.710462287104623e-05, "loss": 0.5286, "step": 23706 }, { "epoch": 0.6921549735774138, "grad_norm": 0.5260503707831439, "learning_rate": 1.710300081103001e-05, "loss": 0.542, "step": 23707 }, { "epoch": 0.6921841698052612, "grad_norm": 0.48682135447999014, "learning_rate": 1.710137875101379e-05, "loss": 0.5231, "step": 23708 }, { "epoch": 0.6922133660331086, "grad_norm": 0.47033627829158137, "learning_rate": 1.7099756690997567e-05, "loss": 0.4829, "step": 23709 }, { "epoch": 0.6922425622609559, "grad_norm": 0.5182269684240608, "learning_rate": 1.7098134630981346e-05, "loss": 0.5919, "step": 23710 }, { "epoch": 0.6922717584888033, "grad_norm": 0.543966608506863, "learning_rate": 1.7096512570965127e-05, "loss": 0.6303, "step": 23711 }, { "epoch": 0.6923009547166507, "grad_norm": 0.5529676105220926, "learning_rate": 1.7094890510948906e-05, "loss": 0.6278, "step": 23712 }, { "epoch": 0.692330150944498, "grad_norm": 0.5206239545558564, "learning_rate": 1.7093268450932684e-05, "loss": 0.5755, "step": 23713 }, { "epoch": 0.6923593471723454, "grad_norm": 0.5279384667370749, "learning_rate": 1.7091646390916466e-05, "loss": 0.6065, "step": 23714 }, { "epoch": 0.6923885434001927, "grad_norm": 0.5558411884943901, "learning_rate": 1.7090024330900244e-05, "loss": 0.6171, "step": 23715 }, { "epoch": 0.6924177396280401, "grad_norm": 0.5714743571459361, "learning_rate": 1.7088402270884023e-05, "loss": 0.6858, "step": 23716 }, { "epoch": 0.6924469358558875, "grad_norm": 0.4907335323473935, "learning_rate": 1.7086780210867804e-05, "loss": 0.5426, "step": 23717 }, { "epoch": 0.6924761320837348, "grad_norm": 0.5483792785514984, "learning_rate": 1.7085158150851583e-05, "loss": 0.6365, "step": 23718 }, { "epoch": 0.6925053283115822, "grad_norm": 0.5185397568688265, "learning_rate": 1.708353609083536e-05, "loss": 0.5886, "step": 23719 }, { "epoch": 0.6925345245394295, "grad_norm": 0.526374195573011, "learning_rate": 1.708191403081914e-05, "loss": 0.6209, "step": 23720 }, { "epoch": 0.6925637207672769, "grad_norm": 0.49957764142084754, "learning_rate": 1.708029197080292e-05, "loss": 0.5629, "step": 23721 }, { "epoch": 0.6925929169951243, "grad_norm": 0.5773630525616519, "learning_rate": 1.70786699107867e-05, "loss": 0.6683, "step": 23722 }, { "epoch": 0.6926221132229716, "grad_norm": 0.4822109593616765, "learning_rate": 1.707704785077048e-05, "loss": 0.5159, "step": 23723 }, { "epoch": 0.692651309450819, "grad_norm": 0.5129544956661666, "learning_rate": 1.707542579075426e-05, "loss": 0.5925, "step": 23724 }, { "epoch": 0.6926805056786663, "grad_norm": 0.5562631617298621, "learning_rate": 1.7073803730738038e-05, "loss": 0.6916, "step": 23725 }, { "epoch": 0.6927097019065137, "grad_norm": 0.5143401691064631, "learning_rate": 1.7072181670721816e-05, "loss": 0.5927, "step": 23726 }, { "epoch": 0.6927388981343611, "grad_norm": 0.5418938464833098, "learning_rate": 1.7070559610705598e-05, "loss": 0.6435, "step": 23727 }, { "epoch": 0.6927680943622084, "grad_norm": 0.519663897071106, "learning_rate": 1.7068937550689377e-05, "loss": 0.5662, "step": 23728 }, { "epoch": 0.6927972905900558, "grad_norm": 0.5659386551484701, "learning_rate": 1.7067315490673155e-05, "loss": 0.681, "step": 23729 }, { "epoch": 0.6928264868179032, "grad_norm": 0.533034556510648, "learning_rate": 1.7065693430656933e-05, "loss": 0.6075, "step": 23730 }, { "epoch": 0.6928556830457505, "grad_norm": 0.48159487974699394, "learning_rate": 1.7064071370640715e-05, "loss": 0.5248, "step": 23731 }, { "epoch": 0.6928848792735979, "grad_norm": 0.549974356434201, "learning_rate": 1.7062449310624494e-05, "loss": 0.5992, "step": 23732 }, { "epoch": 0.6929140755014452, "grad_norm": 0.5617092125502062, "learning_rate": 1.7060827250608275e-05, "loss": 0.7077, "step": 23733 }, { "epoch": 0.6929432717292926, "grad_norm": 0.5158695772916508, "learning_rate": 1.7059205190592054e-05, "loss": 0.5887, "step": 23734 }, { "epoch": 0.69297246795714, "grad_norm": 0.5309894184788985, "learning_rate": 1.7057583130575832e-05, "loss": 0.5689, "step": 23735 }, { "epoch": 0.6930016641849873, "grad_norm": 0.5062230671088319, "learning_rate": 1.705596107055961e-05, "loss": 0.5659, "step": 23736 }, { "epoch": 0.6930308604128347, "grad_norm": 0.5391035164757491, "learning_rate": 1.7054339010543392e-05, "loss": 0.5997, "step": 23737 }, { "epoch": 0.693060056640682, "grad_norm": 0.48145908265569176, "learning_rate": 1.705271695052717e-05, "loss": 0.5461, "step": 23738 }, { "epoch": 0.6930892528685294, "grad_norm": 0.5429437213393002, "learning_rate": 1.705109489051095e-05, "loss": 0.6012, "step": 23739 }, { "epoch": 0.6931184490963768, "grad_norm": 0.5018104070519883, "learning_rate": 1.7049472830494727e-05, "loss": 0.5621, "step": 23740 }, { "epoch": 0.6931476453242241, "grad_norm": 0.501526874712411, "learning_rate": 1.7047850770478506e-05, "loss": 0.572, "step": 23741 }, { "epoch": 0.6931768415520715, "grad_norm": 0.5126702486112923, "learning_rate": 1.704622871046229e-05, "loss": 0.5468, "step": 23742 }, { "epoch": 0.6932060377799188, "grad_norm": 0.5245444586507758, "learning_rate": 1.704460665044607e-05, "loss": 0.581, "step": 23743 }, { "epoch": 0.6932352340077662, "grad_norm": 0.5314371359613206, "learning_rate": 1.7042984590429848e-05, "loss": 0.5162, "step": 23744 }, { "epoch": 0.6932644302356136, "grad_norm": 0.5286891923758802, "learning_rate": 1.7041362530413626e-05, "loss": 0.6095, "step": 23745 }, { "epoch": 0.6932936264634609, "grad_norm": 0.5440149902090625, "learning_rate": 1.7039740470397404e-05, "loss": 0.6579, "step": 23746 }, { "epoch": 0.6933228226913083, "grad_norm": 0.5477890753669838, "learning_rate": 1.7038118410381186e-05, "loss": 0.6604, "step": 23747 }, { "epoch": 0.6933520189191557, "grad_norm": 0.5080380041391956, "learning_rate": 1.7036496350364964e-05, "loss": 0.581, "step": 23748 }, { "epoch": 0.693381215147003, "grad_norm": 0.5239017736186702, "learning_rate": 1.7034874290348743e-05, "loss": 0.6143, "step": 23749 }, { "epoch": 0.6934104113748504, "grad_norm": 0.5284932242373406, "learning_rate": 1.703325223033252e-05, "loss": 0.6298, "step": 23750 }, { "epoch": 0.6934396076026977, "grad_norm": 0.579310501901912, "learning_rate": 1.70316301703163e-05, "loss": 0.7184, "step": 23751 }, { "epoch": 0.6934688038305451, "grad_norm": 0.5096099029423836, "learning_rate": 1.7030008110300085e-05, "loss": 0.5628, "step": 23752 }, { "epoch": 0.6934980000583925, "grad_norm": 0.5321477791844217, "learning_rate": 1.7028386050283863e-05, "loss": 0.569, "step": 23753 }, { "epoch": 0.6935271962862398, "grad_norm": 0.5139206639640708, "learning_rate": 1.702676399026764e-05, "loss": 0.6006, "step": 23754 }, { "epoch": 0.6935563925140872, "grad_norm": 0.4871977160469265, "learning_rate": 1.702514193025142e-05, "loss": 0.5396, "step": 23755 }, { "epoch": 0.6935855887419345, "grad_norm": 0.5536210376893353, "learning_rate": 1.7023519870235198e-05, "loss": 0.6699, "step": 23756 }, { "epoch": 0.6936147849697819, "grad_norm": 0.5272075044353234, "learning_rate": 1.702189781021898e-05, "loss": 0.5939, "step": 23757 }, { "epoch": 0.6936439811976293, "grad_norm": 0.48532411451829216, "learning_rate": 1.702027575020276e-05, "loss": 0.4992, "step": 23758 }, { "epoch": 0.6936731774254766, "grad_norm": 0.5274528534923693, "learning_rate": 1.7018653690186537e-05, "loss": 0.5999, "step": 23759 }, { "epoch": 0.693702373653324, "grad_norm": 0.5476413211897968, "learning_rate": 1.7017031630170315e-05, "loss": 0.6605, "step": 23760 }, { "epoch": 0.6937315698811714, "grad_norm": 0.5799902194325749, "learning_rate": 1.7015409570154097e-05, "loss": 0.7451, "step": 23761 }, { "epoch": 0.6937607661090187, "grad_norm": 0.5378537942925207, "learning_rate": 1.7013787510137875e-05, "loss": 0.6794, "step": 23762 }, { "epoch": 0.6937899623368661, "grad_norm": 0.5483950811926682, "learning_rate": 1.7012165450121657e-05, "loss": 0.6282, "step": 23763 }, { "epoch": 0.6938191585647134, "grad_norm": 0.5065804934572957, "learning_rate": 1.7010543390105435e-05, "loss": 0.5503, "step": 23764 }, { "epoch": 0.6938483547925608, "grad_norm": 0.5382588418139954, "learning_rate": 1.7008921330089214e-05, "loss": 0.623, "step": 23765 }, { "epoch": 0.6938775510204082, "grad_norm": 0.5598508037147156, "learning_rate": 1.7007299270072992e-05, "loss": 0.6416, "step": 23766 }, { "epoch": 0.6939067472482555, "grad_norm": 0.5006802055447637, "learning_rate": 1.7005677210056774e-05, "loss": 0.5628, "step": 23767 }, { "epoch": 0.6939359434761029, "grad_norm": 0.5115582905601475, "learning_rate": 1.7004055150040552e-05, "loss": 0.5765, "step": 23768 }, { "epoch": 0.6939651397039502, "grad_norm": 0.53803309742157, "learning_rate": 1.700243309002433e-05, "loss": 0.63, "step": 23769 }, { "epoch": 0.6939943359317976, "grad_norm": 0.5076343666671799, "learning_rate": 1.7000811030008112e-05, "loss": 0.5301, "step": 23770 }, { "epoch": 0.694023532159645, "grad_norm": 0.5341238550717282, "learning_rate": 1.699918896999189e-05, "loss": 0.6307, "step": 23771 }, { "epoch": 0.6940527283874923, "grad_norm": 0.4995500251452176, "learning_rate": 1.699756690997567e-05, "loss": 0.6191, "step": 23772 }, { "epoch": 0.6940819246153397, "grad_norm": 0.5294670474581326, "learning_rate": 1.699594484995945e-05, "loss": 0.615, "step": 23773 }, { "epoch": 0.694111120843187, "grad_norm": 0.5137203270833156, "learning_rate": 1.699432278994323e-05, "loss": 0.5677, "step": 23774 }, { "epoch": 0.6941403170710344, "grad_norm": 0.5706432427937874, "learning_rate": 1.6992700729927008e-05, "loss": 0.6271, "step": 23775 }, { "epoch": 0.6941695132988818, "grad_norm": 0.5249181094937733, "learning_rate": 1.6991078669910786e-05, "loss": 0.6094, "step": 23776 }, { "epoch": 0.6941987095267291, "grad_norm": 0.5190103973109566, "learning_rate": 1.6989456609894568e-05, "loss": 0.5848, "step": 23777 }, { "epoch": 0.6942279057545765, "grad_norm": 0.485239992675337, "learning_rate": 1.6987834549878346e-05, "loss": 0.5266, "step": 23778 }, { "epoch": 0.6942571019824239, "grad_norm": 0.5455379631001338, "learning_rate": 1.6986212489862125e-05, "loss": 0.6128, "step": 23779 }, { "epoch": 0.6942862982102712, "grad_norm": 0.5042163699910827, "learning_rate": 1.6984590429845906e-05, "loss": 0.5961, "step": 23780 }, { "epoch": 0.6943154944381186, "grad_norm": 0.5513088157726966, "learning_rate": 1.6982968369829685e-05, "loss": 0.6272, "step": 23781 }, { "epoch": 0.6943446906659659, "grad_norm": 0.49180560830834347, "learning_rate": 1.6981346309813463e-05, "loss": 0.5624, "step": 23782 }, { "epoch": 0.6943738868938133, "grad_norm": 0.5272390905264753, "learning_rate": 1.6979724249797245e-05, "loss": 0.6096, "step": 23783 }, { "epoch": 0.6944030831216607, "grad_norm": 0.5161803376050885, "learning_rate": 1.6978102189781023e-05, "loss": 0.5805, "step": 23784 }, { "epoch": 0.694432279349508, "grad_norm": 0.6576759269786211, "learning_rate": 1.69764801297648e-05, "loss": 0.7068, "step": 23785 }, { "epoch": 0.6944614755773554, "grad_norm": 0.5192250322945018, "learning_rate": 1.697485806974858e-05, "loss": 0.5841, "step": 23786 }, { "epoch": 0.6944906718052027, "grad_norm": 0.5406475744778123, "learning_rate": 1.697323600973236e-05, "loss": 0.5983, "step": 23787 }, { "epoch": 0.6945198680330501, "grad_norm": 0.5239229215520361, "learning_rate": 1.697161394971614e-05, "loss": 0.6037, "step": 23788 }, { "epoch": 0.6945490642608975, "grad_norm": 0.5513655388226443, "learning_rate": 1.6969991889699922e-05, "loss": 0.681, "step": 23789 }, { "epoch": 0.6945782604887448, "grad_norm": 0.5370535066452551, "learning_rate": 1.69683698296837e-05, "loss": 0.5838, "step": 23790 }, { "epoch": 0.6946074567165922, "grad_norm": 0.5036242050662186, "learning_rate": 1.696674776966748e-05, "loss": 0.5546, "step": 23791 }, { "epoch": 0.6946366529444395, "grad_norm": 0.5062129755132998, "learning_rate": 1.6965125709651257e-05, "loss": 0.6109, "step": 23792 }, { "epoch": 0.6946658491722869, "grad_norm": 0.5178841443310173, "learning_rate": 1.696350364963504e-05, "loss": 0.6136, "step": 23793 }, { "epoch": 0.6946950454001343, "grad_norm": 0.5016100426492143, "learning_rate": 1.6961881589618817e-05, "loss": 0.5236, "step": 23794 }, { "epoch": 0.6947242416279816, "grad_norm": 0.4720802654947078, "learning_rate": 1.6960259529602595e-05, "loss": 0.4987, "step": 23795 }, { "epoch": 0.694753437855829, "grad_norm": 0.494908130436148, "learning_rate": 1.6958637469586374e-05, "loss": 0.5342, "step": 23796 }, { "epoch": 0.6947826340836764, "grad_norm": 0.5496357335941527, "learning_rate": 1.6957015409570156e-05, "loss": 0.6495, "step": 23797 }, { "epoch": 0.6948118303115237, "grad_norm": 0.5058227860369776, "learning_rate": 1.6955393349553934e-05, "loss": 0.546, "step": 23798 }, { "epoch": 0.6948410265393711, "grad_norm": 0.5122383936723692, "learning_rate": 1.6953771289537716e-05, "loss": 0.6125, "step": 23799 }, { "epoch": 0.6948702227672184, "grad_norm": 0.4733828856098299, "learning_rate": 1.6952149229521494e-05, "loss": 0.4974, "step": 23800 }, { "epoch": 0.6948994189950658, "grad_norm": 0.4823521367327746, "learning_rate": 1.6950527169505272e-05, "loss": 0.5273, "step": 23801 }, { "epoch": 0.6949286152229132, "grad_norm": 0.5593455356618374, "learning_rate": 1.694890510948905e-05, "loss": 0.6685, "step": 23802 }, { "epoch": 0.6949578114507605, "grad_norm": 0.5386448824492631, "learning_rate": 1.6947283049472833e-05, "loss": 0.6106, "step": 23803 }, { "epoch": 0.6949870076786079, "grad_norm": 0.4809853166889026, "learning_rate": 1.694566098945661e-05, "loss": 0.532, "step": 23804 }, { "epoch": 0.6950162039064552, "grad_norm": 0.5043282597835278, "learning_rate": 1.694403892944039e-05, "loss": 0.4951, "step": 23805 }, { "epoch": 0.6950454001343026, "grad_norm": 0.5426680372584799, "learning_rate": 1.6942416869424168e-05, "loss": 0.6601, "step": 23806 }, { "epoch": 0.69507459636215, "grad_norm": 0.5124474271795469, "learning_rate": 1.6940794809407946e-05, "loss": 0.6058, "step": 23807 }, { "epoch": 0.6951037925899973, "grad_norm": 0.5259409500399895, "learning_rate": 1.693917274939173e-05, "loss": 0.6, "step": 23808 }, { "epoch": 0.6951329888178447, "grad_norm": 0.5757026368577832, "learning_rate": 1.693755068937551e-05, "loss": 0.6825, "step": 23809 }, { "epoch": 0.695162185045692, "grad_norm": 0.5193729868571096, "learning_rate": 1.6935928629359288e-05, "loss": 0.5804, "step": 23810 }, { "epoch": 0.6951913812735394, "grad_norm": 0.5599605863762047, "learning_rate": 1.6934306569343066e-05, "loss": 0.6483, "step": 23811 }, { "epoch": 0.6952205775013868, "grad_norm": 0.44823210260706403, "learning_rate": 1.6932684509326845e-05, "loss": 0.4446, "step": 23812 }, { "epoch": 0.6952497737292341, "grad_norm": 0.5175744876177295, "learning_rate": 1.6931062449310626e-05, "loss": 0.5841, "step": 23813 }, { "epoch": 0.6952789699570815, "grad_norm": 0.5463259643728013, "learning_rate": 1.6929440389294405e-05, "loss": 0.6504, "step": 23814 }, { "epoch": 0.6953081661849289, "grad_norm": 0.5364540924861732, "learning_rate": 1.6927818329278183e-05, "loss": 0.6598, "step": 23815 }, { "epoch": 0.6953373624127762, "grad_norm": 0.5115854095462293, "learning_rate": 1.692619626926196e-05, "loss": 0.587, "step": 23816 }, { "epoch": 0.6953665586406236, "grad_norm": 0.556691508063691, "learning_rate": 1.692457420924574e-05, "loss": 0.7067, "step": 23817 }, { "epoch": 0.6953957548684709, "grad_norm": 0.5632741363947276, "learning_rate": 1.6922952149229522e-05, "loss": 0.6511, "step": 23818 }, { "epoch": 0.6954249510963184, "grad_norm": 0.525920036123434, "learning_rate": 1.6921330089213304e-05, "loss": 0.58, "step": 23819 }, { "epoch": 0.6954541473241658, "grad_norm": 0.5415921589207467, "learning_rate": 1.6919708029197082e-05, "loss": 0.6237, "step": 23820 }, { "epoch": 0.6954833435520131, "grad_norm": 0.5160368123482885, "learning_rate": 1.691808596918086e-05, "loss": 0.5544, "step": 23821 }, { "epoch": 0.6955125397798605, "grad_norm": 0.4944134415123406, "learning_rate": 1.691646390916464e-05, "loss": 0.5338, "step": 23822 }, { "epoch": 0.6955417360077079, "grad_norm": 0.5310367547081719, "learning_rate": 1.691484184914842e-05, "loss": 0.5919, "step": 23823 }, { "epoch": 0.6955709322355552, "grad_norm": 0.5594700851558807, "learning_rate": 1.69132197891322e-05, "loss": 0.6632, "step": 23824 }, { "epoch": 0.6956001284634026, "grad_norm": 0.5385482555979152, "learning_rate": 1.6911597729115977e-05, "loss": 0.605, "step": 23825 }, { "epoch": 0.6956293246912499, "grad_norm": 0.504685629484625, "learning_rate": 1.6909975669099756e-05, "loss": 0.5707, "step": 23826 }, { "epoch": 0.6956585209190973, "grad_norm": 0.5230015571925553, "learning_rate": 1.6908353609083537e-05, "loss": 0.6056, "step": 23827 }, { "epoch": 0.6956877171469447, "grad_norm": 0.54784290322116, "learning_rate": 1.6906731549067316e-05, "loss": 0.5891, "step": 23828 }, { "epoch": 0.695716913374792, "grad_norm": 0.5082748721696677, "learning_rate": 1.6905109489051097e-05, "loss": 0.5767, "step": 23829 }, { "epoch": 0.6957461096026394, "grad_norm": 0.5278275875823827, "learning_rate": 1.6903487429034876e-05, "loss": 0.5549, "step": 23830 }, { "epoch": 0.6957753058304867, "grad_norm": 0.5904417035440881, "learning_rate": 1.6901865369018654e-05, "loss": 0.6643, "step": 23831 }, { "epoch": 0.6958045020583341, "grad_norm": 0.5027779673568448, "learning_rate": 1.6900243309002433e-05, "loss": 0.5505, "step": 23832 }, { "epoch": 0.6958336982861815, "grad_norm": 0.5221996145380401, "learning_rate": 1.6898621248986214e-05, "loss": 0.5953, "step": 23833 }, { "epoch": 0.6958628945140288, "grad_norm": 0.43856318001480477, "learning_rate": 1.6896999188969993e-05, "loss": 0.4491, "step": 23834 }, { "epoch": 0.6958920907418762, "grad_norm": 0.5328510243602109, "learning_rate": 1.689537712895377e-05, "loss": 0.6541, "step": 23835 }, { "epoch": 0.6959212869697236, "grad_norm": 0.5295812616625191, "learning_rate": 1.689375506893755e-05, "loss": 0.5985, "step": 23836 }, { "epoch": 0.6959504831975709, "grad_norm": 0.4978355167690101, "learning_rate": 1.689213300892133e-05, "loss": 0.5535, "step": 23837 }, { "epoch": 0.6959796794254183, "grad_norm": 0.5162472704359089, "learning_rate": 1.689051094890511e-05, "loss": 0.5629, "step": 23838 }, { "epoch": 0.6960088756532656, "grad_norm": 0.519307683670095, "learning_rate": 1.688888888888889e-05, "loss": 0.5965, "step": 23839 }, { "epoch": 0.696038071881113, "grad_norm": 0.5309504948275118, "learning_rate": 1.688726682887267e-05, "loss": 0.6214, "step": 23840 }, { "epoch": 0.6960672681089604, "grad_norm": 0.48990618721206847, "learning_rate": 1.6885644768856448e-05, "loss": 0.5741, "step": 23841 }, { "epoch": 0.6960964643368077, "grad_norm": 0.514329379869606, "learning_rate": 1.6884022708840226e-05, "loss": 0.5927, "step": 23842 }, { "epoch": 0.6961256605646551, "grad_norm": 0.5377116193633419, "learning_rate": 1.6882400648824008e-05, "loss": 0.6025, "step": 23843 }, { "epoch": 0.6961548567925024, "grad_norm": 0.5445603318877917, "learning_rate": 1.6880778588807787e-05, "loss": 0.6323, "step": 23844 }, { "epoch": 0.6961840530203498, "grad_norm": 0.48538598799750077, "learning_rate": 1.6879156528791565e-05, "loss": 0.5294, "step": 23845 }, { "epoch": 0.6962132492481972, "grad_norm": 0.48897423904476595, "learning_rate": 1.6877534468775347e-05, "loss": 0.5255, "step": 23846 }, { "epoch": 0.6962424454760445, "grad_norm": 0.5727153372335292, "learning_rate": 1.6875912408759125e-05, "loss": 0.6827, "step": 23847 }, { "epoch": 0.6962716417038919, "grad_norm": 0.4949121955185112, "learning_rate": 1.6874290348742903e-05, "loss": 0.5785, "step": 23848 }, { "epoch": 0.6963008379317392, "grad_norm": 0.499088799736237, "learning_rate": 1.6872668288726685e-05, "loss": 0.5385, "step": 23849 }, { "epoch": 0.6963300341595866, "grad_norm": 0.5354520955599578, "learning_rate": 1.6871046228710464e-05, "loss": 0.6153, "step": 23850 }, { "epoch": 0.696359230387434, "grad_norm": 0.501685450294326, "learning_rate": 1.6869424168694242e-05, "loss": 0.5343, "step": 23851 }, { "epoch": 0.6963884266152813, "grad_norm": 0.5473901221717411, "learning_rate": 1.686780210867802e-05, "loss": 0.6269, "step": 23852 }, { "epoch": 0.6964176228431287, "grad_norm": 0.49805667879840254, "learning_rate": 1.6866180048661802e-05, "loss": 0.5633, "step": 23853 }, { "epoch": 0.696446819070976, "grad_norm": 0.481638955917255, "learning_rate": 1.686455798864558e-05, "loss": 0.5599, "step": 23854 }, { "epoch": 0.6964760152988234, "grad_norm": 0.5067477573910106, "learning_rate": 1.6862935928629362e-05, "loss": 0.5734, "step": 23855 }, { "epoch": 0.6965052115266708, "grad_norm": 0.5946784702709362, "learning_rate": 1.686131386861314e-05, "loss": 0.7078, "step": 23856 }, { "epoch": 0.6965344077545181, "grad_norm": 0.4874590729732595, "learning_rate": 1.685969180859692e-05, "loss": 0.5577, "step": 23857 }, { "epoch": 0.6965636039823655, "grad_norm": 0.4887217224795634, "learning_rate": 1.6858069748580697e-05, "loss": 0.5412, "step": 23858 }, { "epoch": 0.6965928002102129, "grad_norm": 0.545145138267918, "learning_rate": 1.685644768856448e-05, "loss": 0.6562, "step": 23859 }, { "epoch": 0.6966219964380602, "grad_norm": 0.5402586550506002, "learning_rate": 1.6854825628548258e-05, "loss": 0.6182, "step": 23860 }, { "epoch": 0.6966511926659076, "grad_norm": 0.5329927493492291, "learning_rate": 1.6853203568532036e-05, "loss": 0.6206, "step": 23861 }, { "epoch": 0.6966803888937549, "grad_norm": 0.47182562856456045, "learning_rate": 1.6851581508515814e-05, "loss": 0.5139, "step": 23862 }, { "epoch": 0.6967095851216023, "grad_norm": 0.5155617586044764, "learning_rate": 1.6849959448499593e-05, "loss": 0.5927, "step": 23863 }, { "epoch": 0.6967387813494497, "grad_norm": 0.5291021944639421, "learning_rate": 1.6848337388483374e-05, "loss": 0.6022, "step": 23864 }, { "epoch": 0.696767977577297, "grad_norm": 0.5753786210963753, "learning_rate": 1.6846715328467156e-05, "loss": 0.6387, "step": 23865 }, { "epoch": 0.6967971738051444, "grad_norm": 0.5181875771868182, "learning_rate": 1.6845093268450935e-05, "loss": 0.5833, "step": 23866 }, { "epoch": 0.6968263700329917, "grad_norm": 0.4871050245790477, "learning_rate": 1.6843471208434713e-05, "loss": 0.5355, "step": 23867 }, { "epoch": 0.6968555662608391, "grad_norm": 0.5282347140566163, "learning_rate": 1.684184914841849e-05, "loss": 0.6391, "step": 23868 }, { "epoch": 0.6968847624886865, "grad_norm": 0.5111278563043132, "learning_rate": 1.6840227088402273e-05, "loss": 0.5544, "step": 23869 }, { "epoch": 0.6969139587165338, "grad_norm": 0.5112466645816022, "learning_rate": 1.683860502838605e-05, "loss": 0.5784, "step": 23870 }, { "epoch": 0.6969431549443812, "grad_norm": 0.5438644252660075, "learning_rate": 1.683698296836983e-05, "loss": 0.6795, "step": 23871 }, { "epoch": 0.6969723511722286, "grad_norm": 0.5236438983816515, "learning_rate": 1.6835360908353608e-05, "loss": 0.5939, "step": 23872 }, { "epoch": 0.6970015474000759, "grad_norm": 0.5142155553503791, "learning_rate": 1.6833738848337387e-05, "loss": 0.5856, "step": 23873 }, { "epoch": 0.6970307436279233, "grad_norm": 0.5143600029553544, "learning_rate": 1.683211678832117e-05, "loss": 0.5747, "step": 23874 }, { "epoch": 0.6970599398557706, "grad_norm": 0.525636133346018, "learning_rate": 1.683049472830495e-05, "loss": 0.598, "step": 23875 }, { "epoch": 0.697089136083618, "grad_norm": 0.5047271048225384, "learning_rate": 1.682887266828873e-05, "loss": 0.5579, "step": 23876 }, { "epoch": 0.6971183323114654, "grad_norm": 0.5045365736521006, "learning_rate": 1.6827250608272507e-05, "loss": 0.5282, "step": 23877 }, { "epoch": 0.6971475285393127, "grad_norm": 0.5165192800562025, "learning_rate": 1.6825628548256285e-05, "loss": 0.5607, "step": 23878 }, { "epoch": 0.6971767247671601, "grad_norm": 0.47631945428858263, "learning_rate": 1.6824006488240067e-05, "loss": 0.5268, "step": 23879 }, { "epoch": 0.6972059209950074, "grad_norm": 0.5334928935782207, "learning_rate": 1.6822384428223845e-05, "loss": 0.6448, "step": 23880 }, { "epoch": 0.6972351172228548, "grad_norm": 0.4959714590263005, "learning_rate": 1.6820762368207624e-05, "loss": 0.559, "step": 23881 }, { "epoch": 0.6972643134507022, "grad_norm": 0.47928820618923645, "learning_rate": 1.6819140308191402e-05, "loss": 0.4902, "step": 23882 }, { "epoch": 0.6972935096785495, "grad_norm": 0.5021021346923552, "learning_rate": 1.681751824817518e-05, "loss": 0.5812, "step": 23883 }, { "epoch": 0.6973227059063969, "grad_norm": 0.4756977764275285, "learning_rate": 1.6815896188158962e-05, "loss": 0.4901, "step": 23884 }, { "epoch": 0.6973519021342443, "grad_norm": 0.5220264292613518, "learning_rate": 1.6814274128142744e-05, "loss": 0.5532, "step": 23885 }, { "epoch": 0.6973810983620916, "grad_norm": 0.4585149412725217, "learning_rate": 1.6812652068126522e-05, "loss": 0.4918, "step": 23886 }, { "epoch": 0.697410294589939, "grad_norm": 0.4939216471779311, "learning_rate": 1.68110300081103e-05, "loss": 0.5344, "step": 23887 }, { "epoch": 0.6974394908177863, "grad_norm": 0.5057016690903354, "learning_rate": 1.680940794809408e-05, "loss": 0.5766, "step": 23888 }, { "epoch": 0.6974686870456337, "grad_norm": 0.5182465752308527, "learning_rate": 1.680778588807786e-05, "loss": 0.5927, "step": 23889 }, { "epoch": 0.6974978832734811, "grad_norm": 0.49760267945920844, "learning_rate": 1.680616382806164e-05, "loss": 0.5668, "step": 23890 }, { "epoch": 0.6975270795013284, "grad_norm": 0.5126331235736868, "learning_rate": 1.6804541768045418e-05, "loss": 0.5752, "step": 23891 }, { "epoch": 0.6975562757291758, "grad_norm": 0.524431466889306, "learning_rate": 1.6802919708029196e-05, "loss": 0.6029, "step": 23892 }, { "epoch": 0.6975854719570231, "grad_norm": 0.5482098927345681, "learning_rate": 1.6801297648012978e-05, "loss": 0.634, "step": 23893 }, { "epoch": 0.6976146681848705, "grad_norm": 0.5056593355413738, "learning_rate": 1.6799675587996756e-05, "loss": 0.574, "step": 23894 }, { "epoch": 0.6976438644127179, "grad_norm": 0.5251442928596666, "learning_rate": 1.6798053527980538e-05, "loss": 0.5812, "step": 23895 }, { "epoch": 0.6976730606405652, "grad_norm": 0.5014143792820334, "learning_rate": 1.6796431467964316e-05, "loss": 0.5901, "step": 23896 }, { "epoch": 0.6977022568684126, "grad_norm": 0.5307018612421481, "learning_rate": 1.6794809407948095e-05, "loss": 0.6156, "step": 23897 }, { "epoch": 0.69773145309626, "grad_norm": 0.5105595818479766, "learning_rate": 1.6793187347931873e-05, "loss": 0.5786, "step": 23898 }, { "epoch": 0.6977606493241073, "grad_norm": 0.5264014420133188, "learning_rate": 1.6791565287915655e-05, "loss": 0.6351, "step": 23899 }, { "epoch": 0.6977898455519547, "grad_norm": 0.5329186493576598, "learning_rate": 1.6789943227899433e-05, "loss": 0.595, "step": 23900 }, { "epoch": 0.697819041779802, "grad_norm": 0.556109190130278, "learning_rate": 1.678832116788321e-05, "loss": 0.6373, "step": 23901 }, { "epoch": 0.6978482380076494, "grad_norm": 0.5411348403385479, "learning_rate": 1.678669910786699e-05, "loss": 0.6017, "step": 23902 }, { "epoch": 0.6978774342354968, "grad_norm": 0.5746666760170805, "learning_rate": 1.678507704785077e-05, "loss": 0.6567, "step": 23903 }, { "epoch": 0.6979066304633441, "grad_norm": 0.5517165160698778, "learning_rate": 1.678345498783455e-05, "loss": 0.625, "step": 23904 }, { "epoch": 0.6979358266911915, "grad_norm": 0.5554952548252236, "learning_rate": 1.6781832927818332e-05, "loss": 0.6181, "step": 23905 }, { "epoch": 0.6979650229190388, "grad_norm": 0.4696306604852356, "learning_rate": 1.678021086780211e-05, "loss": 0.4794, "step": 23906 }, { "epoch": 0.6979942191468862, "grad_norm": 0.5299359517410492, "learning_rate": 1.677858880778589e-05, "loss": 0.5734, "step": 23907 }, { "epoch": 0.6980234153747336, "grad_norm": 0.539083815263573, "learning_rate": 1.6776966747769667e-05, "loss": 0.6324, "step": 23908 }, { "epoch": 0.6980526116025809, "grad_norm": 0.5411286687976882, "learning_rate": 1.677534468775345e-05, "loss": 0.6258, "step": 23909 }, { "epoch": 0.6980818078304283, "grad_norm": 0.4690278960983926, "learning_rate": 1.6773722627737227e-05, "loss": 0.5098, "step": 23910 }, { "epoch": 0.6981110040582756, "grad_norm": 0.5095329509642403, "learning_rate": 1.6772100567721005e-05, "loss": 0.5766, "step": 23911 }, { "epoch": 0.698140200286123, "grad_norm": 0.5581021655914766, "learning_rate": 1.6770478507704787e-05, "loss": 0.6584, "step": 23912 }, { "epoch": 0.6981693965139704, "grad_norm": 0.4914528039007178, "learning_rate": 1.6768856447688566e-05, "loss": 0.5379, "step": 23913 }, { "epoch": 0.6981985927418177, "grad_norm": 0.5109884585239098, "learning_rate": 1.6767234387672344e-05, "loss": 0.5357, "step": 23914 }, { "epoch": 0.6982277889696651, "grad_norm": 0.5337838984603837, "learning_rate": 1.6765612327656126e-05, "loss": 0.5917, "step": 23915 }, { "epoch": 0.6982569851975124, "grad_norm": 0.5281546795040709, "learning_rate": 1.6763990267639904e-05, "loss": 0.5696, "step": 23916 }, { "epoch": 0.6982861814253598, "grad_norm": 0.5228646515163301, "learning_rate": 1.6762368207623682e-05, "loss": 0.5683, "step": 23917 }, { "epoch": 0.6983153776532072, "grad_norm": 0.5390865555426462, "learning_rate": 1.676074614760746e-05, "loss": 0.6087, "step": 23918 }, { "epoch": 0.6983445738810545, "grad_norm": 0.5167085081381189, "learning_rate": 1.6759124087591243e-05, "loss": 0.6224, "step": 23919 }, { "epoch": 0.6983737701089019, "grad_norm": 0.49185539423450847, "learning_rate": 1.675750202757502e-05, "loss": 0.5632, "step": 23920 }, { "epoch": 0.6984029663367493, "grad_norm": 0.5667333127069081, "learning_rate": 1.6755879967558803e-05, "loss": 0.6749, "step": 23921 }, { "epoch": 0.6984321625645966, "grad_norm": 0.48594398935350813, "learning_rate": 1.675425790754258e-05, "loss": 0.496, "step": 23922 }, { "epoch": 0.698461358792444, "grad_norm": 0.5356209259493073, "learning_rate": 1.675263584752636e-05, "loss": 0.6034, "step": 23923 }, { "epoch": 0.6984905550202913, "grad_norm": 0.4999547168825244, "learning_rate": 1.6751013787510138e-05, "loss": 0.577, "step": 23924 }, { "epoch": 0.6985197512481387, "grad_norm": 0.5123317368722479, "learning_rate": 1.674939172749392e-05, "loss": 0.5996, "step": 23925 }, { "epoch": 0.6985489474759861, "grad_norm": 0.5420027107174145, "learning_rate": 1.6747769667477698e-05, "loss": 0.6033, "step": 23926 }, { "epoch": 0.6985781437038334, "grad_norm": 0.5065081517626098, "learning_rate": 1.6746147607461476e-05, "loss": 0.5439, "step": 23927 }, { "epoch": 0.6986073399316808, "grad_norm": 0.5308651693978712, "learning_rate": 1.6744525547445255e-05, "loss": 0.6243, "step": 23928 }, { "epoch": 0.6986365361595281, "grad_norm": 0.5183730662315712, "learning_rate": 1.6742903487429033e-05, "loss": 0.5628, "step": 23929 }, { "epoch": 0.6986657323873755, "grad_norm": 0.5064805510043244, "learning_rate": 1.6741281427412815e-05, "loss": 0.5607, "step": 23930 }, { "epoch": 0.6986949286152229, "grad_norm": 0.548698763203872, "learning_rate": 1.6739659367396597e-05, "loss": 0.5954, "step": 23931 }, { "epoch": 0.6987241248430702, "grad_norm": 0.49722078360197974, "learning_rate": 1.6738037307380375e-05, "loss": 0.5495, "step": 23932 }, { "epoch": 0.6987533210709176, "grad_norm": 0.5165197011214084, "learning_rate": 1.6736415247364153e-05, "loss": 0.5329, "step": 23933 }, { "epoch": 0.698782517298765, "grad_norm": 0.558799986704739, "learning_rate": 1.6734793187347932e-05, "loss": 0.6382, "step": 23934 }, { "epoch": 0.6988117135266123, "grad_norm": 0.4716029089708527, "learning_rate": 1.6733171127331713e-05, "loss": 0.483, "step": 23935 }, { "epoch": 0.6988409097544597, "grad_norm": 0.5382812674244187, "learning_rate": 1.6731549067315492e-05, "loss": 0.6125, "step": 23936 }, { "epoch": 0.698870105982307, "grad_norm": 0.539729348774088, "learning_rate": 1.672992700729927e-05, "loss": 0.6259, "step": 23937 }, { "epoch": 0.6988993022101544, "grad_norm": 0.5150889830621649, "learning_rate": 1.672830494728305e-05, "loss": 0.5923, "step": 23938 }, { "epoch": 0.6989284984380018, "grad_norm": 0.4931782453274498, "learning_rate": 1.6726682887266827e-05, "loss": 0.5889, "step": 23939 }, { "epoch": 0.6989576946658492, "grad_norm": 0.5478085246928266, "learning_rate": 1.672506082725061e-05, "loss": 0.6234, "step": 23940 }, { "epoch": 0.6989868908936966, "grad_norm": 0.5761708128690408, "learning_rate": 1.672343876723439e-05, "loss": 0.6627, "step": 23941 }, { "epoch": 0.699016087121544, "grad_norm": 0.5012456469660619, "learning_rate": 1.672181670721817e-05, "loss": 0.5711, "step": 23942 }, { "epoch": 0.6990452833493913, "grad_norm": 0.5455540267258604, "learning_rate": 1.6720194647201947e-05, "loss": 0.5594, "step": 23943 }, { "epoch": 0.6990744795772387, "grad_norm": 0.5514428036781069, "learning_rate": 1.6718572587185726e-05, "loss": 0.6402, "step": 23944 }, { "epoch": 0.699103675805086, "grad_norm": 0.5265103093994004, "learning_rate": 1.6716950527169507e-05, "loss": 0.622, "step": 23945 }, { "epoch": 0.6991328720329334, "grad_norm": 0.49550072164625053, "learning_rate": 1.6715328467153286e-05, "loss": 0.5552, "step": 23946 }, { "epoch": 0.6991620682607808, "grad_norm": 0.5511823976462802, "learning_rate": 1.6713706407137064e-05, "loss": 0.6844, "step": 23947 }, { "epoch": 0.6991912644886281, "grad_norm": 0.5195877815471464, "learning_rate": 1.6712084347120843e-05, "loss": 0.5743, "step": 23948 }, { "epoch": 0.6992204607164755, "grad_norm": 0.5191800230671675, "learning_rate": 1.671046228710462e-05, "loss": 0.6289, "step": 23949 }, { "epoch": 0.6992496569443228, "grad_norm": 0.5037666921156752, "learning_rate": 1.6708840227088403e-05, "loss": 0.5713, "step": 23950 }, { "epoch": 0.6992788531721702, "grad_norm": 0.5091972337511907, "learning_rate": 1.6707218167072184e-05, "loss": 0.5591, "step": 23951 }, { "epoch": 0.6993080494000176, "grad_norm": 0.4909852321473616, "learning_rate": 1.6705596107055963e-05, "loss": 0.5659, "step": 23952 }, { "epoch": 0.6993372456278649, "grad_norm": 0.5296490407002589, "learning_rate": 1.670397404703974e-05, "loss": 0.5284, "step": 23953 }, { "epoch": 0.6993664418557123, "grad_norm": 0.5128909225005772, "learning_rate": 1.670235198702352e-05, "loss": 0.5429, "step": 23954 }, { "epoch": 0.6993956380835596, "grad_norm": 0.4925880620008922, "learning_rate": 1.67007299270073e-05, "loss": 0.5475, "step": 23955 }, { "epoch": 0.699424834311407, "grad_norm": 0.49440062098551624, "learning_rate": 1.669910786699108e-05, "loss": 0.5176, "step": 23956 }, { "epoch": 0.6994540305392544, "grad_norm": 0.5109701732989326, "learning_rate": 1.6697485806974858e-05, "loss": 0.5812, "step": 23957 }, { "epoch": 0.6994832267671017, "grad_norm": 0.5330319132690807, "learning_rate": 1.6695863746958636e-05, "loss": 0.5835, "step": 23958 }, { "epoch": 0.6995124229949491, "grad_norm": 0.6069437257300057, "learning_rate": 1.6694241686942418e-05, "loss": 0.7061, "step": 23959 }, { "epoch": 0.6995416192227965, "grad_norm": 0.5278460073041672, "learning_rate": 1.6692619626926197e-05, "loss": 0.559, "step": 23960 }, { "epoch": 0.6995708154506438, "grad_norm": 0.5787582455762258, "learning_rate": 1.6690997566909978e-05, "loss": 0.6821, "step": 23961 }, { "epoch": 0.6996000116784912, "grad_norm": 0.5217639587719214, "learning_rate": 1.6689375506893757e-05, "loss": 0.5987, "step": 23962 }, { "epoch": 0.6996292079063385, "grad_norm": 0.4910471169834393, "learning_rate": 1.6687753446877535e-05, "loss": 0.5051, "step": 23963 }, { "epoch": 0.6996584041341859, "grad_norm": 0.4957142725482389, "learning_rate": 1.6686131386861313e-05, "loss": 0.5163, "step": 23964 }, { "epoch": 0.6996876003620333, "grad_norm": 0.5360359765291611, "learning_rate": 1.6684509326845095e-05, "loss": 0.6312, "step": 23965 }, { "epoch": 0.6997167965898806, "grad_norm": 0.5584233569820024, "learning_rate": 1.6682887266828874e-05, "loss": 0.6844, "step": 23966 }, { "epoch": 0.699745992817728, "grad_norm": 0.5378003759644215, "learning_rate": 1.6681265206812652e-05, "loss": 0.604, "step": 23967 }, { "epoch": 0.6997751890455753, "grad_norm": 0.49312655717778847, "learning_rate": 1.667964314679643e-05, "loss": 0.5433, "step": 23968 }, { "epoch": 0.6998043852734227, "grad_norm": 0.5536786310955353, "learning_rate": 1.6678021086780212e-05, "loss": 0.6178, "step": 23969 }, { "epoch": 0.6998335815012701, "grad_norm": 0.5322802204701849, "learning_rate": 1.667639902676399e-05, "loss": 0.5886, "step": 23970 }, { "epoch": 0.6998627777291174, "grad_norm": 0.5545691773610966, "learning_rate": 1.6674776966747772e-05, "loss": 0.6574, "step": 23971 }, { "epoch": 0.6998919739569648, "grad_norm": 0.5153794241582933, "learning_rate": 1.667315490673155e-05, "loss": 0.6034, "step": 23972 }, { "epoch": 0.6999211701848121, "grad_norm": 0.5149419677529998, "learning_rate": 1.667153284671533e-05, "loss": 0.5607, "step": 23973 }, { "epoch": 0.6999503664126595, "grad_norm": 0.5408966809153618, "learning_rate": 1.6669910786699107e-05, "loss": 0.6398, "step": 23974 }, { "epoch": 0.6999795626405069, "grad_norm": 0.5327976744765813, "learning_rate": 1.666828872668289e-05, "loss": 0.6514, "step": 23975 }, { "epoch": 0.7000087588683542, "grad_norm": 0.4909906500795243, "learning_rate": 1.6666666666666667e-05, "loss": 0.52, "step": 23976 }, { "epoch": 0.7000379550962016, "grad_norm": 0.5209812608615534, "learning_rate": 1.6665044606650446e-05, "loss": 0.5769, "step": 23977 }, { "epoch": 0.700067151324049, "grad_norm": 0.5135124446587106, "learning_rate": 1.6663422546634228e-05, "loss": 0.5307, "step": 23978 }, { "epoch": 0.7000963475518963, "grad_norm": 0.5173488402273957, "learning_rate": 1.6661800486618006e-05, "loss": 0.5752, "step": 23979 }, { "epoch": 0.7001255437797437, "grad_norm": 0.5147739668626204, "learning_rate": 1.6660178426601784e-05, "loss": 0.6032, "step": 23980 }, { "epoch": 0.700154740007591, "grad_norm": 0.5366022907159399, "learning_rate": 1.6658556366585566e-05, "loss": 0.6017, "step": 23981 }, { "epoch": 0.7001839362354384, "grad_norm": 0.5286538765637749, "learning_rate": 1.6656934306569344e-05, "loss": 0.6301, "step": 23982 }, { "epoch": 0.7002131324632858, "grad_norm": 0.5024977104952547, "learning_rate": 1.6655312246553123e-05, "loss": 0.5693, "step": 23983 }, { "epoch": 0.7002423286911331, "grad_norm": 0.5159148669990379, "learning_rate": 1.66536901865369e-05, "loss": 0.5759, "step": 23984 }, { "epoch": 0.7002715249189805, "grad_norm": 0.5475883320012838, "learning_rate": 1.665206812652068e-05, "loss": 0.654, "step": 23985 }, { "epoch": 0.7003007211468278, "grad_norm": 0.5094987735259063, "learning_rate": 1.665044606650446e-05, "loss": 0.5792, "step": 23986 }, { "epoch": 0.7003299173746752, "grad_norm": 0.5126165964391047, "learning_rate": 1.664882400648824e-05, "loss": 0.5625, "step": 23987 }, { "epoch": 0.7003591136025226, "grad_norm": 0.4905912380539101, "learning_rate": 1.664720194647202e-05, "loss": 0.5392, "step": 23988 }, { "epoch": 0.7003883098303699, "grad_norm": 0.5540373473897808, "learning_rate": 1.66455798864558e-05, "loss": 0.6192, "step": 23989 }, { "epoch": 0.7004175060582173, "grad_norm": 0.4855019362765154, "learning_rate": 1.6643957826439578e-05, "loss": 0.5296, "step": 23990 }, { "epoch": 0.7004467022860646, "grad_norm": 0.5323325078753767, "learning_rate": 1.664233576642336e-05, "loss": 0.5655, "step": 23991 }, { "epoch": 0.700475898513912, "grad_norm": 0.5434617565548254, "learning_rate": 1.664071370640714e-05, "loss": 0.6038, "step": 23992 }, { "epoch": 0.7005050947417594, "grad_norm": 0.5189443304467257, "learning_rate": 1.6639091646390917e-05, "loss": 0.5981, "step": 23993 }, { "epoch": 0.7005342909696067, "grad_norm": 0.5058330857345411, "learning_rate": 1.6637469586374695e-05, "loss": 0.5242, "step": 23994 }, { "epoch": 0.7005634871974541, "grad_norm": 0.5413427059510081, "learning_rate": 1.6635847526358474e-05, "loss": 0.6377, "step": 23995 }, { "epoch": 0.7005926834253015, "grad_norm": 0.5462367955625873, "learning_rate": 1.6634225466342255e-05, "loss": 0.6383, "step": 23996 }, { "epoch": 0.7006218796531488, "grad_norm": 0.54884341336049, "learning_rate": 1.6632603406326037e-05, "loss": 0.6293, "step": 23997 }, { "epoch": 0.7006510758809962, "grad_norm": 0.5331523544081641, "learning_rate": 1.6630981346309815e-05, "loss": 0.6219, "step": 23998 }, { "epoch": 0.7006802721088435, "grad_norm": 0.5139323002454964, "learning_rate": 1.6629359286293594e-05, "loss": 0.5829, "step": 23999 }, { "epoch": 0.7007094683366909, "grad_norm": 0.5271001986062617, "learning_rate": 1.6627737226277372e-05, "loss": 0.6017, "step": 24000 }, { "epoch": 0.7007386645645383, "grad_norm": 0.5398897645248066, "learning_rate": 1.6626115166261154e-05, "loss": 0.6053, "step": 24001 }, { "epoch": 0.7007678607923856, "grad_norm": 0.5543059339544634, "learning_rate": 1.6624493106244932e-05, "loss": 0.6455, "step": 24002 }, { "epoch": 0.700797057020233, "grad_norm": 0.492263670098298, "learning_rate": 1.662287104622871e-05, "loss": 0.5002, "step": 24003 }, { "epoch": 0.7008262532480803, "grad_norm": 0.5285159880682347, "learning_rate": 1.662124898621249e-05, "loss": 0.6252, "step": 24004 }, { "epoch": 0.7008554494759277, "grad_norm": 0.5018880181346755, "learning_rate": 1.6619626926196267e-05, "loss": 0.5354, "step": 24005 }, { "epoch": 0.7008846457037751, "grad_norm": 0.5465802419637183, "learning_rate": 1.661800486618005e-05, "loss": 0.678, "step": 24006 }, { "epoch": 0.7009138419316224, "grad_norm": 0.5109348174609255, "learning_rate": 1.661638280616383e-05, "loss": 0.5976, "step": 24007 }, { "epoch": 0.7009430381594698, "grad_norm": 0.5166046122163289, "learning_rate": 1.661476074614761e-05, "loss": 0.5646, "step": 24008 }, { "epoch": 0.7009722343873172, "grad_norm": 0.5069556146839663, "learning_rate": 1.6613138686131388e-05, "loss": 0.5929, "step": 24009 }, { "epoch": 0.7010014306151645, "grad_norm": 0.5183894565825006, "learning_rate": 1.6611516626115166e-05, "loss": 0.5925, "step": 24010 }, { "epoch": 0.7010306268430119, "grad_norm": 0.504688965833277, "learning_rate": 1.6609894566098948e-05, "loss": 0.5497, "step": 24011 }, { "epoch": 0.7010598230708592, "grad_norm": 0.5090124693954277, "learning_rate": 1.6608272506082726e-05, "loss": 0.5641, "step": 24012 }, { "epoch": 0.7010890192987066, "grad_norm": 0.5890513210006034, "learning_rate": 1.6606650446066505e-05, "loss": 0.645, "step": 24013 }, { "epoch": 0.701118215526554, "grad_norm": 0.50397733764072, "learning_rate": 1.6605028386050283e-05, "loss": 0.5619, "step": 24014 }, { "epoch": 0.7011474117544013, "grad_norm": 0.48773037402615343, "learning_rate": 1.660340632603406e-05, "loss": 0.5255, "step": 24015 }, { "epoch": 0.7011766079822487, "grad_norm": 0.5562026368437337, "learning_rate": 1.6601784266017843e-05, "loss": 0.6328, "step": 24016 }, { "epoch": 0.701205804210096, "grad_norm": 0.5232290374639682, "learning_rate": 1.6600162206001625e-05, "loss": 0.6282, "step": 24017 }, { "epoch": 0.7012350004379434, "grad_norm": 0.5015248717609769, "learning_rate": 1.6598540145985403e-05, "loss": 0.586, "step": 24018 }, { "epoch": 0.7012641966657908, "grad_norm": 0.5101389665397759, "learning_rate": 1.659691808596918e-05, "loss": 0.5833, "step": 24019 }, { "epoch": 0.7012933928936381, "grad_norm": 0.5357924265268548, "learning_rate": 1.659529602595296e-05, "loss": 0.629, "step": 24020 }, { "epoch": 0.7013225891214855, "grad_norm": 0.5016527766536262, "learning_rate": 1.6593673965936742e-05, "loss": 0.5622, "step": 24021 }, { "epoch": 0.7013517853493328, "grad_norm": 0.4886232061745889, "learning_rate": 1.659205190592052e-05, "loss": 0.521, "step": 24022 }, { "epoch": 0.7013809815771802, "grad_norm": 0.49152274427190107, "learning_rate": 1.65904298459043e-05, "loss": 0.525, "step": 24023 }, { "epoch": 0.7014101778050276, "grad_norm": 0.5356567833049878, "learning_rate": 1.6588807785888077e-05, "loss": 0.6503, "step": 24024 }, { "epoch": 0.7014393740328749, "grad_norm": 0.4955845041084468, "learning_rate": 1.658718572587186e-05, "loss": 0.5054, "step": 24025 }, { "epoch": 0.7014685702607223, "grad_norm": 0.4662779728255642, "learning_rate": 1.6585563665855637e-05, "loss": 0.5007, "step": 24026 }, { "epoch": 0.7014977664885697, "grad_norm": 0.5312458941285512, "learning_rate": 1.658394160583942e-05, "loss": 0.5906, "step": 24027 }, { "epoch": 0.701526962716417, "grad_norm": 0.4771265902736861, "learning_rate": 1.6582319545823197e-05, "loss": 0.5054, "step": 24028 }, { "epoch": 0.7015561589442644, "grad_norm": 0.5165003252877045, "learning_rate": 1.6580697485806975e-05, "loss": 0.6036, "step": 24029 }, { "epoch": 0.7015853551721117, "grad_norm": 0.5240294952340411, "learning_rate": 1.6579075425790754e-05, "loss": 0.6398, "step": 24030 }, { "epoch": 0.7016145513999591, "grad_norm": 0.5856030512954462, "learning_rate": 1.6577453365774536e-05, "loss": 0.6253, "step": 24031 }, { "epoch": 0.7016437476278065, "grad_norm": 0.5046769480393535, "learning_rate": 1.6575831305758314e-05, "loss": 0.558, "step": 24032 }, { "epoch": 0.7016729438556538, "grad_norm": 0.48802723027607087, "learning_rate": 1.6574209245742092e-05, "loss": 0.5111, "step": 24033 }, { "epoch": 0.7017021400835012, "grad_norm": 0.5261160819731284, "learning_rate": 1.657258718572587e-05, "loss": 0.5927, "step": 24034 }, { "epoch": 0.7017313363113485, "grad_norm": 0.5040371830832382, "learning_rate": 1.6570965125709653e-05, "loss": 0.5717, "step": 24035 }, { "epoch": 0.7017605325391959, "grad_norm": 0.5284561132468284, "learning_rate": 1.656934306569343e-05, "loss": 0.6163, "step": 24036 }, { "epoch": 0.7017897287670433, "grad_norm": 0.5135517397274595, "learning_rate": 1.6567721005677213e-05, "loss": 0.5489, "step": 24037 }, { "epoch": 0.7018189249948906, "grad_norm": 0.5499383728124043, "learning_rate": 1.656609894566099e-05, "loss": 0.6543, "step": 24038 }, { "epoch": 0.701848121222738, "grad_norm": 0.5195612621043249, "learning_rate": 1.656447688564477e-05, "loss": 0.5672, "step": 24039 }, { "epoch": 0.7018773174505853, "grad_norm": 0.5077087213222783, "learning_rate": 1.6562854825628548e-05, "loss": 0.5988, "step": 24040 }, { "epoch": 0.7019065136784327, "grad_norm": 0.5338350943737923, "learning_rate": 1.6561232765612326e-05, "loss": 0.6301, "step": 24041 }, { "epoch": 0.7019357099062801, "grad_norm": 0.5895731044354874, "learning_rate": 1.6559610705596108e-05, "loss": 0.6885, "step": 24042 }, { "epoch": 0.7019649061341274, "grad_norm": 0.554878909793202, "learning_rate": 1.6557988645579886e-05, "loss": 0.6775, "step": 24043 }, { "epoch": 0.7019941023619748, "grad_norm": 0.49367584600820097, "learning_rate": 1.6556366585563668e-05, "loss": 0.5576, "step": 24044 }, { "epoch": 0.7020232985898222, "grad_norm": 0.4955929063538645, "learning_rate": 1.6554744525547446e-05, "loss": 0.5268, "step": 24045 }, { "epoch": 0.7020524948176695, "grad_norm": 0.5676949467281511, "learning_rate": 1.6553122465531225e-05, "loss": 0.6769, "step": 24046 }, { "epoch": 0.7020816910455169, "grad_norm": 0.5543164333885682, "learning_rate": 1.6551500405515007e-05, "loss": 0.6923, "step": 24047 }, { "epoch": 0.7021108872733642, "grad_norm": 0.5035599098951582, "learning_rate": 1.6549878345498785e-05, "loss": 0.5595, "step": 24048 }, { "epoch": 0.7021400835012116, "grad_norm": 0.5004849592714682, "learning_rate": 1.6548256285482563e-05, "loss": 0.5461, "step": 24049 }, { "epoch": 0.702169279729059, "grad_norm": 0.5020510305245294, "learning_rate": 1.654663422546634e-05, "loss": 0.5658, "step": 24050 }, { "epoch": 0.7021984759569063, "grad_norm": 0.5280966567733671, "learning_rate": 1.654501216545012e-05, "loss": 0.6294, "step": 24051 }, { "epoch": 0.7022276721847537, "grad_norm": 0.5036976408192024, "learning_rate": 1.6543390105433902e-05, "loss": 0.5392, "step": 24052 }, { "epoch": 0.702256868412601, "grad_norm": 0.4995130965293663, "learning_rate": 1.654176804541768e-05, "loss": 0.5566, "step": 24053 }, { "epoch": 0.7022860646404484, "grad_norm": 0.5583757459305043, "learning_rate": 1.6540145985401462e-05, "loss": 0.6263, "step": 24054 }, { "epoch": 0.7023152608682958, "grad_norm": 0.5299604810000137, "learning_rate": 1.653852392538524e-05, "loss": 0.6157, "step": 24055 }, { "epoch": 0.7023444570961431, "grad_norm": 0.5195214729047086, "learning_rate": 1.653690186536902e-05, "loss": 0.6234, "step": 24056 }, { "epoch": 0.7023736533239905, "grad_norm": 0.5010850476407385, "learning_rate": 1.65352798053528e-05, "loss": 0.5389, "step": 24057 }, { "epoch": 0.7024028495518378, "grad_norm": 0.48971099878840124, "learning_rate": 1.653365774533658e-05, "loss": 0.5438, "step": 24058 }, { "epoch": 0.7024320457796852, "grad_norm": 0.48143944423484386, "learning_rate": 1.6532035685320357e-05, "loss": 0.5317, "step": 24059 }, { "epoch": 0.7024612420075326, "grad_norm": 0.4862064270571959, "learning_rate": 1.6530413625304136e-05, "loss": 0.5128, "step": 24060 }, { "epoch": 0.70249043823538, "grad_norm": 0.5763922333803357, "learning_rate": 1.6528791565287914e-05, "loss": 0.6698, "step": 24061 }, { "epoch": 0.7025196344632274, "grad_norm": 0.48735309031173557, "learning_rate": 1.6527169505271696e-05, "loss": 0.5423, "step": 24062 }, { "epoch": 0.7025488306910748, "grad_norm": 0.580678495298621, "learning_rate": 1.6525547445255477e-05, "loss": 0.6716, "step": 24063 }, { "epoch": 0.7025780269189221, "grad_norm": 0.5418307015392326, "learning_rate": 1.6523925385239256e-05, "loss": 0.655, "step": 24064 }, { "epoch": 0.7026072231467695, "grad_norm": 0.47586565115132323, "learning_rate": 1.6522303325223034e-05, "loss": 0.5273, "step": 24065 }, { "epoch": 0.7026364193746168, "grad_norm": 0.5327889224370905, "learning_rate": 1.6520681265206813e-05, "loss": 0.5788, "step": 24066 }, { "epoch": 0.7026656156024642, "grad_norm": 0.5397755439629782, "learning_rate": 1.6519059205190594e-05, "loss": 0.5981, "step": 24067 }, { "epoch": 0.7026948118303116, "grad_norm": 0.5081776342299733, "learning_rate": 1.6517437145174373e-05, "loss": 0.5601, "step": 24068 }, { "epoch": 0.7027240080581589, "grad_norm": 0.5532269722660754, "learning_rate": 1.651581508515815e-05, "loss": 0.61, "step": 24069 }, { "epoch": 0.7027532042860063, "grad_norm": 0.5146727484282967, "learning_rate": 1.651419302514193e-05, "loss": 0.5672, "step": 24070 }, { "epoch": 0.7027824005138537, "grad_norm": 0.5748503357240248, "learning_rate": 1.6512570965125708e-05, "loss": 0.6679, "step": 24071 }, { "epoch": 0.702811596741701, "grad_norm": 0.513460812954566, "learning_rate": 1.651094890510949e-05, "loss": 0.5812, "step": 24072 }, { "epoch": 0.7028407929695484, "grad_norm": 0.509299927932304, "learning_rate": 1.650932684509327e-05, "loss": 0.5718, "step": 24073 }, { "epoch": 0.7028699891973957, "grad_norm": 0.49875465418557513, "learning_rate": 1.650770478507705e-05, "loss": 0.5676, "step": 24074 }, { "epoch": 0.7028991854252431, "grad_norm": 0.5559327304767241, "learning_rate": 1.6506082725060828e-05, "loss": 0.689, "step": 24075 }, { "epoch": 0.7029283816530905, "grad_norm": 0.5041136775574696, "learning_rate": 1.6504460665044607e-05, "loss": 0.5359, "step": 24076 }, { "epoch": 0.7029575778809378, "grad_norm": 0.5521720902537083, "learning_rate": 1.6502838605028388e-05, "loss": 0.6632, "step": 24077 }, { "epoch": 0.7029867741087852, "grad_norm": 0.5002038712064322, "learning_rate": 1.6501216545012167e-05, "loss": 0.5765, "step": 24078 }, { "epoch": 0.7030159703366325, "grad_norm": 0.48092594497725344, "learning_rate": 1.6499594484995945e-05, "loss": 0.5185, "step": 24079 }, { "epoch": 0.7030451665644799, "grad_norm": 0.4994857473388057, "learning_rate": 1.6497972424979723e-05, "loss": 0.569, "step": 24080 }, { "epoch": 0.7030743627923273, "grad_norm": 0.6919659379765895, "learning_rate": 1.6496350364963502e-05, "loss": 0.626, "step": 24081 }, { "epoch": 0.7031035590201746, "grad_norm": 0.5197214829887952, "learning_rate": 1.6494728304947284e-05, "loss": 0.5851, "step": 24082 }, { "epoch": 0.703132755248022, "grad_norm": 0.5030225164667893, "learning_rate": 1.6493106244931065e-05, "loss": 0.5873, "step": 24083 }, { "epoch": 0.7031619514758694, "grad_norm": 0.5210224831828907, "learning_rate": 1.6491484184914844e-05, "loss": 0.5802, "step": 24084 }, { "epoch": 0.7031911477037167, "grad_norm": 0.4954753457040405, "learning_rate": 1.6489862124898622e-05, "loss": 0.5386, "step": 24085 }, { "epoch": 0.7032203439315641, "grad_norm": 0.5469471596092883, "learning_rate": 1.64882400648824e-05, "loss": 0.6059, "step": 24086 }, { "epoch": 0.7032495401594114, "grad_norm": 0.5287187517324664, "learning_rate": 1.6486618004866182e-05, "loss": 0.5376, "step": 24087 }, { "epoch": 0.7032787363872588, "grad_norm": 0.5220326671344651, "learning_rate": 1.648499594484996e-05, "loss": 0.5037, "step": 24088 }, { "epoch": 0.7033079326151062, "grad_norm": 0.5427604661323038, "learning_rate": 1.648337388483374e-05, "loss": 0.6326, "step": 24089 }, { "epoch": 0.7033371288429535, "grad_norm": 0.6075929030467775, "learning_rate": 1.6481751824817517e-05, "loss": 0.6763, "step": 24090 }, { "epoch": 0.7033663250708009, "grad_norm": 0.554061414270591, "learning_rate": 1.64801297648013e-05, "loss": 0.6641, "step": 24091 }, { "epoch": 0.7033955212986482, "grad_norm": 0.5200819487989886, "learning_rate": 1.6478507704785077e-05, "loss": 0.6175, "step": 24092 }, { "epoch": 0.7034247175264956, "grad_norm": 0.5173415501752573, "learning_rate": 1.647688564476886e-05, "loss": 0.5751, "step": 24093 }, { "epoch": 0.703453913754343, "grad_norm": 0.5125036666186061, "learning_rate": 1.6475263584752638e-05, "loss": 0.5922, "step": 24094 }, { "epoch": 0.7034831099821903, "grad_norm": 0.5036536911516026, "learning_rate": 1.6473641524736416e-05, "loss": 0.5891, "step": 24095 }, { "epoch": 0.7035123062100377, "grad_norm": 0.4993112505173212, "learning_rate": 1.6472019464720194e-05, "loss": 0.5749, "step": 24096 }, { "epoch": 0.703541502437885, "grad_norm": 0.5325404276158936, "learning_rate": 1.6470397404703976e-05, "loss": 0.5801, "step": 24097 }, { "epoch": 0.7035706986657324, "grad_norm": 0.5080590058110469, "learning_rate": 1.6468775344687754e-05, "loss": 0.5492, "step": 24098 }, { "epoch": 0.7035998948935798, "grad_norm": 0.5478240123648299, "learning_rate": 1.6467153284671533e-05, "loss": 0.6471, "step": 24099 }, { "epoch": 0.7036290911214271, "grad_norm": 0.5352366867850724, "learning_rate": 1.646553122465531e-05, "loss": 0.6108, "step": 24100 }, { "epoch": 0.7036582873492745, "grad_norm": 0.5532047581947896, "learning_rate": 1.6463909164639093e-05, "loss": 0.6575, "step": 24101 }, { "epoch": 0.7036874835771219, "grad_norm": 0.49874510010461126, "learning_rate": 1.646228710462287e-05, "loss": 0.5235, "step": 24102 }, { "epoch": 0.7037166798049692, "grad_norm": 0.570550104636904, "learning_rate": 1.6460665044606653e-05, "loss": 0.7316, "step": 24103 }, { "epoch": 0.7037458760328166, "grad_norm": 0.5092447042618167, "learning_rate": 1.645904298459043e-05, "loss": 0.6049, "step": 24104 }, { "epoch": 0.7037750722606639, "grad_norm": 0.5333840880924832, "learning_rate": 1.645742092457421e-05, "loss": 0.6139, "step": 24105 }, { "epoch": 0.7038042684885113, "grad_norm": 0.5070327177300908, "learning_rate": 1.6455798864557988e-05, "loss": 0.5687, "step": 24106 }, { "epoch": 0.7038334647163587, "grad_norm": 0.5070455169230911, "learning_rate": 1.6454176804541767e-05, "loss": 0.5609, "step": 24107 }, { "epoch": 0.703862660944206, "grad_norm": 0.5383131630214394, "learning_rate": 1.645255474452555e-05, "loss": 0.6326, "step": 24108 }, { "epoch": 0.7038918571720534, "grad_norm": 0.5528901784788245, "learning_rate": 1.6450932684509327e-05, "loss": 0.6632, "step": 24109 }, { "epoch": 0.7039210533999007, "grad_norm": 0.5360308969398351, "learning_rate": 1.644931062449311e-05, "loss": 0.6085, "step": 24110 }, { "epoch": 0.7039502496277481, "grad_norm": 0.5245580831946327, "learning_rate": 1.6447688564476887e-05, "loss": 0.6281, "step": 24111 }, { "epoch": 0.7039794458555955, "grad_norm": 0.5441839550120395, "learning_rate": 1.6446066504460665e-05, "loss": 0.6483, "step": 24112 }, { "epoch": 0.7040086420834428, "grad_norm": 0.5479820097792965, "learning_rate": 1.6444444444444447e-05, "loss": 0.6634, "step": 24113 }, { "epoch": 0.7040378383112902, "grad_norm": 0.5568313774115599, "learning_rate": 1.6442822384428225e-05, "loss": 0.5799, "step": 24114 }, { "epoch": 0.7040670345391375, "grad_norm": 0.5604460082070939, "learning_rate": 1.6441200324412004e-05, "loss": 0.6586, "step": 24115 }, { "epoch": 0.7040962307669849, "grad_norm": 0.48275643538177043, "learning_rate": 1.6439578264395782e-05, "loss": 0.4803, "step": 24116 }, { "epoch": 0.7041254269948323, "grad_norm": 0.4632378539195939, "learning_rate": 1.643795620437956e-05, "loss": 0.485, "step": 24117 }, { "epoch": 0.7041546232226796, "grad_norm": 0.5112921438796428, "learning_rate": 1.6436334144363342e-05, "loss": 0.6231, "step": 24118 }, { "epoch": 0.704183819450527, "grad_norm": 0.5303725725862294, "learning_rate": 1.643471208434712e-05, "loss": 0.6164, "step": 24119 }, { "epoch": 0.7042130156783744, "grad_norm": 0.5093724540421068, "learning_rate": 1.6433090024330902e-05, "loss": 0.529, "step": 24120 }, { "epoch": 0.7042422119062217, "grad_norm": 0.5095724753309154, "learning_rate": 1.643146796431468e-05, "loss": 0.5693, "step": 24121 }, { "epoch": 0.7042714081340691, "grad_norm": 0.46536032660719334, "learning_rate": 1.642984590429846e-05, "loss": 0.5008, "step": 24122 }, { "epoch": 0.7043006043619164, "grad_norm": 0.5422438679952946, "learning_rate": 1.642822384428224e-05, "loss": 0.5998, "step": 24123 }, { "epoch": 0.7043298005897638, "grad_norm": 0.5121221306913102, "learning_rate": 1.642660178426602e-05, "loss": 0.5942, "step": 24124 }, { "epoch": 0.7043589968176112, "grad_norm": 0.5267930303605562, "learning_rate": 1.6424979724249798e-05, "loss": 0.5752, "step": 24125 }, { "epoch": 0.7043881930454585, "grad_norm": 0.5352624572090675, "learning_rate": 1.6423357664233576e-05, "loss": 0.6079, "step": 24126 }, { "epoch": 0.7044173892733059, "grad_norm": 0.5060791238776812, "learning_rate": 1.6421735604217354e-05, "loss": 0.5423, "step": 24127 }, { "epoch": 0.7044465855011532, "grad_norm": 0.5502748683033317, "learning_rate": 1.6420113544201136e-05, "loss": 0.6358, "step": 24128 }, { "epoch": 0.7044757817290006, "grad_norm": 0.49669199369031675, "learning_rate": 1.6418491484184918e-05, "loss": 0.563, "step": 24129 }, { "epoch": 0.704504977956848, "grad_norm": 0.5330323129156962, "learning_rate": 1.6416869424168696e-05, "loss": 0.6371, "step": 24130 }, { "epoch": 0.7045341741846953, "grad_norm": 0.48767104020446117, "learning_rate": 1.6415247364152475e-05, "loss": 0.5301, "step": 24131 }, { "epoch": 0.7045633704125427, "grad_norm": 0.5605759342896137, "learning_rate": 1.6413625304136253e-05, "loss": 0.6693, "step": 24132 }, { "epoch": 0.70459256664039, "grad_norm": 0.5159281135164816, "learning_rate": 1.6412003244120035e-05, "loss": 0.5749, "step": 24133 }, { "epoch": 0.7046217628682374, "grad_norm": 0.5411611322568274, "learning_rate": 1.6410381184103813e-05, "loss": 0.6253, "step": 24134 }, { "epoch": 0.7046509590960848, "grad_norm": 0.5360366194674272, "learning_rate": 1.640875912408759e-05, "loss": 0.564, "step": 24135 }, { "epoch": 0.7046801553239321, "grad_norm": 0.5560183086791639, "learning_rate": 1.640713706407137e-05, "loss": 0.6869, "step": 24136 }, { "epoch": 0.7047093515517795, "grad_norm": 0.5210449824776899, "learning_rate": 1.6405515004055148e-05, "loss": 0.5961, "step": 24137 }, { "epoch": 0.7047385477796269, "grad_norm": 0.5026998324715537, "learning_rate": 1.640389294403893e-05, "loss": 0.5752, "step": 24138 }, { "epoch": 0.7047677440074742, "grad_norm": 0.47397623390910576, "learning_rate": 1.6402270884022712e-05, "loss": 0.5069, "step": 24139 }, { "epoch": 0.7047969402353216, "grad_norm": 0.5387289702982511, "learning_rate": 1.640064882400649e-05, "loss": 0.6562, "step": 24140 }, { "epoch": 0.7048261364631689, "grad_norm": 0.5313870129589623, "learning_rate": 1.639902676399027e-05, "loss": 0.641, "step": 24141 }, { "epoch": 0.7048553326910163, "grad_norm": 0.5316857011162004, "learning_rate": 1.6397404703974047e-05, "loss": 0.6386, "step": 24142 }, { "epoch": 0.7048845289188637, "grad_norm": 0.5076491734313096, "learning_rate": 1.639578264395783e-05, "loss": 0.582, "step": 24143 }, { "epoch": 0.704913725146711, "grad_norm": 0.5547468005940766, "learning_rate": 1.6394160583941607e-05, "loss": 0.5877, "step": 24144 }, { "epoch": 0.7049429213745584, "grad_norm": 0.5015799807862715, "learning_rate": 1.6392538523925385e-05, "loss": 0.5354, "step": 24145 }, { "epoch": 0.7049721176024057, "grad_norm": 0.5388716924976991, "learning_rate": 1.6390916463909164e-05, "loss": 0.6582, "step": 24146 }, { "epoch": 0.7050013138302531, "grad_norm": 0.5046390476050259, "learning_rate": 1.6389294403892942e-05, "loss": 0.5559, "step": 24147 }, { "epoch": 0.7050305100581005, "grad_norm": 0.528954723962246, "learning_rate": 1.6387672343876724e-05, "loss": 0.6353, "step": 24148 }, { "epoch": 0.7050597062859478, "grad_norm": 0.5176704482999158, "learning_rate": 1.6386050283860506e-05, "loss": 0.6248, "step": 24149 }, { "epoch": 0.7050889025137952, "grad_norm": 0.5238746498097555, "learning_rate": 1.6384428223844284e-05, "loss": 0.5963, "step": 24150 }, { "epoch": 0.7051180987416426, "grad_norm": 0.5373881732998663, "learning_rate": 1.6382806163828062e-05, "loss": 0.6299, "step": 24151 }, { "epoch": 0.7051472949694899, "grad_norm": 0.5566791959876759, "learning_rate": 1.638118410381184e-05, "loss": 0.6111, "step": 24152 }, { "epoch": 0.7051764911973373, "grad_norm": 0.5533563863476568, "learning_rate": 1.6379562043795623e-05, "loss": 0.6355, "step": 24153 }, { "epoch": 0.7052056874251846, "grad_norm": 0.5184131270729677, "learning_rate": 1.63779399837794e-05, "loss": 0.5984, "step": 24154 }, { "epoch": 0.705234883653032, "grad_norm": 0.5385991514273957, "learning_rate": 1.637631792376318e-05, "loss": 0.6084, "step": 24155 }, { "epoch": 0.7052640798808794, "grad_norm": 0.5079695098127045, "learning_rate": 1.6374695863746958e-05, "loss": 0.5418, "step": 24156 }, { "epoch": 0.7052932761087267, "grad_norm": 0.5133473402869206, "learning_rate": 1.637307380373074e-05, "loss": 0.5318, "step": 24157 }, { "epoch": 0.7053224723365741, "grad_norm": 0.5133072639517839, "learning_rate": 1.6371451743714518e-05, "loss": 0.6041, "step": 24158 }, { "epoch": 0.7053516685644214, "grad_norm": 0.4967911222998051, "learning_rate": 1.63698296836983e-05, "loss": 0.5481, "step": 24159 }, { "epoch": 0.7053808647922688, "grad_norm": 0.5262415389271001, "learning_rate": 1.6368207623682078e-05, "loss": 0.5601, "step": 24160 }, { "epoch": 0.7054100610201162, "grad_norm": 0.5680338472939054, "learning_rate": 1.6366585563665856e-05, "loss": 0.6818, "step": 24161 }, { "epoch": 0.7054392572479635, "grad_norm": 0.5175210828789691, "learning_rate": 1.6364963503649635e-05, "loss": 0.6161, "step": 24162 }, { "epoch": 0.7054684534758109, "grad_norm": 0.4939339374209092, "learning_rate": 1.6363341443633413e-05, "loss": 0.5588, "step": 24163 }, { "epoch": 0.7054976497036582, "grad_norm": 0.5099231098186295, "learning_rate": 1.6361719383617195e-05, "loss": 0.5591, "step": 24164 }, { "epoch": 0.7055268459315056, "grad_norm": 0.5386630754631856, "learning_rate": 1.6360097323600973e-05, "loss": 0.6384, "step": 24165 }, { "epoch": 0.705556042159353, "grad_norm": 0.5245633887322988, "learning_rate": 1.635847526358475e-05, "loss": 0.5945, "step": 24166 }, { "epoch": 0.7055852383872003, "grad_norm": 0.680124916225357, "learning_rate": 1.6356853203568533e-05, "loss": 0.6543, "step": 24167 }, { "epoch": 0.7056144346150477, "grad_norm": 0.5030624748897597, "learning_rate": 1.6355231143552312e-05, "loss": 0.5633, "step": 24168 }, { "epoch": 0.705643630842895, "grad_norm": 0.519247053210684, "learning_rate": 1.6353609083536094e-05, "loss": 0.5799, "step": 24169 }, { "epoch": 0.7056728270707424, "grad_norm": 0.518153963442196, "learning_rate": 1.6351987023519872e-05, "loss": 0.5894, "step": 24170 }, { "epoch": 0.7057020232985898, "grad_norm": 0.5459892422439371, "learning_rate": 1.635036496350365e-05, "loss": 0.5972, "step": 24171 }, { "epoch": 0.7057312195264371, "grad_norm": 0.5271142991636497, "learning_rate": 1.634874290348743e-05, "loss": 0.5835, "step": 24172 }, { "epoch": 0.7057604157542845, "grad_norm": 0.5031887000766213, "learning_rate": 1.6347120843471207e-05, "loss": 0.586, "step": 24173 }, { "epoch": 0.7057896119821319, "grad_norm": 0.4903109396992807, "learning_rate": 1.634549878345499e-05, "loss": 0.5272, "step": 24174 }, { "epoch": 0.7058188082099792, "grad_norm": 0.5537052093319994, "learning_rate": 1.6343876723438767e-05, "loss": 0.669, "step": 24175 }, { "epoch": 0.7058480044378266, "grad_norm": 0.5580589291578151, "learning_rate": 1.634225466342255e-05, "loss": 0.638, "step": 24176 }, { "epoch": 0.7058772006656739, "grad_norm": 0.5563778275441896, "learning_rate": 1.6340632603406327e-05, "loss": 0.6624, "step": 24177 }, { "epoch": 0.7059063968935213, "grad_norm": 0.5073363330250727, "learning_rate": 1.6339010543390106e-05, "loss": 0.5704, "step": 24178 }, { "epoch": 0.7059355931213687, "grad_norm": 0.5328583690700782, "learning_rate": 1.6337388483373887e-05, "loss": 0.6234, "step": 24179 }, { "epoch": 0.705964789349216, "grad_norm": 0.5353993473248995, "learning_rate": 1.6335766423357666e-05, "loss": 0.6361, "step": 24180 }, { "epoch": 0.7059939855770635, "grad_norm": 0.533678958115822, "learning_rate": 1.6334144363341444e-05, "loss": 0.6131, "step": 24181 }, { "epoch": 0.7060231818049109, "grad_norm": 0.52472821665018, "learning_rate": 1.6332522303325223e-05, "loss": 0.595, "step": 24182 }, { "epoch": 0.7060523780327582, "grad_norm": 0.5699720869280183, "learning_rate": 1.6330900243309e-05, "loss": 0.6813, "step": 24183 }, { "epoch": 0.7060815742606056, "grad_norm": 0.5286022129048366, "learning_rate": 1.6329278183292783e-05, "loss": 0.6253, "step": 24184 }, { "epoch": 0.7061107704884529, "grad_norm": 0.5323687893039617, "learning_rate": 1.632765612327656e-05, "loss": 0.6191, "step": 24185 }, { "epoch": 0.7061399667163003, "grad_norm": 0.5331445015405897, "learning_rate": 1.6326034063260343e-05, "loss": 0.5934, "step": 24186 }, { "epoch": 0.7061691629441477, "grad_norm": 0.5040464909831553, "learning_rate": 1.632441200324412e-05, "loss": 0.5878, "step": 24187 }, { "epoch": 0.706198359171995, "grad_norm": 0.5544895828645632, "learning_rate": 1.63227899432279e-05, "loss": 0.5935, "step": 24188 }, { "epoch": 0.7062275553998424, "grad_norm": 0.5002770152862198, "learning_rate": 1.632116788321168e-05, "loss": 0.5493, "step": 24189 }, { "epoch": 0.7062567516276897, "grad_norm": 0.551906793568328, "learning_rate": 1.631954582319546e-05, "loss": 0.6741, "step": 24190 }, { "epoch": 0.7062859478555371, "grad_norm": 0.5293028344810655, "learning_rate": 1.6317923763179238e-05, "loss": 0.6099, "step": 24191 }, { "epoch": 0.7063151440833845, "grad_norm": 0.5220030191905272, "learning_rate": 1.6316301703163016e-05, "loss": 0.6326, "step": 24192 }, { "epoch": 0.7063443403112318, "grad_norm": 0.5117891804814335, "learning_rate": 1.6314679643146795e-05, "loss": 0.583, "step": 24193 }, { "epoch": 0.7063735365390792, "grad_norm": 0.4993023524702967, "learning_rate": 1.6313057583130577e-05, "loss": 0.5918, "step": 24194 }, { "epoch": 0.7064027327669266, "grad_norm": 0.5263078127625946, "learning_rate": 1.631143552311436e-05, "loss": 0.5684, "step": 24195 }, { "epoch": 0.7064319289947739, "grad_norm": 0.5362572699804949, "learning_rate": 1.6309813463098137e-05, "loss": 0.6194, "step": 24196 }, { "epoch": 0.7064611252226213, "grad_norm": 0.5007688342557215, "learning_rate": 1.6308191403081915e-05, "loss": 0.5631, "step": 24197 }, { "epoch": 0.7064903214504686, "grad_norm": 0.5224531609715292, "learning_rate": 1.6306569343065693e-05, "loss": 0.6147, "step": 24198 }, { "epoch": 0.706519517678316, "grad_norm": 0.5710676267011654, "learning_rate": 1.6304947283049475e-05, "loss": 0.6406, "step": 24199 }, { "epoch": 0.7065487139061634, "grad_norm": 0.5538499002669617, "learning_rate": 1.6303325223033254e-05, "loss": 0.6582, "step": 24200 }, { "epoch": 0.7065779101340107, "grad_norm": 0.5018005495089533, "learning_rate": 1.6301703163017032e-05, "loss": 0.5525, "step": 24201 }, { "epoch": 0.7066071063618581, "grad_norm": 0.5204445716724698, "learning_rate": 1.630008110300081e-05, "loss": 0.6191, "step": 24202 }, { "epoch": 0.7066363025897054, "grad_norm": 0.5444468951278654, "learning_rate": 1.629845904298459e-05, "loss": 0.669, "step": 24203 }, { "epoch": 0.7066654988175528, "grad_norm": 0.5051411974438033, "learning_rate": 1.629683698296837e-05, "loss": 0.5388, "step": 24204 }, { "epoch": 0.7066946950454002, "grad_norm": 0.5037774244831128, "learning_rate": 1.6295214922952152e-05, "loss": 0.5651, "step": 24205 }, { "epoch": 0.7067238912732475, "grad_norm": 0.5456587834181508, "learning_rate": 1.629359286293593e-05, "loss": 0.6255, "step": 24206 }, { "epoch": 0.7067530875010949, "grad_norm": 0.5177147435604121, "learning_rate": 1.629197080291971e-05, "loss": 0.6129, "step": 24207 }, { "epoch": 0.7067822837289423, "grad_norm": 0.5108634019515307, "learning_rate": 1.6290348742903487e-05, "loss": 0.5908, "step": 24208 }, { "epoch": 0.7068114799567896, "grad_norm": 0.5339216268189506, "learning_rate": 1.628872668288727e-05, "loss": 0.5786, "step": 24209 }, { "epoch": 0.706840676184637, "grad_norm": 0.5483160457344453, "learning_rate": 1.6287104622871048e-05, "loss": 0.6708, "step": 24210 }, { "epoch": 0.7068698724124843, "grad_norm": 0.5212944255022137, "learning_rate": 1.6285482562854826e-05, "loss": 0.6309, "step": 24211 }, { "epoch": 0.7068990686403317, "grad_norm": 0.5055823697893422, "learning_rate": 1.6283860502838604e-05, "loss": 0.5388, "step": 24212 }, { "epoch": 0.7069282648681791, "grad_norm": 0.5596240215387228, "learning_rate": 1.6282238442822383e-05, "loss": 0.6839, "step": 24213 }, { "epoch": 0.7069574610960264, "grad_norm": 0.5332909759240724, "learning_rate": 1.6280616382806164e-05, "loss": 0.6393, "step": 24214 }, { "epoch": 0.7069866573238738, "grad_norm": 0.5107011443193952, "learning_rate": 1.6278994322789946e-05, "loss": 0.5844, "step": 24215 }, { "epoch": 0.7070158535517211, "grad_norm": 0.5579038425277846, "learning_rate": 1.6277372262773725e-05, "loss": 0.6088, "step": 24216 }, { "epoch": 0.7070450497795685, "grad_norm": 0.5417186199935202, "learning_rate": 1.6275750202757503e-05, "loss": 0.6032, "step": 24217 }, { "epoch": 0.7070742460074159, "grad_norm": 0.5725992870638733, "learning_rate": 1.627412814274128e-05, "loss": 0.6863, "step": 24218 }, { "epoch": 0.7071034422352632, "grad_norm": 0.5177576947537559, "learning_rate": 1.6272506082725063e-05, "loss": 0.5946, "step": 24219 }, { "epoch": 0.7071326384631106, "grad_norm": 0.5350618790341016, "learning_rate": 1.627088402270884e-05, "loss": 0.6017, "step": 24220 }, { "epoch": 0.707161834690958, "grad_norm": 0.5822633436009609, "learning_rate": 1.626926196269262e-05, "loss": 0.707, "step": 24221 }, { "epoch": 0.7071910309188053, "grad_norm": 0.534553447200677, "learning_rate": 1.6267639902676398e-05, "loss": 0.6303, "step": 24222 }, { "epoch": 0.7072202271466527, "grad_norm": 0.4683224949401128, "learning_rate": 1.626601784266018e-05, "loss": 0.4758, "step": 24223 }, { "epoch": 0.7072494233745, "grad_norm": 0.4904254291249534, "learning_rate": 1.626439578264396e-05, "loss": 0.5297, "step": 24224 }, { "epoch": 0.7072786196023474, "grad_norm": 0.5610842112319121, "learning_rate": 1.626277372262774e-05, "loss": 0.653, "step": 24225 }, { "epoch": 0.7073078158301948, "grad_norm": 0.5000907854744391, "learning_rate": 1.626115166261152e-05, "loss": 0.557, "step": 24226 }, { "epoch": 0.7073370120580421, "grad_norm": 0.5313193727524352, "learning_rate": 1.6259529602595297e-05, "loss": 0.5609, "step": 24227 }, { "epoch": 0.7073662082858895, "grad_norm": 0.5332721916096284, "learning_rate": 1.6257907542579075e-05, "loss": 0.6641, "step": 24228 }, { "epoch": 0.7073954045137368, "grad_norm": 0.5243050704614195, "learning_rate": 1.6256285482562854e-05, "loss": 0.5509, "step": 24229 }, { "epoch": 0.7074246007415842, "grad_norm": 0.5141208062131594, "learning_rate": 1.6254663422546635e-05, "loss": 0.5783, "step": 24230 }, { "epoch": 0.7074537969694316, "grad_norm": 0.5435669007180464, "learning_rate": 1.6253041362530414e-05, "loss": 0.6103, "step": 24231 }, { "epoch": 0.7074829931972789, "grad_norm": 0.5572080188673448, "learning_rate": 1.6251419302514192e-05, "loss": 0.7026, "step": 24232 }, { "epoch": 0.7075121894251263, "grad_norm": 0.537206738278739, "learning_rate": 1.6249797242497974e-05, "loss": 0.6122, "step": 24233 }, { "epoch": 0.7075413856529736, "grad_norm": 0.48020748775136884, "learning_rate": 1.6248175182481752e-05, "loss": 0.5064, "step": 24234 }, { "epoch": 0.707570581880821, "grad_norm": 0.5753771126687721, "learning_rate": 1.6246553122465534e-05, "loss": 0.6965, "step": 24235 }, { "epoch": 0.7075997781086684, "grad_norm": 0.5265510219904859, "learning_rate": 1.6244931062449312e-05, "loss": 0.566, "step": 24236 }, { "epoch": 0.7076289743365157, "grad_norm": 0.5514957793630664, "learning_rate": 1.624330900243309e-05, "loss": 0.6341, "step": 24237 }, { "epoch": 0.7076581705643631, "grad_norm": 0.5311211839416999, "learning_rate": 1.624168694241687e-05, "loss": 0.5715, "step": 24238 }, { "epoch": 0.7076873667922104, "grad_norm": 0.5129110328844956, "learning_rate": 1.6240064882400647e-05, "loss": 0.5432, "step": 24239 }, { "epoch": 0.7077165630200578, "grad_norm": 0.49487351709366645, "learning_rate": 1.623844282238443e-05, "loss": 0.5496, "step": 24240 }, { "epoch": 0.7077457592479052, "grad_norm": 0.5217989685078279, "learning_rate": 1.6236820762368208e-05, "loss": 0.6417, "step": 24241 }, { "epoch": 0.7077749554757525, "grad_norm": 0.5096633415142552, "learning_rate": 1.623519870235199e-05, "loss": 0.5785, "step": 24242 }, { "epoch": 0.7078041517035999, "grad_norm": 0.5184282373323047, "learning_rate": 1.6233576642335768e-05, "loss": 0.5602, "step": 24243 }, { "epoch": 0.7078333479314473, "grad_norm": 0.47713851463062185, "learning_rate": 1.6231954582319546e-05, "loss": 0.5283, "step": 24244 }, { "epoch": 0.7078625441592946, "grad_norm": 0.5158860936215349, "learning_rate": 1.6230332522303328e-05, "loss": 0.5962, "step": 24245 }, { "epoch": 0.707891740387142, "grad_norm": 0.4981134363310074, "learning_rate": 1.6228710462287106e-05, "loss": 0.5526, "step": 24246 }, { "epoch": 0.7079209366149893, "grad_norm": 0.5262051970155375, "learning_rate": 1.6227088402270885e-05, "loss": 0.5813, "step": 24247 }, { "epoch": 0.7079501328428367, "grad_norm": 0.5306869106736064, "learning_rate": 1.6225466342254663e-05, "loss": 0.5975, "step": 24248 }, { "epoch": 0.7079793290706841, "grad_norm": 0.4919784362541109, "learning_rate": 1.622384428223844e-05, "loss": 0.515, "step": 24249 }, { "epoch": 0.7080085252985314, "grad_norm": 0.5283701425800569, "learning_rate": 1.6222222222222223e-05, "loss": 0.5732, "step": 24250 }, { "epoch": 0.7080377215263788, "grad_norm": 0.6061183219416515, "learning_rate": 1.6220600162206e-05, "loss": 0.7383, "step": 24251 }, { "epoch": 0.7080669177542261, "grad_norm": 0.5007858866496219, "learning_rate": 1.6218978102189783e-05, "loss": 0.544, "step": 24252 }, { "epoch": 0.7080961139820735, "grad_norm": 0.49678461481853187, "learning_rate": 1.621735604217356e-05, "loss": 0.5643, "step": 24253 }, { "epoch": 0.7081253102099209, "grad_norm": 0.5734748662407967, "learning_rate": 1.621573398215734e-05, "loss": 0.6189, "step": 24254 }, { "epoch": 0.7081545064377682, "grad_norm": 0.5646726393596929, "learning_rate": 1.6214111922141122e-05, "loss": 0.7036, "step": 24255 }, { "epoch": 0.7081837026656156, "grad_norm": 0.5236071249984122, "learning_rate": 1.62124898621249e-05, "loss": 0.5742, "step": 24256 }, { "epoch": 0.708212898893463, "grad_norm": 0.5392961317902016, "learning_rate": 1.621086780210868e-05, "loss": 0.6531, "step": 24257 }, { "epoch": 0.7082420951213103, "grad_norm": 0.5313751232157897, "learning_rate": 1.6209245742092457e-05, "loss": 0.6042, "step": 24258 }, { "epoch": 0.7082712913491577, "grad_norm": 0.49193804736254415, "learning_rate": 1.6207623682076235e-05, "loss": 0.5146, "step": 24259 }, { "epoch": 0.708300487577005, "grad_norm": 0.5148310734468082, "learning_rate": 1.6206001622060017e-05, "loss": 0.55, "step": 24260 }, { "epoch": 0.7083296838048524, "grad_norm": 0.5103689337946906, "learning_rate": 1.62043795620438e-05, "loss": 0.5897, "step": 24261 }, { "epoch": 0.7083588800326998, "grad_norm": 0.49960891801428176, "learning_rate": 1.6202757502027577e-05, "loss": 0.5659, "step": 24262 }, { "epoch": 0.7083880762605471, "grad_norm": 0.5151881557029245, "learning_rate": 1.6201135442011356e-05, "loss": 0.5568, "step": 24263 }, { "epoch": 0.7084172724883945, "grad_norm": 0.5354074844949486, "learning_rate": 1.6199513381995134e-05, "loss": 0.628, "step": 24264 }, { "epoch": 0.7084464687162418, "grad_norm": 0.520997303300862, "learning_rate": 1.6197891321978916e-05, "loss": 0.6191, "step": 24265 }, { "epoch": 0.7084756649440892, "grad_norm": 0.5000903456134512, "learning_rate": 1.6196269261962694e-05, "loss": 0.5894, "step": 24266 }, { "epoch": 0.7085048611719366, "grad_norm": 0.6001076233676329, "learning_rate": 1.6194647201946472e-05, "loss": 0.6465, "step": 24267 }, { "epoch": 0.7085340573997839, "grad_norm": 0.5093982306911492, "learning_rate": 1.619302514193025e-05, "loss": 0.5451, "step": 24268 }, { "epoch": 0.7085632536276313, "grad_norm": 0.5752827981562041, "learning_rate": 1.619140308191403e-05, "loss": 0.5929, "step": 24269 }, { "epoch": 0.7085924498554786, "grad_norm": 0.5486526852664285, "learning_rate": 1.618978102189781e-05, "loss": 0.6321, "step": 24270 }, { "epoch": 0.708621646083326, "grad_norm": 0.4980078949671079, "learning_rate": 1.6188158961881593e-05, "loss": 0.5354, "step": 24271 }, { "epoch": 0.7086508423111734, "grad_norm": 0.5226878807515444, "learning_rate": 1.618653690186537e-05, "loss": 0.6136, "step": 24272 }, { "epoch": 0.7086800385390207, "grad_norm": 0.5007731989059857, "learning_rate": 1.618491484184915e-05, "loss": 0.583, "step": 24273 }, { "epoch": 0.7087092347668681, "grad_norm": 0.5430623875340662, "learning_rate": 1.6183292781832928e-05, "loss": 0.5946, "step": 24274 }, { "epoch": 0.7087384309947155, "grad_norm": 0.5100273842933597, "learning_rate": 1.618167072181671e-05, "loss": 0.5467, "step": 24275 }, { "epoch": 0.7087676272225628, "grad_norm": 0.5489790635095418, "learning_rate": 1.6180048661800488e-05, "loss": 0.5907, "step": 24276 }, { "epoch": 0.7087968234504102, "grad_norm": 0.5612380185276084, "learning_rate": 1.6178426601784266e-05, "loss": 0.6566, "step": 24277 }, { "epoch": 0.7088260196782575, "grad_norm": 0.614238618035755, "learning_rate": 1.6176804541768045e-05, "loss": 0.5686, "step": 24278 }, { "epoch": 0.7088552159061049, "grad_norm": 0.5552163616375313, "learning_rate": 1.6175182481751823e-05, "loss": 0.6775, "step": 24279 }, { "epoch": 0.7088844121339523, "grad_norm": 0.5363958834124961, "learning_rate": 1.6173560421735605e-05, "loss": 0.6267, "step": 24280 }, { "epoch": 0.7089136083617996, "grad_norm": 0.53373629950934, "learning_rate": 1.6171938361719387e-05, "loss": 0.6081, "step": 24281 }, { "epoch": 0.708942804589647, "grad_norm": 0.5118458641836692, "learning_rate": 1.6170316301703165e-05, "loss": 0.5678, "step": 24282 }, { "epoch": 0.7089720008174943, "grad_norm": 0.5089222032716153, "learning_rate": 1.6168694241686943e-05, "loss": 0.5737, "step": 24283 }, { "epoch": 0.7090011970453417, "grad_norm": 0.506911050396377, "learning_rate": 1.6167072181670722e-05, "loss": 0.5497, "step": 24284 }, { "epoch": 0.7090303932731891, "grad_norm": 0.49214156178524154, "learning_rate": 1.61654501216545e-05, "loss": 0.549, "step": 24285 }, { "epoch": 0.7090595895010364, "grad_norm": 0.5430104260561252, "learning_rate": 1.6163828061638282e-05, "loss": 0.6193, "step": 24286 }, { "epoch": 0.7090887857288838, "grad_norm": 0.5078509070576269, "learning_rate": 1.616220600162206e-05, "loss": 0.5837, "step": 24287 }, { "epoch": 0.7091179819567311, "grad_norm": 0.5740344878886776, "learning_rate": 1.616058394160584e-05, "loss": 0.685, "step": 24288 }, { "epoch": 0.7091471781845785, "grad_norm": 0.5103846037116095, "learning_rate": 1.6158961881589617e-05, "loss": 0.573, "step": 24289 }, { "epoch": 0.7091763744124259, "grad_norm": 0.5182654913170287, "learning_rate": 1.61573398215734e-05, "loss": 0.6005, "step": 24290 }, { "epoch": 0.7092055706402732, "grad_norm": 0.5479699243333459, "learning_rate": 1.615571776155718e-05, "loss": 0.6514, "step": 24291 }, { "epoch": 0.7092347668681206, "grad_norm": 0.5222215037344548, "learning_rate": 1.615409570154096e-05, "loss": 0.5626, "step": 24292 }, { "epoch": 0.709263963095968, "grad_norm": 0.5478632470485791, "learning_rate": 1.6152473641524737e-05, "loss": 0.6723, "step": 24293 }, { "epoch": 0.7092931593238153, "grad_norm": 0.5358350473457386, "learning_rate": 1.6150851581508516e-05, "loss": 0.5751, "step": 24294 }, { "epoch": 0.7093223555516627, "grad_norm": 0.5140410632425679, "learning_rate": 1.6149229521492294e-05, "loss": 0.568, "step": 24295 }, { "epoch": 0.70935155177951, "grad_norm": 0.6060380354412593, "learning_rate": 1.6147607461476076e-05, "loss": 0.7485, "step": 24296 }, { "epoch": 0.7093807480073574, "grad_norm": 0.4989542078499432, "learning_rate": 1.6145985401459854e-05, "loss": 0.5446, "step": 24297 }, { "epoch": 0.7094099442352048, "grad_norm": 0.5141212732205795, "learning_rate": 1.6144363341443633e-05, "loss": 0.5764, "step": 24298 }, { "epoch": 0.7094391404630521, "grad_norm": 0.5578701886426801, "learning_rate": 1.6142741281427414e-05, "loss": 0.6096, "step": 24299 }, { "epoch": 0.7094683366908995, "grad_norm": 0.4900926827733887, "learning_rate": 1.6141119221411193e-05, "loss": 0.5522, "step": 24300 }, { "epoch": 0.7094975329187468, "grad_norm": 0.4984161900696081, "learning_rate": 1.6139497161394974e-05, "loss": 0.5377, "step": 24301 }, { "epoch": 0.7095267291465943, "grad_norm": 0.49327366205808, "learning_rate": 1.6137875101378753e-05, "loss": 0.4994, "step": 24302 }, { "epoch": 0.7095559253744417, "grad_norm": 0.5309930285347094, "learning_rate": 1.613625304136253e-05, "loss": 0.6138, "step": 24303 }, { "epoch": 0.709585121602289, "grad_norm": 0.5302077548269198, "learning_rate": 1.613463098134631e-05, "loss": 0.6009, "step": 24304 }, { "epoch": 0.7096143178301364, "grad_norm": 0.5024083494482305, "learning_rate": 1.6133008921330088e-05, "loss": 0.5679, "step": 24305 }, { "epoch": 0.7096435140579838, "grad_norm": 0.5517960724542629, "learning_rate": 1.613138686131387e-05, "loss": 0.605, "step": 24306 }, { "epoch": 0.7096727102858311, "grad_norm": 0.5292914233181244, "learning_rate": 1.6129764801297648e-05, "loss": 0.6122, "step": 24307 }, { "epoch": 0.7097019065136785, "grad_norm": 0.5337021500328878, "learning_rate": 1.612814274128143e-05, "loss": 0.6138, "step": 24308 }, { "epoch": 0.7097311027415258, "grad_norm": 0.5129442660831289, "learning_rate": 1.6126520681265208e-05, "loss": 0.5706, "step": 24309 }, { "epoch": 0.7097602989693732, "grad_norm": 0.5165388577263117, "learning_rate": 1.6124898621248987e-05, "loss": 0.5828, "step": 24310 }, { "epoch": 0.7097894951972206, "grad_norm": 0.5157977702831075, "learning_rate": 1.612327656123277e-05, "loss": 0.5871, "step": 24311 }, { "epoch": 0.7098186914250679, "grad_norm": 0.5272447961538428, "learning_rate": 1.6121654501216547e-05, "loss": 0.5577, "step": 24312 }, { "epoch": 0.7098478876529153, "grad_norm": 0.515254861939596, "learning_rate": 1.6120032441200325e-05, "loss": 0.5723, "step": 24313 }, { "epoch": 0.7098770838807626, "grad_norm": 0.534410652394103, "learning_rate": 1.6118410381184103e-05, "loss": 0.6211, "step": 24314 }, { "epoch": 0.70990628010861, "grad_norm": 0.509466628702291, "learning_rate": 1.6116788321167882e-05, "loss": 0.5653, "step": 24315 }, { "epoch": 0.7099354763364574, "grad_norm": 0.5317971321941101, "learning_rate": 1.6115166261151664e-05, "loss": 0.6432, "step": 24316 }, { "epoch": 0.7099646725643047, "grad_norm": 0.534020689274553, "learning_rate": 1.6113544201135442e-05, "loss": 0.6054, "step": 24317 }, { "epoch": 0.7099938687921521, "grad_norm": 0.5026861119955698, "learning_rate": 1.6111922141119224e-05, "loss": 0.5251, "step": 24318 }, { "epoch": 0.7100230650199995, "grad_norm": 0.525704512874309, "learning_rate": 1.6110300081103002e-05, "loss": 0.6061, "step": 24319 }, { "epoch": 0.7100522612478468, "grad_norm": 0.6854186857847059, "learning_rate": 1.610867802108678e-05, "loss": 0.5493, "step": 24320 }, { "epoch": 0.7100814574756942, "grad_norm": 0.47471901002634576, "learning_rate": 1.6107055961070562e-05, "loss": 0.4917, "step": 24321 }, { "epoch": 0.7101106537035415, "grad_norm": 0.5630208584479803, "learning_rate": 1.610543390105434e-05, "loss": 0.6717, "step": 24322 }, { "epoch": 0.7101398499313889, "grad_norm": 0.5123470938046943, "learning_rate": 1.610381184103812e-05, "loss": 0.5516, "step": 24323 }, { "epoch": 0.7101690461592363, "grad_norm": 0.5357644356937628, "learning_rate": 1.6102189781021897e-05, "loss": 0.5834, "step": 24324 }, { "epoch": 0.7101982423870836, "grad_norm": 0.5275093274629055, "learning_rate": 1.6100567721005676e-05, "loss": 0.6026, "step": 24325 }, { "epoch": 0.710227438614931, "grad_norm": 0.5032475417980948, "learning_rate": 1.6098945660989457e-05, "loss": 0.6049, "step": 24326 }, { "epoch": 0.7102566348427783, "grad_norm": 0.49315966840077663, "learning_rate": 1.609732360097324e-05, "loss": 0.5538, "step": 24327 }, { "epoch": 0.7102858310706257, "grad_norm": 0.56321070876635, "learning_rate": 1.6095701540957018e-05, "loss": 0.6561, "step": 24328 }, { "epoch": 0.7103150272984731, "grad_norm": 0.5269905924814211, "learning_rate": 1.6094079480940796e-05, "loss": 0.5866, "step": 24329 }, { "epoch": 0.7103442235263204, "grad_norm": 0.5178310822966055, "learning_rate": 1.6092457420924574e-05, "loss": 0.5762, "step": 24330 }, { "epoch": 0.7103734197541678, "grad_norm": 0.5184003406421068, "learning_rate": 1.6090835360908356e-05, "loss": 0.5768, "step": 24331 }, { "epoch": 0.7104026159820152, "grad_norm": 0.5050601751415257, "learning_rate": 1.6089213300892135e-05, "loss": 0.5752, "step": 24332 }, { "epoch": 0.7104318122098625, "grad_norm": 0.5433100866869262, "learning_rate": 1.6087591240875913e-05, "loss": 0.6171, "step": 24333 }, { "epoch": 0.7104610084377099, "grad_norm": 0.5326461967970005, "learning_rate": 1.608596918085969e-05, "loss": 0.6421, "step": 24334 }, { "epoch": 0.7104902046655572, "grad_norm": 0.5019489329488235, "learning_rate": 1.608434712084347e-05, "loss": 0.53, "step": 24335 }, { "epoch": 0.7105194008934046, "grad_norm": 0.4965995079027507, "learning_rate": 1.608272506082725e-05, "loss": 0.5199, "step": 24336 }, { "epoch": 0.710548597121252, "grad_norm": 0.5341112849623488, "learning_rate": 1.6081103000811033e-05, "loss": 0.6397, "step": 24337 }, { "epoch": 0.7105777933490993, "grad_norm": 0.5079387914458976, "learning_rate": 1.607948094079481e-05, "loss": 0.5845, "step": 24338 }, { "epoch": 0.7106069895769467, "grad_norm": 0.5666904508181505, "learning_rate": 1.607785888077859e-05, "loss": 0.6274, "step": 24339 }, { "epoch": 0.710636185804794, "grad_norm": 0.5143501007124919, "learning_rate": 1.6076236820762368e-05, "loss": 0.5634, "step": 24340 }, { "epoch": 0.7106653820326414, "grad_norm": 0.49935284998152574, "learning_rate": 1.607461476074615e-05, "loss": 0.5458, "step": 24341 }, { "epoch": 0.7106945782604888, "grad_norm": 0.5302761268129045, "learning_rate": 1.607299270072993e-05, "loss": 0.6118, "step": 24342 }, { "epoch": 0.7107237744883361, "grad_norm": 0.5606452772677119, "learning_rate": 1.6071370640713707e-05, "loss": 0.6785, "step": 24343 }, { "epoch": 0.7107529707161835, "grad_norm": 0.5466190172440589, "learning_rate": 1.6069748580697485e-05, "loss": 0.6645, "step": 24344 }, { "epoch": 0.7107821669440308, "grad_norm": 0.5273902872998786, "learning_rate": 1.6068126520681264e-05, "loss": 0.6156, "step": 24345 }, { "epoch": 0.7108113631718782, "grad_norm": 0.5200587925942863, "learning_rate": 1.6066504460665045e-05, "loss": 0.5858, "step": 24346 }, { "epoch": 0.7108405593997256, "grad_norm": 0.5436984824992616, "learning_rate": 1.6064882400648827e-05, "loss": 0.6114, "step": 24347 }, { "epoch": 0.7108697556275729, "grad_norm": 0.544526173787538, "learning_rate": 1.6063260340632605e-05, "loss": 0.6511, "step": 24348 }, { "epoch": 0.7108989518554203, "grad_norm": 0.5303637964020913, "learning_rate": 1.6061638280616384e-05, "loss": 0.6085, "step": 24349 }, { "epoch": 0.7109281480832677, "grad_norm": 0.4824469634073438, "learning_rate": 1.6060016220600162e-05, "loss": 0.5339, "step": 24350 }, { "epoch": 0.710957344311115, "grad_norm": 0.5235160658657461, "learning_rate": 1.605839416058394e-05, "loss": 0.5919, "step": 24351 }, { "epoch": 0.7109865405389624, "grad_norm": 0.47293321806650646, "learning_rate": 1.6056772100567722e-05, "loss": 0.5072, "step": 24352 }, { "epoch": 0.7110157367668097, "grad_norm": 0.5033833886237886, "learning_rate": 1.60551500405515e-05, "loss": 0.5992, "step": 24353 }, { "epoch": 0.7110449329946571, "grad_norm": 0.493370087823547, "learning_rate": 1.605352798053528e-05, "loss": 0.5325, "step": 24354 }, { "epoch": 0.7110741292225045, "grad_norm": 0.5711147478828927, "learning_rate": 1.6051905920519057e-05, "loss": 0.6315, "step": 24355 }, { "epoch": 0.7111033254503518, "grad_norm": 0.5217510452217242, "learning_rate": 1.605028386050284e-05, "loss": 0.6054, "step": 24356 }, { "epoch": 0.7111325216781992, "grad_norm": 0.528476266672321, "learning_rate": 1.604866180048662e-05, "loss": 0.5714, "step": 24357 }, { "epoch": 0.7111617179060465, "grad_norm": 0.47500654057270286, "learning_rate": 1.60470397404704e-05, "loss": 0.5259, "step": 24358 }, { "epoch": 0.7111909141338939, "grad_norm": 0.5225201952368466, "learning_rate": 1.6045417680454178e-05, "loss": 0.5865, "step": 24359 }, { "epoch": 0.7112201103617413, "grad_norm": 0.5072058008843336, "learning_rate": 1.6043795620437956e-05, "loss": 0.5897, "step": 24360 }, { "epoch": 0.7112493065895886, "grad_norm": 0.5589986119904824, "learning_rate": 1.6042173560421734e-05, "loss": 0.635, "step": 24361 }, { "epoch": 0.711278502817436, "grad_norm": 0.5265621047471772, "learning_rate": 1.6040551500405516e-05, "loss": 0.5748, "step": 24362 }, { "epoch": 0.7113076990452833, "grad_norm": 0.5071595098683921, "learning_rate": 1.6038929440389295e-05, "loss": 0.5737, "step": 24363 }, { "epoch": 0.7113368952731307, "grad_norm": 0.5312432536767426, "learning_rate": 1.6037307380373073e-05, "loss": 0.6266, "step": 24364 }, { "epoch": 0.7113660915009781, "grad_norm": 0.5194806322326492, "learning_rate": 1.6035685320356855e-05, "loss": 0.6238, "step": 24365 }, { "epoch": 0.7113952877288254, "grad_norm": 0.5161388325650719, "learning_rate": 1.6034063260340633e-05, "loss": 0.5767, "step": 24366 }, { "epoch": 0.7114244839566728, "grad_norm": 0.5337501342002937, "learning_rate": 1.6032441200324415e-05, "loss": 0.6334, "step": 24367 }, { "epoch": 0.7114536801845202, "grad_norm": 0.508503827899231, "learning_rate": 1.6030819140308193e-05, "loss": 0.577, "step": 24368 }, { "epoch": 0.7114828764123675, "grad_norm": 0.48684473706962195, "learning_rate": 1.602919708029197e-05, "loss": 0.4912, "step": 24369 }, { "epoch": 0.7115120726402149, "grad_norm": 0.5526689113578689, "learning_rate": 1.602757502027575e-05, "loss": 0.6572, "step": 24370 }, { "epoch": 0.7115412688680622, "grad_norm": 0.4928096269709931, "learning_rate": 1.602595296025953e-05, "loss": 0.54, "step": 24371 }, { "epoch": 0.7115704650959096, "grad_norm": 0.5403203449205988, "learning_rate": 1.602433090024331e-05, "loss": 0.6209, "step": 24372 }, { "epoch": 0.711599661323757, "grad_norm": 0.512433452762336, "learning_rate": 1.602270884022709e-05, "loss": 0.5648, "step": 24373 }, { "epoch": 0.7116288575516043, "grad_norm": 0.530943028034246, "learning_rate": 1.602108678021087e-05, "loss": 0.5943, "step": 24374 }, { "epoch": 0.7116580537794517, "grad_norm": 0.5622560189856906, "learning_rate": 1.601946472019465e-05, "loss": 0.654, "step": 24375 }, { "epoch": 0.711687250007299, "grad_norm": 0.5251338608345636, "learning_rate": 1.6017842660178427e-05, "loss": 0.6267, "step": 24376 }, { "epoch": 0.7117164462351464, "grad_norm": 0.5757331023877362, "learning_rate": 1.601622060016221e-05, "loss": 0.7128, "step": 24377 }, { "epoch": 0.7117456424629938, "grad_norm": 0.5224684004855603, "learning_rate": 1.6014598540145987e-05, "loss": 0.5959, "step": 24378 }, { "epoch": 0.7117748386908411, "grad_norm": 0.5411017225025658, "learning_rate": 1.6012976480129766e-05, "loss": 0.6132, "step": 24379 }, { "epoch": 0.7118040349186885, "grad_norm": 0.5169142873311133, "learning_rate": 1.6011354420113544e-05, "loss": 0.5785, "step": 24380 }, { "epoch": 0.7118332311465358, "grad_norm": 0.5235102355910057, "learning_rate": 1.6009732360097322e-05, "loss": 0.6086, "step": 24381 }, { "epoch": 0.7118624273743832, "grad_norm": 0.5427046117188967, "learning_rate": 1.6008110300081104e-05, "loss": 0.6137, "step": 24382 }, { "epoch": 0.7118916236022306, "grad_norm": 0.5070013323821708, "learning_rate": 1.6006488240064882e-05, "loss": 0.5497, "step": 24383 }, { "epoch": 0.7119208198300779, "grad_norm": 0.5534466384067488, "learning_rate": 1.6004866180048664e-05, "loss": 0.6425, "step": 24384 }, { "epoch": 0.7119500160579253, "grad_norm": 0.5289440103365457, "learning_rate": 1.6003244120032443e-05, "loss": 0.6102, "step": 24385 }, { "epoch": 0.7119792122857727, "grad_norm": 0.5537467825876065, "learning_rate": 1.600162206001622e-05, "loss": 0.6571, "step": 24386 }, { "epoch": 0.71200840851362, "grad_norm": 0.5422430110600549, "learning_rate": 1.6000000000000003e-05, "loss": 0.6126, "step": 24387 }, { "epoch": 0.7120376047414674, "grad_norm": 0.5107007443906583, "learning_rate": 1.599837793998378e-05, "loss": 0.5931, "step": 24388 }, { "epoch": 0.7120668009693147, "grad_norm": 0.5258061045080087, "learning_rate": 1.599675587996756e-05, "loss": 0.5961, "step": 24389 }, { "epoch": 0.7120959971971621, "grad_norm": 0.5081898980488295, "learning_rate": 1.5995133819951338e-05, "loss": 0.5911, "step": 24390 }, { "epoch": 0.7121251934250095, "grad_norm": 0.5059900992863258, "learning_rate": 1.5993511759935116e-05, "loss": 0.5601, "step": 24391 }, { "epoch": 0.7121543896528568, "grad_norm": 0.5196364111938029, "learning_rate": 1.5991889699918898e-05, "loss": 0.5965, "step": 24392 }, { "epoch": 0.7121835858807042, "grad_norm": 0.4417909225544589, "learning_rate": 1.599026763990268e-05, "loss": 0.4101, "step": 24393 }, { "epoch": 0.7122127821085515, "grad_norm": 0.4771339327166963, "learning_rate": 1.5988645579886458e-05, "loss": 0.4939, "step": 24394 }, { "epoch": 0.7122419783363989, "grad_norm": 0.5172424110349868, "learning_rate": 1.5987023519870236e-05, "loss": 0.5349, "step": 24395 }, { "epoch": 0.7122711745642463, "grad_norm": 0.5247202409331376, "learning_rate": 1.5985401459854015e-05, "loss": 0.5911, "step": 24396 }, { "epoch": 0.7123003707920936, "grad_norm": 0.5241166920046764, "learning_rate": 1.5983779399837797e-05, "loss": 0.6223, "step": 24397 }, { "epoch": 0.712329567019941, "grad_norm": 0.5541963880346511, "learning_rate": 1.5982157339821575e-05, "loss": 0.6543, "step": 24398 }, { "epoch": 0.7123587632477884, "grad_norm": 0.4682243326630985, "learning_rate": 1.5980535279805353e-05, "loss": 0.4859, "step": 24399 }, { "epoch": 0.7123879594756357, "grad_norm": 0.546137717337691, "learning_rate": 1.597891321978913e-05, "loss": 0.6342, "step": 24400 }, { "epoch": 0.7124171557034831, "grad_norm": 0.5234920956768674, "learning_rate": 1.597729115977291e-05, "loss": 0.5897, "step": 24401 }, { "epoch": 0.7124463519313304, "grad_norm": 0.47720166699366257, "learning_rate": 1.5975669099756692e-05, "loss": 0.5123, "step": 24402 }, { "epoch": 0.7124755481591778, "grad_norm": 0.4976678028077751, "learning_rate": 1.5974047039740474e-05, "loss": 0.5276, "step": 24403 }, { "epoch": 0.7125047443870252, "grad_norm": 0.5532418602316534, "learning_rate": 1.5972424979724252e-05, "loss": 0.6194, "step": 24404 }, { "epoch": 0.7125339406148725, "grad_norm": 0.5364273763555857, "learning_rate": 1.597080291970803e-05, "loss": 0.6467, "step": 24405 }, { "epoch": 0.7125631368427199, "grad_norm": 0.556408034624355, "learning_rate": 1.596918085969181e-05, "loss": 0.7077, "step": 24406 }, { "epoch": 0.7125923330705672, "grad_norm": 0.5027711875034621, "learning_rate": 1.5967558799675587e-05, "loss": 0.57, "step": 24407 }, { "epoch": 0.7126215292984146, "grad_norm": 0.49617570673739475, "learning_rate": 1.596593673965937e-05, "loss": 0.5244, "step": 24408 }, { "epoch": 0.712650725526262, "grad_norm": 0.567721116317826, "learning_rate": 1.5964314679643147e-05, "loss": 0.6366, "step": 24409 }, { "epoch": 0.7126799217541093, "grad_norm": 0.49305834720863956, "learning_rate": 1.5962692619626926e-05, "loss": 0.5509, "step": 24410 }, { "epoch": 0.7127091179819567, "grad_norm": 0.5229718191554735, "learning_rate": 1.5961070559610704e-05, "loss": 0.5671, "step": 24411 }, { "epoch": 0.712738314209804, "grad_norm": 0.5096560752150677, "learning_rate": 1.5959448499594486e-05, "loss": 0.6056, "step": 24412 }, { "epoch": 0.7127675104376514, "grad_norm": 0.5195836722163151, "learning_rate": 1.5957826439578267e-05, "loss": 0.6217, "step": 24413 }, { "epoch": 0.7127967066654988, "grad_norm": 0.5307710870366953, "learning_rate": 1.5956204379562046e-05, "loss": 0.5971, "step": 24414 }, { "epoch": 0.7128259028933461, "grad_norm": 0.48718829068104574, "learning_rate": 1.5954582319545824e-05, "loss": 0.4993, "step": 24415 }, { "epoch": 0.7128550991211935, "grad_norm": 0.5163887153501231, "learning_rate": 1.5952960259529603e-05, "loss": 0.5561, "step": 24416 }, { "epoch": 0.7128842953490409, "grad_norm": 0.5287870775621487, "learning_rate": 1.595133819951338e-05, "loss": 0.5867, "step": 24417 }, { "epoch": 0.7129134915768882, "grad_norm": 0.48969097469228734, "learning_rate": 1.5949716139497163e-05, "loss": 0.5393, "step": 24418 }, { "epoch": 0.7129426878047356, "grad_norm": 0.5242259485039623, "learning_rate": 1.594809407948094e-05, "loss": 0.6271, "step": 24419 }, { "epoch": 0.7129718840325829, "grad_norm": 0.4935331326703396, "learning_rate": 1.594647201946472e-05, "loss": 0.5261, "step": 24420 }, { "epoch": 0.7130010802604303, "grad_norm": 0.5091444738052692, "learning_rate": 1.5944849959448498e-05, "loss": 0.5723, "step": 24421 }, { "epoch": 0.7130302764882778, "grad_norm": 0.5732300144706463, "learning_rate": 1.594322789943228e-05, "loss": 0.6798, "step": 24422 }, { "epoch": 0.7130594727161251, "grad_norm": 0.5410287924631164, "learning_rate": 1.594160583941606e-05, "loss": 0.6455, "step": 24423 }, { "epoch": 0.7130886689439725, "grad_norm": 0.4901481271932649, "learning_rate": 1.593998377939984e-05, "loss": 0.5761, "step": 24424 }, { "epoch": 0.7131178651718199, "grad_norm": 0.505373014225688, "learning_rate": 1.5938361719383618e-05, "loss": 0.5467, "step": 24425 }, { "epoch": 0.7131470613996672, "grad_norm": 0.5465110699269768, "learning_rate": 1.5936739659367397e-05, "loss": 0.6346, "step": 24426 }, { "epoch": 0.7131762576275146, "grad_norm": 0.5491513767108686, "learning_rate": 1.5935117599351175e-05, "loss": 0.6375, "step": 24427 }, { "epoch": 0.7132054538553619, "grad_norm": 0.5017286860509392, "learning_rate": 1.5933495539334957e-05, "loss": 0.5394, "step": 24428 }, { "epoch": 0.7132346500832093, "grad_norm": 0.5058189732230501, "learning_rate": 1.5931873479318735e-05, "loss": 0.5449, "step": 24429 }, { "epoch": 0.7132638463110567, "grad_norm": 0.5109687938781842, "learning_rate": 1.5930251419302513e-05, "loss": 0.5657, "step": 24430 }, { "epoch": 0.713293042538904, "grad_norm": 0.530717756622515, "learning_rate": 1.5928629359286295e-05, "loss": 0.5859, "step": 24431 }, { "epoch": 0.7133222387667514, "grad_norm": 0.5162898852375971, "learning_rate": 1.5927007299270074e-05, "loss": 0.5708, "step": 24432 }, { "epoch": 0.7133514349945987, "grad_norm": 0.5453342605395439, "learning_rate": 1.5925385239253855e-05, "loss": 0.61, "step": 24433 }, { "epoch": 0.7133806312224461, "grad_norm": 0.5346895756828393, "learning_rate": 1.5923763179237634e-05, "loss": 0.6597, "step": 24434 }, { "epoch": 0.7134098274502935, "grad_norm": 0.524387816449378, "learning_rate": 1.5922141119221412e-05, "loss": 0.614, "step": 24435 }, { "epoch": 0.7134390236781408, "grad_norm": 0.49014531767729286, "learning_rate": 1.592051905920519e-05, "loss": 0.5863, "step": 24436 }, { "epoch": 0.7134682199059882, "grad_norm": 0.5073080520616621, "learning_rate": 1.591889699918897e-05, "loss": 0.5334, "step": 24437 }, { "epoch": 0.7134974161338355, "grad_norm": 0.4864450791532477, "learning_rate": 1.591727493917275e-05, "loss": 0.5072, "step": 24438 }, { "epoch": 0.7135266123616829, "grad_norm": 0.537681333775664, "learning_rate": 1.591565287915653e-05, "loss": 0.6046, "step": 24439 }, { "epoch": 0.7135558085895303, "grad_norm": 0.5321661730766452, "learning_rate": 1.5914030819140307e-05, "loss": 0.6173, "step": 24440 }, { "epoch": 0.7135850048173776, "grad_norm": 0.5027487010110052, "learning_rate": 1.591240875912409e-05, "loss": 0.5442, "step": 24441 }, { "epoch": 0.713614201045225, "grad_norm": 0.4665159513383859, "learning_rate": 1.5910786699107867e-05, "loss": 0.5071, "step": 24442 }, { "epoch": 0.7136433972730724, "grad_norm": 0.5226702396529058, "learning_rate": 1.590916463909165e-05, "loss": 0.5776, "step": 24443 }, { "epoch": 0.7136725935009197, "grad_norm": 0.5516789964618566, "learning_rate": 1.5907542579075428e-05, "loss": 0.6208, "step": 24444 }, { "epoch": 0.7137017897287671, "grad_norm": 0.4873956231183293, "learning_rate": 1.5905920519059206e-05, "loss": 0.5414, "step": 24445 }, { "epoch": 0.7137309859566144, "grad_norm": 0.5135583085634117, "learning_rate": 1.5904298459042984e-05, "loss": 0.572, "step": 24446 }, { "epoch": 0.7137601821844618, "grad_norm": 0.4927660166471458, "learning_rate": 1.5902676399026763e-05, "loss": 0.5323, "step": 24447 }, { "epoch": 0.7137893784123092, "grad_norm": 0.5128886349344329, "learning_rate": 1.5901054339010544e-05, "loss": 0.577, "step": 24448 }, { "epoch": 0.7138185746401565, "grad_norm": 0.5240353620762632, "learning_rate": 1.5899432278994323e-05, "loss": 0.6013, "step": 24449 }, { "epoch": 0.7138477708680039, "grad_norm": 0.5385764345428563, "learning_rate": 1.5897810218978105e-05, "loss": 0.5844, "step": 24450 }, { "epoch": 0.7138769670958512, "grad_norm": 0.5303245037559615, "learning_rate": 1.5896188158961883e-05, "loss": 0.616, "step": 24451 }, { "epoch": 0.7139061633236986, "grad_norm": 0.5538289841285103, "learning_rate": 1.589456609894566e-05, "loss": 0.6206, "step": 24452 }, { "epoch": 0.713935359551546, "grad_norm": 0.5266196490307159, "learning_rate": 1.5892944038929443e-05, "loss": 0.6173, "step": 24453 }, { "epoch": 0.7139645557793933, "grad_norm": 0.5360033690055308, "learning_rate": 1.589132197891322e-05, "loss": 0.637, "step": 24454 }, { "epoch": 0.7139937520072407, "grad_norm": 0.5062250068539663, "learning_rate": 1.5889699918897e-05, "loss": 0.5887, "step": 24455 }, { "epoch": 0.714022948235088, "grad_norm": 0.5230429234125149, "learning_rate": 1.5888077858880778e-05, "loss": 0.5964, "step": 24456 }, { "epoch": 0.7140521444629354, "grad_norm": 0.5592934869767171, "learning_rate": 1.5886455798864557e-05, "loss": 0.6723, "step": 24457 }, { "epoch": 0.7140813406907828, "grad_norm": 0.5435141049959024, "learning_rate": 1.588483373884834e-05, "loss": 0.6144, "step": 24458 }, { "epoch": 0.7141105369186301, "grad_norm": 0.5257995251110018, "learning_rate": 1.588321167883212e-05, "loss": 0.5392, "step": 24459 }, { "epoch": 0.7141397331464775, "grad_norm": 0.5165111681530791, "learning_rate": 1.58815896188159e-05, "loss": 0.5676, "step": 24460 }, { "epoch": 0.7141689293743249, "grad_norm": 0.5160746700551075, "learning_rate": 1.5879967558799677e-05, "loss": 0.5903, "step": 24461 }, { "epoch": 0.7141981256021722, "grad_norm": 0.5521974012511938, "learning_rate": 1.5878345498783455e-05, "loss": 0.6003, "step": 24462 }, { "epoch": 0.7142273218300196, "grad_norm": 0.5066576170964358, "learning_rate": 1.5876723438767237e-05, "loss": 0.5981, "step": 24463 }, { "epoch": 0.7142565180578669, "grad_norm": 0.5563825713209029, "learning_rate": 1.5875101378751015e-05, "loss": 0.6653, "step": 24464 }, { "epoch": 0.7142857142857143, "grad_norm": 0.48409551280281743, "learning_rate": 1.5873479318734794e-05, "loss": 0.5082, "step": 24465 }, { "epoch": 0.7143149105135617, "grad_norm": 0.4929496958963307, "learning_rate": 1.5871857258718572e-05, "loss": 0.5429, "step": 24466 }, { "epoch": 0.714344106741409, "grad_norm": 0.5210645663451644, "learning_rate": 1.587023519870235e-05, "loss": 0.5849, "step": 24467 }, { "epoch": 0.7143733029692564, "grad_norm": 0.49981294149368216, "learning_rate": 1.5868613138686132e-05, "loss": 0.5562, "step": 24468 }, { "epoch": 0.7144024991971037, "grad_norm": 0.5387394917624767, "learning_rate": 1.5866991078669914e-05, "loss": 0.6435, "step": 24469 }, { "epoch": 0.7144316954249511, "grad_norm": 0.49771111572963234, "learning_rate": 1.5865369018653692e-05, "loss": 0.5623, "step": 24470 }, { "epoch": 0.7144608916527985, "grad_norm": 0.5171793497365363, "learning_rate": 1.586374695863747e-05, "loss": 0.5609, "step": 24471 }, { "epoch": 0.7144900878806458, "grad_norm": 0.5183602057282575, "learning_rate": 1.586212489862125e-05, "loss": 0.5615, "step": 24472 }, { "epoch": 0.7145192841084932, "grad_norm": 0.5227497249087266, "learning_rate": 1.5860502838605028e-05, "loss": 0.577, "step": 24473 }, { "epoch": 0.7145484803363406, "grad_norm": 0.5214265070976798, "learning_rate": 1.585888077858881e-05, "loss": 0.612, "step": 24474 }, { "epoch": 0.7145776765641879, "grad_norm": 0.5578517283031084, "learning_rate": 1.5857258718572588e-05, "loss": 0.6289, "step": 24475 }, { "epoch": 0.7146068727920353, "grad_norm": 0.5092872013109281, "learning_rate": 1.5855636658556366e-05, "loss": 0.57, "step": 24476 }, { "epoch": 0.7146360690198826, "grad_norm": 0.53385015275625, "learning_rate": 1.5854014598540144e-05, "loss": 0.6128, "step": 24477 }, { "epoch": 0.71466526524773, "grad_norm": 0.5663294443739286, "learning_rate": 1.5852392538523926e-05, "loss": 0.6667, "step": 24478 }, { "epoch": 0.7146944614755774, "grad_norm": 0.5627879247717066, "learning_rate": 1.5850770478507708e-05, "loss": 0.5903, "step": 24479 }, { "epoch": 0.7147236577034247, "grad_norm": 0.575985762443855, "learning_rate": 1.5849148418491486e-05, "loss": 0.6545, "step": 24480 }, { "epoch": 0.7147528539312721, "grad_norm": 0.5044767658332489, "learning_rate": 1.5847526358475265e-05, "loss": 0.5366, "step": 24481 }, { "epoch": 0.7147820501591194, "grad_norm": 0.48776463524150165, "learning_rate": 1.5845904298459043e-05, "loss": 0.5461, "step": 24482 }, { "epoch": 0.7148112463869668, "grad_norm": 0.5320592218892515, "learning_rate": 1.584428223844282e-05, "loss": 0.6206, "step": 24483 }, { "epoch": 0.7148404426148142, "grad_norm": 0.5105942743621079, "learning_rate": 1.5842660178426603e-05, "loss": 0.5784, "step": 24484 }, { "epoch": 0.7148696388426615, "grad_norm": 0.5176540323148581, "learning_rate": 1.584103811841038e-05, "loss": 0.5829, "step": 24485 }, { "epoch": 0.7148988350705089, "grad_norm": 0.4834020398922278, "learning_rate": 1.583941605839416e-05, "loss": 0.5209, "step": 24486 }, { "epoch": 0.7149280312983562, "grad_norm": 0.5743384774821121, "learning_rate": 1.583779399837794e-05, "loss": 0.6523, "step": 24487 }, { "epoch": 0.7149572275262036, "grad_norm": 0.5254925802142096, "learning_rate": 1.583617193836172e-05, "loss": 0.6374, "step": 24488 }, { "epoch": 0.714986423754051, "grad_norm": 0.5598108315296847, "learning_rate": 1.5834549878345502e-05, "loss": 0.6828, "step": 24489 }, { "epoch": 0.7150156199818983, "grad_norm": 0.48985635176680176, "learning_rate": 1.583292781832928e-05, "loss": 0.5034, "step": 24490 }, { "epoch": 0.7150448162097457, "grad_norm": 0.544023833417132, "learning_rate": 1.583130575831306e-05, "loss": 0.621, "step": 24491 }, { "epoch": 0.715074012437593, "grad_norm": 0.5023279331320203, "learning_rate": 1.5829683698296837e-05, "loss": 0.5539, "step": 24492 }, { "epoch": 0.7151032086654404, "grad_norm": 0.5586227694313874, "learning_rate": 1.5828061638280615e-05, "loss": 0.6832, "step": 24493 }, { "epoch": 0.7151324048932878, "grad_norm": 0.502279034071624, "learning_rate": 1.5826439578264397e-05, "loss": 0.5196, "step": 24494 }, { "epoch": 0.7151616011211351, "grad_norm": 0.5562081659834239, "learning_rate": 1.5824817518248175e-05, "loss": 0.5986, "step": 24495 }, { "epoch": 0.7151907973489825, "grad_norm": 0.5311048122140287, "learning_rate": 1.5823195458231954e-05, "loss": 0.5824, "step": 24496 }, { "epoch": 0.7152199935768299, "grad_norm": 0.5486833327395194, "learning_rate": 1.5821573398215736e-05, "loss": 0.604, "step": 24497 }, { "epoch": 0.7152491898046772, "grad_norm": 0.5236948998760128, "learning_rate": 1.5819951338199514e-05, "loss": 0.6083, "step": 24498 }, { "epoch": 0.7152783860325246, "grad_norm": 0.526891817769641, "learning_rate": 1.5818329278183296e-05, "loss": 0.615, "step": 24499 }, { "epoch": 0.7153075822603719, "grad_norm": 0.49553277682955177, "learning_rate": 1.5816707218167074e-05, "loss": 0.5394, "step": 24500 }, { "epoch": 0.7153367784882193, "grad_norm": 0.558343390335631, "learning_rate": 1.5815085158150852e-05, "loss": 0.6677, "step": 24501 }, { "epoch": 0.7153659747160667, "grad_norm": 0.4649689429059282, "learning_rate": 1.581346309813463e-05, "loss": 0.4919, "step": 24502 }, { "epoch": 0.715395170943914, "grad_norm": 0.5526442366685175, "learning_rate": 1.581184103811841e-05, "loss": 0.5893, "step": 24503 }, { "epoch": 0.7154243671717614, "grad_norm": 0.528160125882689, "learning_rate": 1.581021897810219e-05, "loss": 0.6327, "step": 24504 }, { "epoch": 0.7154535633996087, "grad_norm": 0.4896013276714316, "learning_rate": 1.580859691808597e-05, "loss": 0.5421, "step": 24505 }, { "epoch": 0.7154827596274561, "grad_norm": 0.48085513031029015, "learning_rate": 1.5806974858069748e-05, "loss": 0.5561, "step": 24506 }, { "epoch": 0.7155119558553035, "grad_norm": 0.6010740932920237, "learning_rate": 1.580535279805353e-05, "loss": 0.6547, "step": 24507 }, { "epoch": 0.7155411520831508, "grad_norm": 0.5622773870954647, "learning_rate": 1.5803730738037308e-05, "loss": 0.6662, "step": 24508 }, { "epoch": 0.7155703483109982, "grad_norm": 0.4753709044675647, "learning_rate": 1.580210867802109e-05, "loss": 0.5232, "step": 24509 }, { "epoch": 0.7155995445388456, "grad_norm": 0.5036549387067275, "learning_rate": 1.5800486618004868e-05, "loss": 0.5532, "step": 24510 }, { "epoch": 0.7156287407666929, "grad_norm": 0.4951214719752831, "learning_rate": 1.5798864557988646e-05, "loss": 0.5588, "step": 24511 }, { "epoch": 0.7156579369945403, "grad_norm": 0.51275195892265, "learning_rate": 1.5797242497972425e-05, "loss": 0.5983, "step": 24512 }, { "epoch": 0.7156871332223876, "grad_norm": 0.5313608625827901, "learning_rate": 1.5795620437956203e-05, "loss": 0.6577, "step": 24513 }, { "epoch": 0.715716329450235, "grad_norm": 0.5419532321404575, "learning_rate": 1.5793998377939985e-05, "loss": 0.6355, "step": 24514 }, { "epoch": 0.7157455256780824, "grad_norm": 0.575737742695339, "learning_rate": 1.5792376317923763e-05, "loss": 0.6702, "step": 24515 }, { "epoch": 0.7157747219059297, "grad_norm": 0.536500482401737, "learning_rate": 1.5790754257907545e-05, "loss": 0.6534, "step": 24516 }, { "epoch": 0.7158039181337771, "grad_norm": 0.48938362683496034, "learning_rate": 1.5789132197891323e-05, "loss": 0.5459, "step": 24517 }, { "epoch": 0.7158331143616244, "grad_norm": 0.5263955639389933, "learning_rate": 1.5787510137875102e-05, "loss": 0.5764, "step": 24518 }, { "epoch": 0.7158623105894718, "grad_norm": 0.5510240827004346, "learning_rate": 1.5785888077858884e-05, "loss": 0.6031, "step": 24519 }, { "epoch": 0.7158915068173192, "grad_norm": 0.5201466517048633, "learning_rate": 1.5784266017842662e-05, "loss": 0.5695, "step": 24520 }, { "epoch": 0.7159207030451665, "grad_norm": 0.5308071654000168, "learning_rate": 1.578264395782644e-05, "loss": 0.6153, "step": 24521 }, { "epoch": 0.7159498992730139, "grad_norm": 0.5057673232185422, "learning_rate": 1.578102189781022e-05, "loss": 0.5774, "step": 24522 }, { "epoch": 0.7159790955008613, "grad_norm": 0.4677357057802583, "learning_rate": 1.5779399837793997e-05, "loss": 0.513, "step": 24523 }, { "epoch": 0.7160082917287086, "grad_norm": 0.5033616662336048, "learning_rate": 1.577777777777778e-05, "loss": 0.5922, "step": 24524 }, { "epoch": 0.716037487956556, "grad_norm": 0.48100865266811005, "learning_rate": 1.577615571776156e-05, "loss": 0.5194, "step": 24525 }, { "epoch": 0.7160666841844033, "grad_norm": 0.5411010740896338, "learning_rate": 1.577453365774534e-05, "loss": 0.6045, "step": 24526 }, { "epoch": 0.7160958804122507, "grad_norm": 0.504486180308048, "learning_rate": 1.5772911597729117e-05, "loss": 0.5994, "step": 24527 }, { "epoch": 0.7161250766400981, "grad_norm": 0.4665488750274193, "learning_rate": 1.5771289537712896e-05, "loss": 0.5039, "step": 24528 }, { "epoch": 0.7161542728679454, "grad_norm": 0.4809393942331895, "learning_rate": 1.5769667477696674e-05, "loss": 0.5209, "step": 24529 }, { "epoch": 0.7161834690957928, "grad_norm": 0.5250751018017937, "learning_rate": 1.5768045417680456e-05, "loss": 0.5931, "step": 24530 }, { "epoch": 0.7162126653236401, "grad_norm": 0.5134591036502552, "learning_rate": 1.5766423357664234e-05, "loss": 0.5647, "step": 24531 }, { "epoch": 0.7162418615514875, "grad_norm": 0.5024819676106802, "learning_rate": 1.5764801297648013e-05, "loss": 0.5556, "step": 24532 }, { "epoch": 0.7162710577793349, "grad_norm": 0.546706493429353, "learning_rate": 1.576317923763179e-05, "loss": 0.6445, "step": 24533 }, { "epoch": 0.7163002540071822, "grad_norm": 0.5255576763987806, "learning_rate": 1.5761557177615573e-05, "loss": 0.6177, "step": 24534 }, { "epoch": 0.7163294502350296, "grad_norm": 0.5221273942651669, "learning_rate": 1.5759935117599354e-05, "loss": 0.5517, "step": 24535 }, { "epoch": 0.716358646462877, "grad_norm": 0.5568192604876684, "learning_rate": 1.5758313057583133e-05, "loss": 0.6139, "step": 24536 }, { "epoch": 0.7163878426907243, "grad_norm": 0.5233772959418482, "learning_rate": 1.575669099756691e-05, "loss": 0.5561, "step": 24537 }, { "epoch": 0.7164170389185717, "grad_norm": 0.5178650892424836, "learning_rate": 1.575506893755069e-05, "loss": 0.6076, "step": 24538 }, { "epoch": 0.716446235146419, "grad_norm": 0.5551469812904158, "learning_rate": 1.5753446877534468e-05, "loss": 0.6871, "step": 24539 }, { "epoch": 0.7164754313742664, "grad_norm": 0.5165887020107154, "learning_rate": 1.575182481751825e-05, "loss": 0.6148, "step": 24540 }, { "epoch": 0.7165046276021138, "grad_norm": 0.5329067293292186, "learning_rate": 1.5750202757502028e-05, "loss": 0.6314, "step": 24541 }, { "epoch": 0.7165338238299611, "grad_norm": 0.5365003205826402, "learning_rate": 1.5748580697485806e-05, "loss": 0.5784, "step": 24542 }, { "epoch": 0.7165630200578086, "grad_norm": 0.5147226534970438, "learning_rate": 1.5746958637469585e-05, "loss": 0.5923, "step": 24543 }, { "epoch": 0.716592216285656, "grad_norm": 0.5226241090898459, "learning_rate": 1.5745336577453367e-05, "loss": 0.6023, "step": 24544 }, { "epoch": 0.7166214125135033, "grad_norm": 0.5151491346831794, "learning_rate": 1.574371451743715e-05, "loss": 0.5867, "step": 24545 }, { "epoch": 0.7166506087413507, "grad_norm": 0.5145937015753217, "learning_rate": 1.5742092457420927e-05, "loss": 0.5481, "step": 24546 }, { "epoch": 0.716679804969198, "grad_norm": 0.5227689875170087, "learning_rate": 1.5740470397404705e-05, "loss": 0.6038, "step": 24547 }, { "epoch": 0.7167090011970454, "grad_norm": 0.5608734738430182, "learning_rate": 1.5738848337388483e-05, "loss": 0.6545, "step": 24548 }, { "epoch": 0.7167381974248928, "grad_norm": 0.5280876983961572, "learning_rate": 1.5737226277372262e-05, "loss": 0.5831, "step": 24549 }, { "epoch": 0.7167673936527401, "grad_norm": 0.5156756270996046, "learning_rate": 1.5735604217356044e-05, "loss": 0.5614, "step": 24550 }, { "epoch": 0.7167965898805875, "grad_norm": 0.5091122861051876, "learning_rate": 1.5733982157339822e-05, "loss": 0.618, "step": 24551 }, { "epoch": 0.7168257861084348, "grad_norm": 0.5406051626663555, "learning_rate": 1.57323600973236e-05, "loss": 0.6418, "step": 24552 }, { "epoch": 0.7168549823362822, "grad_norm": 0.47496876444365765, "learning_rate": 1.573073803730738e-05, "loss": 0.5066, "step": 24553 }, { "epoch": 0.7168841785641296, "grad_norm": 0.5071786562184749, "learning_rate": 1.572911597729116e-05, "loss": 0.5471, "step": 24554 }, { "epoch": 0.7169133747919769, "grad_norm": 0.46626363753584044, "learning_rate": 1.5727493917274942e-05, "loss": 0.4707, "step": 24555 }, { "epoch": 0.7169425710198243, "grad_norm": 0.6133744140932946, "learning_rate": 1.572587185725872e-05, "loss": 0.8028, "step": 24556 }, { "epoch": 0.7169717672476716, "grad_norm": 0.504936398962298, "learning_rate": 1.57242497972425e-05, "loss": 0.5812, "step": 24557 }, { "epoch": 0.717000963475519, "grad_norm": 0.4676843930929437, "learning_rate": 1.5722627737226277e-05, "loss": 0.517, "step": 24558 }, { "epoch": 0.7170301597033664, "grad_norm": 0.5521772719287921, "learning_rate": 1.5721005677210056e-05, "loss": 0.6494, "step": 24559 }, { "epoch": 0.7170593559312137, "grad_norm": 0.5233783960305323, "learning_rate": 1.5719383617193838e-05, "loss": 0.5881, "step": 24560 }, { "epoch": 0.7170885521590611, "grad_norm": 0.5418088309126811, "learning_rate": 1.5717761557177616e-05, "loss": 0.6163, "step": 24561 }, { "epoch": 0.7171177483869084, "grad_norm": 0.5038838967142484, "learning_rate": 1.5716139497161394e-05, "loss": 0.5509, "step": 24562 }, { "epoch": 0.7171469446147558, "grad_norm": 0.5012135541453827, "learning_rate": 1.5714517437145176e-05, "loss": 0.5597, "step": 24563 }, { "epoch": 0.7171761408426032, "grad_norm": 0.4893877166604734, "learning_rate": 1.5712895377128954e-05, "loss": 0.5083, "step": 24564 }, { "epoch": 0.7172053370704505, "grad_norm": 0.6576779720668183, "learning_rate": 1.5711273317112736e-05, "loss": 0.707, "step": 24565 }, { "epoch": 0.7172345332982979, "grad_norm": 0.47269710258955366, "learning_rate": 1.5709651257096515e-05, "loss": 0.5042, "step": 24566 }, { "epoch": 0.7172637295261453, "grad_norm": 0.5409892279919927, "learning_rate": 1.5708029197080293e-05, "loss": 0.6603, "step": 24567 }, { "epoch": 0.7172929257539926, "grad_norm": 0.5032930779838186, "learning_rate": 1.570640713706407e-05, "loss": 0.565, "step": 24568 }, { "epoch": 0.71732212198184, "grad_norm": 0.5470140019660497, "learning_rate": 1.570478507704785e-05, "loss": 0.6548, "step": 24569 }, { "epoch": 0.7173513182096873, "grad_norm": 0.505604186053845, "learning_rate": 1.570316301703163e-05, "loss": 0.5526, "step": 24570 }, { "epoch": 0.7173805144375347, "grad_norm": 0.5291649129159732, "learning_rate": 1.570154095701541e-05, "loss": 0.5935, "step": 24571 }, { "epoch": 0.7174097106653821, "grad_norm": 0.5235486542430602, "learning_rate": 1.5699918896999188e-05, "loss": 0.6019, "step": 24572 }, { "epoch": 0.7174389068932294, "grad_norm": 0.5163592954448973, "learning_rate": 1.569829683698297e-05, "loss": 0.5796, "step": 24573 }, { "epoch": 0.7174681031210768, "grad_norm": 0.5278754203906628, "learning_rate": 1.569667477696675e-05, "loss": 0.5883, "step": 24574 }, { "epoch": 0.7174972993489241, "grad_norm": 0.5370056454300394, "learning_rate": 1.569505271695053e-05, "loss": 0.5877, "step": 24575 }, { "epoch": 0.7175264955767715, "grad_norm": 0.5451995593888769, "learning_rate": 1.569343065693431e-05, "loss": 0.5964, "step": 24576 }, { "epoch": 0.7175556918046189, "grad_norm": 0.48844942066380187, "learning_rate": 1.5691808596918087e-05, "loss": 0.5014, "step": 24577 }, { "epoch": 0.7175848880324662, "grad_norm": 0.5858742337598152, "learning_rate": 1.5690186536901865e-05, "loss": 0.679, "step": 24578 }, { "epoch": 0.7176140842603136, "grad_norm": 0.5467904141730823, "learning_rate": 1.5688564476885644e-05, "loss": 0.6574, "step": 24579 }, { "epoch": 0.717643280488161, "grad_norm": 0.5234757681789363, "learning_rate": 1.5686942416869425e-05, "loss": 0.5834, "step": 24580 }, { "epoch": 0.7176724767160083, "grad_norm": 0.5587558923221962, "learning_rate": 1.5685320356853204e-05, "loss": 0.6988, "step": 24581 }, { "epoch": 0.7177016729438557, "grad_norm": 0.5007705076197453, "learning_rate": 1.5683698296836985e-05, "loss": 0.5317, "step": 24582 }, { "epoch": 0.717730869171703, "grad_norm": 0.541211008832598, "learning_rate": 1.5682076236820764e-05, "loss": 0.6082, "step": 24583 }, { "epoch": 0.7177600653995504, "grad_norm": 0.5032630657859573, "learning_rate": 1.5680454176804542e-05, "loss": 0.6048, "step": 24584 }, { "epoch": 0.7177892616273978, "grad_norm": 0.5438235659040567, "learning_rate": 1.5678832116788324e-05, "loss": 0.6221, "step": 24585 }, { "epoch": 0.7178184578552451, "grad_norm": 0.5536332071547466, "learning_rate": 1.5677210056772102e-05, "loss": 0.6036, "step": 24586 }, { "epoch": 0.7178476540830925, "grad_norm": 0.5043618727889487, "learning_rate": 1.567558799675588e-05, "loss": 0.5516, "step": 24587 }, { "epoch": 0.7178768503109398, "grad_norm": 0.5048713365273809, "learning_rate": 1.567396593673966e-05, "loss": 0.5644, "step": 24588 }, { "epoch": 0.7179060465387872, "grad_norm": 0.5122415193013776, "learning_rate": 1.5672343876723437e-05, "loss": 0.568, "step": 24589 }, { "epoch": 0.7179352427666346, "grad_norm": 0.5425050687815853, "learning_rate": 1.567072181670722e-05, "loss": 0.6444, "step": 24590 }, { "epoch": 0.7179644389944819, "grad_norm": 0.5210249194508407, "learning_rate": 1.5669099756690998e-05, "loss": 0.5692, "step": 24591 }, { "epoch": 0.7179936352223293, "grad_norm": 0.56665196825446, "learning_rate": 1.566747769667478e-05, "loss": 0.6796, "step": 24592 }, { "epoch": 0.7180228314501766, "grad_norm": 0.5482117823011292, "learning_rate": 1.5665855636658558e-05, "loss": 0.6727, "step": 24593 }, { "epoch": 0.718052027678024, "grad_norm": 0.5572015561379304, "learning_rate": 1.5664233576642336e-05, "loss": 0.6641, "step": 24594 }, { "epoch": 0.7180812239058714, "grad_norm": 0.4718030540616208, "learning_rate": 1.5662611516626115e-05, "loss": 0.5276, "step": 24595 }, { "epoch": 0.7181104201337187, "grad_norm": 0.5471090672327588, "learning_rate": 1.5660989456609896e-05, "loss": 0.6589, "step": 24596 }, { "epoch": 0.7181396163615661, "grad_norm": 0.5884556578794007, "learning_rate": 1.5659367396593675e-05, "loss": 0.6501, "step": 24597 }, { "epoch": 0.7181688125894135, "grad_norm": 0.491263215595266, "learning_rate": 1.5657745336577453e-05, "loss": 0.5395, "step": 24598 }, { "epoch": 0.7181980088172608, "grad_norm": 0.5022941327275051, "learning_rate": 1.565612327656123e-05, "loss": 0.572, "step": 24599 }, { "epoch": 0.7182272050451082, "grad_norm": 0.5365201481457123, "learning_rate": 1.5654501216545013e-05, "loss": 0.5944, "step": 24600 }, { "epoch": 0.7182564012729555, "grad_norm": 0.5346953837893179, "learning_rate": 1.5652879156528795e-05, "loss": 0.6455, "step": 24601 }, { "epoch": 0.7182855975008029, "grad_norm": 0.5074614331291101, "learning_rate": 1.5651257096512573e-05, "loss": 0.5661, "step": 24602 }, { "epoch": 0.7183147937286503, "grad_norm": 0.5886582316573751, "learning_rate": 1.564963503649635e-05, "loss": 0.7005, "step": 24603 }, { "epoch": 0.7183439899564976, "grad_norm": 0.5119971035965171, "learning_rate": 1.564801297648013e-05, "loss": 0.5499, "step": 24604 }, { "epoch": 0.718373186184345, "grad_norm": 0.5295199214574109, "learning_rate": 1.564639091646391e-05, "loss": 0.5827, "step": 24605 }, { "epoch": 0.7184023824121923, "grad_norm": 0.48642650068590615, "learning_rate": 1.564476885644769e-05, "loss": 0.5206, "step": 24606 }, { "epoch": 0.7184315786400397, "grad_norm": 0.5163180753733966, "learning_rate": 1.564314679643147e-05, "loss": 0.5681, "step": 24607 }, { "epoch": 0.7184607748678871, "grad_norm": 0.5695782714684983, "learning_rate": 1.5641524736415247e-05, "loss": 0.6642, "step": 24608 }, { "epoch": 0.7184899710957344, "grad_norm": 0.6020407751388106, "learning_rate": 1.5639902676399025e-05, "loss": 0.5729, "step": 24609 }, { "epoch": 0.7185191673235818, "grad_norm": 0.5339766295315931, "learning_rate": 1.5638280616382807e-05, "loss": 0.6545, "step": 24610 }, { "epoch": 0.7185483635514291, "grad_norm": 0.5767770235734905, "learning_rate": 1.563665855636659e-05, "loss": 0.604, "step": 24611 }, { "epoch": 0.7185775597792765, "grad_norm": 0.5267803805606818, "learning_rate": 1.5635036496350367e-05, "loss": 0.5981, "step": 24612 }, { "epoch": 0.7186067560071239, "grad_norm": 0.544481189052067, "learning_rate": 1.5633414436334146e-05, "loss": 0.6186, "step": 24613 }, { "epoch": 0.7186359522349712, "grad_norm": 0.5629534060337086, "learning_rate": 1.5631792376317924e-05, "loss": 0.632, "step": 24614 }, { "epoch": 0.7186651484628186, "grad_norm": 0.45631183968017974, "learning_rate": 1.5630170316301702e-05, "loss": 0.4641, "step": 24615 }, { "epoch": 0.718694344690666, "grad_norm": 0.5641845030350385, "learning_rate": 1.5628548256285484e-05, "loss": 0.6892, "step": 24616 }, { "epoch": 0.7187235409185133, "grad_norm": 0.5596607419696396, "learning_rate": 1.5626926196269262e-05, "loss": 0.6988, "step": 24617 }, { "epoch": 0.7187527371463607, "grad_norm": 0.5468130544052638, "learning_rate": 1.562530413625304e-05, "loss": 0.6663, "step": 24618 }, { "epoch": 0.718781933374208, "grad_norm": 0.5220142688272418, "learning_rate": 1.562368207623682e-05, "loss": 0.5862, "step": 24619 }, { "epoch": 0.7188111296020554, "grad_norm": 0.4964944111997092, "learning_rate": 1.56220600162206e-05, "loss": 0.518, "step": 24620 }, { "epoch": 0.7188403258299028, "grad_norm": 0.4983285824372699, "learning_rate": 1.5620437956204383e-05, "loss": 0.5897, "step": 24621 }, { "epoch": 0.7188695220577501, "grad_norm": 0.5343874982872981, "learning_rate": 1.561881589618816e-05, "loss": 0.6509, "step": 24622 }, { "epoch": 0.7188987182855975, "grad_norm": 0.5225421896339104, "learning_rate": 1.561719383617194e-05, "loss": 0.5635, "step": 24623 }, { "epoch": 0.7189279145134448, "grad_norm": 0.499600330206566, "learning_rate": 1.5615571776155718e-05, "loss": 0.5353, "step": 24624 }, { "epoch": 0.7189571107412922, "grad_norm": 0.4886585785751888, "learning_rate": 1.5613949716139496e-05, "loss": 0.5252, "step": 24625 }, { "epoch": 0.7189863069691396, "grad_norm": 0.5201312607688124, "learning_rate": 1.5612327656123278e-05, "loss": 0.6022, "step": 24626 }, { "epoch": 0.7190155031969869, "grad_norm": 0.5103788998257051, "learning_rate": 1.5610705596107056e-05, "loss": 0.5724, "step": 24627 }, { "epoch": 0.7190446994248343, "grad_norm": 0.5253685246603079, "learning_rate": 1.5609083536090835e-05, "loss": 0.6022, "step": 24628 }, { "epoch": 0.7190738956526816, "grad_norm": 0.5768514074121114, "learning_rate": 1.5607461476074616e-05, "loss": 0.7153, "step": 24629 }, { "epoch": 0.719103091880529, "grad_norm": 0.5190608201221024, "learning_rate": 1.5605839416058395e-05, "loss": 0.5733, "step": 24630 }, { "epoch": 0.7191322881083764, "grad_norm": 0.5189668097128111, "learning_rate": 1.5604217356042177e-05, "loss": 0.5717, "step": 24631 }, { "epoch": 0.7191614843362237, "grad_norm": 0.503962914582667, "learning_rate": 1.5602595296025955e-05, "loss": 0.5489, "step": 24632 }, { "epoch": 0.7191906805640711, "grad_norm": 0.5089684895021402, "learning_rate": 1.5600973236009733e-05, "loss": 0.5667, "step": 24633 }, { "epoch": 0.7192198767919185, "grad_norm": 0.5268121407429045, "learning_rate": 1.5599351175993512e-05, "loss": 0.5865, "step": 24634 }, { "epoch": 0.7192490730197658, "grad_norm": 0.4944170876918874, "learning_rate": 1.559772911597729e-05, "loss": 0.5154, "step": 24635 }, { "epoch": 0.7192782692476132, "grad_norm": 0.5515302611816078, "learning_rate": 1.5596107055961072e-05, "loss": 0.6665, "step": 24636 }, { "epoch": 0.7193074654754605, "grad_norm": 0.5469028949896031, "learning_rate": 1.559448499594485e-05, "loss": 0.6656, "step": 24637 }, { "epoch": 0.7193366617033079, "grad_norm": 0.5045215555041518, "learning_rate": 1.559286293592863e-05, "loss": 0.5488, "step": 24638 }, { "epoch": 0.7193658579311553, "grad_norm": 0.5164300858406062, "learning_rate": 1.559124087591241e-05, "loss": 0.5788, "step": 24639 }, { "epoch": 0.7193950541590026, "grad_norm": 0.5190378893311139, "learning_rate": 1.558961881589619e-05, "loss": 0.5755, "step": 24640 }, { "epoch": 0.71942425038685, "grad_norm": 0.532215043993796, "learning_rate": 1.558799675587997e-05, "loss": 0.6048, "step": 24641 }, { "epoch": 0.7194534466146973, "grad_norm": 0.52377473344521, "learning_rate": 1.558637469586375e-05, "loss": 0.5876, "step": 24642 }, { "epoch": 0.7194826428425447, "grad_norm": 0.5170962576231681, "learning_rate": 1.5584752635847527e-05, "loss": 0.5989, "step": 24643 }, { "epoch": 0.7195118390703921, "grad_norm": 0.5962537204021822, "learning_rate": 1.5583130575831306e-05, "loss": 0.6845, "step": 24644 }, { "epoch": 0.7195410352982394, "grad_norm": 0.5518633550270711, "learning_rate": 1.5581508515815084e-05, "loss": 0.6528, "step": 24645 }, { "epoch": 0.7195702315260868, "grad_norm": 0.5183159581785292, "learning_rate": 1.5579886455798866e-05, "loss": 0.6323, "step": 24646 }, { "epoch": 0.7195994277539342, "grad_norm": 0.4900139624495911, "learning_rate": 1.5578264395782644e-05, "loss": 0.5629, "step": 24647 }, { "epoch": 0.7196286239817815, "grad_norm": 0.4962673800800013, "learning_rate": 1.5576642335766426e-05, "loss": 0.5758, "step": 24648 }, { "epoch": 0.7196578202096289, "grad_norm": 0.5344783188034691, "learning_rate": 1.5575020275750204e-05, "loss": 0.639, "step": 24649 }, { "epoch": 0.7196870164374762, "grad_norm": 0.5356264145067194, "learning_rate": 1.5573398215733983e-05, "loss": 0.6371, "step": 24650 }, { "epoch": 0.7197162126653236, "grad_norm": 0.5280204163623489, "learning_rate": 1.557177615571776e-05, "loss": 0.5405, "step": 24651 }, { "epoch": 0.719745408893171, "grad_norm": 0.5223089753161124, "learning_rate": 1.5570154095701543e-05, "loss": 0.5792, "step": 24652 }, { "epoch": 0.7197746051210183, "grad_norm": 0.4878742647735707, "learning_rate": 1.556853203568532e-05, "loss": 0.5325, "step": 24653 }, { "epoch": 0.7198038013488657, "grad_norm": 0.4785611000572481, "learning_rate": 1.55669099756691e-05, "loss": 0.4919, "step": 24654 }, { "epoch": 0.719832997576713, "grad_norm": 0.5266633188297378, "learning_rate": 1.5565287915652878e-05, "loss": 0.6165, "step": 24655 }, { "epoch": 0.7198621938045604, "grad_norm": 0.56177503671561, "learning_rate": 1.556366585563666e-05, "loss": 0.644, "step": 24656 }, { "epoch": 0.7198913900324078, "grad_norm": 0.5172587176531712, "learning_rate": 1.5562043795620438e-05, "loss": 0.5817, "step": 24657 }, { "epoch": 0.7199205862602551, "grad_norm": 0.5352116798542775, "learning_rate": 1.556042173560422e-05, "loss": 0.6104, "step": 24658 }, { "epoch": 0.7199497824881025, "grad_norm": 0.5390187461225997, "learning_rate": 1.5558799675587998e-05, "loss": 0.6341, "step": 24659 }, { "epoch": 0.7199789787159498, "grad_norm": 0.5389305187898611, "learning_rate": 1.5557177615571777e-05, "loss": 0.609, "step": 24660 }, { "epoch": 0.7200081749437972, "grad_norm": 0.5166380977029147, "learning_rate": 1.5555555555555555e-05, "loss": 0.5843, "step": 24661 }, { "epoch": 0.7200373711716446, "grad_norm": 0.5451477803730014, "learning_rate": 1.5553933495539337e-05, "loss": 0.6605, "step": 24662 }, { "epoch": 0.7200665673994919, "grad_norm": 0.509780879757566, "learning_rate": 1.5552311435523115e-05, "loss": 0.5087, "step": 24663 }, { "epoch": 0.7200957636273394, "grad_norm": 0.5193863342905092, "learning_rate": 1.5550689375506893e-05, "loss": 0.5466, "step": 24664 }, { "epoch": 0.7201249598551868, "grad_norm": 0.4989968881638487, "learning_rate": 1.5549067315490672e-05, "loss": 0.5747, "step": 24665 }, { "epoch": 0.7201541560830341, "grad_norm": 0.54535590321334, "learning_rate": 1.5547445255474454e-05, "loss": 0.6408, "step": 24666 }, { "epoch": 0.7201833523108815, "grad_norm": 0.5118619706257916, "learning_rate": 1.5545823195458235e-05, "loss": 0.5626, "step": 24667 }, { "epoch": 0.7202125485387288, "grad_norm": 0.561896150696609, "learning_rate": 1.5544201135442014e-05, "loss": 0.6962, "step": 24668 }, { "epoch": 0.7202417447665762, "grad_norm": 0.5178189031142486, "learning_rate": 1.5542579075425792e-05, "loss": 0.6262, "step": 24669 }, { "epoch": 0.7202709409944236, "grad_norm": 0.5069351372876764, "learning_rate": 1.554095701540957e-05, "loss": 0.5809, "step": 24670 }, { "epoch": 0.7203001372222709, "grad_norm": 0.5289751974823619, "learning_rate": 1.553933495539335e-05, "loss": 0.6306, "step": 24671 }, { "epoch": 0.7203293334501183, "grad_norm": 0.6570823074016883, "learning_rate": 1.553771289537713e-05, "loss": 0.6814, "step": 24672 }, { "epoch": 0.7203585296779657, "grad_norm": 0.5315856853514282, "learning_rate": 1.553609083536091e-05, "loss": 0.6225, "step": 24673 }, { "epoch": 0.720387725905813, "grad_norm": 0.5506111680991732, "learning_rate": 1.5534468775344687e-05, "loss": 0.6576, "step": 24674 }, { "epoch": 0.7204169221336604, "grad_norm": 0.505631487121861, "learning_rate": 1.5532846715328466e-05, "loss": 0.5547, "step": 24675 }, { "epoch": 0.7204461183615077, "grad_norm": 0.5057432533197751, "learning_rate": 1.5531224655312247e-05, "loss": 0.5179, "step": 24676 }, { "epoch": 0.7204753145893551, "grad_norm": 0.5334894472928828, "learning_rate": 1.552960259529603e-05, "loss": 0.5877, "step": 24677 }, { "epoch": 0.7205045108172025, "grad_norm": 0.4961443175524219, "learning_rate": 1.5527980535279808e-05, "loss": 0.5377, "step": 24678 }, { "epoch": 0.7205337070450498, "grad_norm": 0.5142285966774943, "learning_rate": 1.5526358475263586e-05, "loss": 0.6015, "step": 24679 }, { "epoch": 0.7205629032728972, "grad_norm": 0.5114493093162792, "learning_rate": 1.5524736415247364e-05, "loss": 0.5788, "step": 24680 }, { "epoch": 0.7205920995007445, "grad_norm": 0.5817561355344434, "learning_rate": 1.5523114355231143e-05, "loss": 0.6048, "step": 24681 }, { "epoch": 0.7206212957285919, "grad_norm": 0.5292001006790165, "learning_rate": 1.5521492295214925e-05, "loss": 0.613, "step": 24682 }, { "epoch": 0.7206504919564393, "grad_norm": 0.5252728103898417, "learning_rate": 1.5519870235198703e-05, "loss": 0.5901, "step": 24683 }, { "epoch": 0.7206796881842866, "grad_norm": 0.549509494033027, "learning_rate": 1.551824817518248e-05, "loss": 0.6361, "step": 24684 }, { "epoch": 0.720708884412134, "grad_norm": 0.5564288257340055, "learning_rate": 1.551662611516626e-05, "loss": 0.7077, "step": 24685 }, { "epoch": 0.7207380806399813, "grad_norm": 0.5519944302702807, "learning_rate": 1.551500405515004e-05, "loss": 0.6521, "step": 24686 }, { "epoch": 0.7207672768678287, "grad_norm": 0.5463009985563415, "learning_rate": 1.5513381995133823e-05, "loss": 0.6433, "step": 24687 }, { "epoch": 0.7207964730956761, "grad_norm": 0.5093140961911534, "learning_rate": 1.55117599351176e-05, "loss": 0.5843, "step": 24688 }, { "epoch": 0.7208256693235234, "grad_norm": 0.5367587842055502, "learning_rate": 1.551013787510138e-05, "loss": 0.6271, "step": 24689 }, { "epoch": 0.7208548655513708, "grad_norm": 0.5139481679777911, "learning_rate": 1.5508515815085158e-05, "loss": 0.5965, "step": 24690 }, { "epoch": 0.7208840617792182, "grad_norm": 0.5895507779786727, "learning_rate": 1.5506893755068937e-05, "loss": 0.6515, "step": 24691 }, { "epoch": 0.7209132580070655, "grad_norm": 0.5263690974229128, "learning_rate": 1.550527169505272e-05, "loss": 0.6088, "step": 24692 }, { "epoch": 0.7209424542349129, "grad_norm": 0.5033200829851721, "learning_rate": 1.5503649635036497e-05, "loss": 0.5771, "step": 24693 }, { "epoch": 0.7209716504627602, "grad_norm": 0.525357733392116, "learning_rate": 1.5502027575020275e-05, "loss": 0.6187, "step": 24694 }, { "epoch": 0.7210008466906076, "grad_norm": 0.5062296696507554, "learning_rate": 1.5500405515004057e-05, "loss": 0.5451, "step": 24695 }, { "epoch": 0.721030042918455, "grad_norm": 0.48326386514167025, "learning_rate": 1.5498783454987835e-05, "loss": 0.5259, "step": 24696 }, { "epoch": 0.7210592391463023, "grad_norm": 0.5159984605904204, "learning_rate": 1.5497161394971617e-05, "loss": 0.5991, "step": 24697 }, { "epoch": 0.7210884353741497, "grad_norm": 0.5211754056558571, "learning_rate": 1.5495539334955395e-05, "loss": 0.5561, "step": 24698 }, { "epoch": 0.721117631601997, "grad_norm": 0.536892706146165, "learning_rate": 1.5493917274939174e-05, "loss": 0.6595, "step": 24699 }, { "epoch": 0.7211468278298444, "grad_norm": 0.50335225589631, "learning_rate": 1.5492295214922952e-05, "loss": 0.5672, "step": 24700 }, { "epoch": 0.7211760240576918, "grad_norm": 0.522343048539302, "learning_rate": 1.549067315490673e-05, "loss": 0.6206, "step": 24701 }, { "epoch": 0.7212052202855391, "grad_norm": 0.5249255957970649, "learning_rate": 1.5489051094890512e-05, "loss": 0.6319, "step": 24702 }, { "epoch": 0.7212344165133865, "grad_norm": 0.5168443625048047, "learning_rate": 1.548742903487429e-05, "loss": 0.5717, "step": 24703 }, { "epoch": 0.7212636127412339, "grad_norm": 0.5121958254862063, "learning_rate": 1.548580697485807e-05, "loss": 0.607, "step": 24704 }, { "epoch": 0.7212928089690812, "grad_norm": 0.5520377013782892, "learning_rate": 1.548418491484185e-05, "loss": 0.6724, "step": 24705 }, { "epoch": 0.7213220051969286, "grad_norm": 0.4838843664090997, "learning_rate": 1.548256285482563e-05, "loss": 0.5433, "step": 24706 }, { "epoch": 0.7213512014247759, "grad_norm": 0.49691921494713365, "learning_rate": 1.548094079480941e-05, "loss": 0.5655, "step": 24707 }, { "epoch": 0.7213803976526233, "grad_norm": 0.5342384774216955, "learning_rate": 1.547931873479319e-05, "loss": 0.6248, "step": 24708 }, { "epoch": 0.7214095938804707, "grad_norm": 0.46758370480364164, "learning_rate": 1.5477696674776968e-05, "loss": 0.5123, "step": 24709 }, { "epoch": 0.721438790108318, "grad_norm": 0.5911566251818708, "learning_rate": 1.5476074614760746e-05, "loss": 0.5298, "step": 24710 }, { "epoch": 0.7214679863361654, "grad_norm": 0.5320910281730861, "learning_rate": 1.5474452554744524e-05, "loss": 0.613, "step": 24711 }, { "epoch": 0.7214971825640127, "grad_norm": 0.5064659657008557, "learning_rate": 1.5472830494728306e-05, "loss": 0.58, "step": 24712 }, { "epoch": 0.7215263787918601, "grad_norm": 0.49926846968480015, "learning_rate": 1.5471208434712085e-05, "loss": 0.5509, "step": 24713 }, { "epoch": 0.7215555750197075, "grad_norm": 0.5480691463259785, "learning_rate": 1.5469586374695866e-05, "loss": 0.6173, "step": 24714 }, { "epoch": 0.7215847712475548, "grad_norm": 0.5270951377312637, "learning_rate": 1.5467964314679645e-05, "loss": 0.5863, "step": 24715 }, { "epoch": 0.7216139674754022, "grad_norm": 0.5149875234693261, "learning_rate": 1.5466342254663423e-05, "loss": 0.5559, "step": 24716 }, { "epoch": 0.7216431637032495, "grad_norm": 0.5291603545895792, "learning_rate": 1.54647201946472e-05, "loss": 0.5969, "step": 24717 }, { "epoch": 0.7216723599310969, "grad_norm": 0.4989839824947951, "learning_rate": 1.5463098134630983e-05, "loss": 0.5769, "step": 24718 }, { "epoch": 0.7217015561589443, "grad_norm": 0.5438005890377272, "learning_rate": 1.546147607461476e-05, "loss": 0.6299, "step": 24719 }, { "epoch": 0.7217307523867916, "grad_norm": 0.5172104029468524, "learning_rate": 1.545985401459854e-05, "loss": 0.5599, "step": 24720 }, { "epoch": 0.721759948614639, "grad_norm": 0.5575355829159648, "learning_rate": 1.545823195458232e-05, "loss": 0.6504, "step": 24721 }, { "epoch": 0.7217891448424864, "grad_norm": 0.5060537142205018, "learning_rate": 1.54566098945661e-05, "loss": 0.5216, "step": 24722 }, { "epoch": 0.7218183410703337, "grad_norm": 0.5083736830813033, "learning_rate": 1.545498783454988e-05, "loss": 0.5648, "step": 24723 }, { "epoch": 0.7218475372981811, "grad_norm": 0.5660679172482437, "learning_rate": 1.545336577453366e-05, "loss": 0.6655, "step": 24724 }, { "epoch": 0.7218767335260284, "grad_norm": 0.5471931906090292, "learning_rate": 1.545174371451744e-05, "loss": 0.6532, "step": 24725 }, { "epoch": 0.7219059297538758, "grad_norm": 0.5763436607447903, "learning_rate": 1.5450121654501217e-05, "loss": 0.6924, "step": 24726 }, { "epoch": 0.7219351259817232, "grad_norm": 0.5427535952300737, "learning_rate": 1.5448499594484995e-05, "loss": 0.6393, "step": 24727 }, { "epoch": 0.7219643222095705, "grad_norm": 0.5426638152491936, "learning_rate": 1.5446877534468777e-05, "loss": 0.6262, "step": 24728 }, { "epoch": 0.7219935184374179, "grad_norm": 0.5511304509605927, "learning_rate": 1.5445255474452556e-05, "loss": 0.6291, "step": 24729 }, { "epoch": 0.7220227146652652, "grad_norm": 0.5057577801953852, "learning_rate": 1.5443633414436334e-05, "loss": 0.5514, "step": 24730 }, { "epoch": 0.7220519108931126, "grad_norm": 0.48034526673042, "learning_rate": 1.5442011354420112e-05, "loss": 0.5073, "step": 24731 }, { "epoch": 0.72208110712096, "grad_norm": 0.5608265040553678, "learning_rate": 1.5440389294403894e-05, "loss": 0.6784, "step": 24732 }, { "epoch": 0.7221103033488073, "grad_norm": 0.5532409375710291, "learning_rate": 1.5438767234387676e-05, "loss": 0.6496, "step": 24733 }, { "epoch": 0.7221394995766547, "grad_norm": 0.5151279487870725, "learning_rate": 1.5437145174371454e-05, "loss": 0.5738, "step": 24734 }, { "epoch": 0.722168695804502, "grad_norm": 0.48706652556324304, "learning_rate": 1.5435523114355233e-05, "loss": 0.5444, "step": 24735 }, { "epoch": 0.7221978920323494, "grad_norm": 0.5191867935772282, "learning_rate": 1.543390105433901e-05, "loss": 0.611, "step": 24736 }, { "epoch": 0.7222270882601968, "grad_norm": 0.5192609025076661, "learning_rate": 1.543227899432279e-05, "loss": 0.5801, "step": 24737 }, { "epoch": 0.7222562844880441, "grad_norm": 0.5437939838050512, "learning_rate": 1.543065693430657e-05, "loss": 0.6942, "step": 24738 }, { "epoch": 0.7222854807158915, "grad_norm": 0.49466935524971717, "learning_rate": 1.542903487429035e-05, "loss": 0.5678, "step": 24739 }, { "epoch": 0.7223146769437389, "grad_norm": 0.4988961444953242, "learning_rate": 1.5427412814274128e-05, "loss": 0.5461, "step": 24740 }, { "epoch": 0.7223438731715862, "grad_norm": 0.5085671147980169, "learning_rate": 1.5425790754257906e-05, "loss": 0.5705, "step": 24741 }, { "epoch": 0.7223730693994336, "grad_norm": 0.5772164244083519, "learning_rate": 1.5424168694241688e-05, "loss": 0.6486, "step": 24742 }, { "epoch": 0.7224022656272809, "grad_norm": 0.5140125473384154, "learning_rate": 1.542254663422547e-05, "loss": 0.5826, "step": 24743 }, { "epoch": 0.7224314618551283, "grad_norm": 0.5131694623310001, "learning_rate": 1.5420924574209248e-05, "loss": 0.583, "step": 24744 }, { "epoch": 0.7224606580829757, "grad_norm": 0.5505842056574756, "learning_rate": 1.5419302514193026e-05, "loss": 0.6714, "step": 24745 }, { "epoch": 0.722489854310823, "grad_norm": 0.5237800179657239, "learning_rate": 1.5417680454176805e-05, "loss": 0.5899, "step": 24746 }, { "epoch": 0.7225190505386704, "grad_norm": 0.559397499281718, "learning_rate": 1.5416058394160583e-05, "loss": 0.6042, "step": 24747 }, { "epoch": 0.7225482467665177, "grad_norm": 0.5067163075032723, "learning_rate": 1.5414436334144365e-05, "loss": 0.5755, "step": 24748 }, { "epoch": 0.7225774429943651, "grad_norm": 0.5496159306599255, "learning_rate": 1.5412814274128143e-05, "loss": 0.6763, "step": 24749 }, { "epoch": 0.7226066392222125, "grad_norm": 0.5296845998531269, "learning_rate": 1.5411192214111922e-05, "loss": 0.6261, "step": 24750 }, { "epoch": 0.7226358354500598, "grad_norm": 0.5205375075430235, "learning_rate": 1.54095701540957e-05, "loss": 0.6233, "step": 24751 }, { "epoch": 0.7226650316779072, "grad_norm": 0.641273676419741, "learning_rate": 1.5407948094079482e-05, "loss": 0.6612, "step": 24752 }, { "epoch": 0.7226942279057545, "grad_norm": 0.5760281667757629, "learning_rate": 1.5406326034063264e-05, "loss": 0.7051, "step": 24753 }, { "epoch": 0.7227234241336019, "grad_norm": 0.523505498274359, "learning_rate": 1.5404703974047042e-05, "loss": 0.5963, "step": 24754 }, { "epoch": 0.7227526203614493, "grad_norm": 0.5289860823484227, "learning_rate": 1.540308191403082e-05, "loss": 0.6335, "step": 24755 }, { "epoch": 0.7227818165892966, "grad_norm": 0.5187379147762204, "learning_rate": 1.54014598540146e-05, "loss": 0.6289, "step": 24756 }, { "epoch": 0.722811012817144, "grad_norm": 0.5019027566461648, "learning_rate": 1.5399837793998377e-05, "loss": 0.5606, "step": 24757 }, { "epoch": 0.7228402090449914, "grad_norm": 0.5295388232468365, "learning_rate": 1.539821573398216e-05, "loss": 0.5018, "step": 24758 }, { "epoch": 0.7228694052728387, "grad_norm": 0.532421842252421, "learning_rate": 1.5396593673965937e-05, "loss": 0.6164, "step": 24759 }, { "epoch": 0.7228986015006861, "grad_norm": 0.5479463794902781, "learning_rate": 1.5394971613949716e-05, "loss": 0.6299, "step": 24760 }, { "epoch": 0.7229277977285334, "grad_norm": 0.5364002687747876, "learning_rate": 1.5393349553933497e-05, "loss": 0.6327, "step": 24761 }, { "epoch": 0.7229569939563808, "grad_norm": 0.509358392828859, "learning_rate": 1.5391727493917276e-05, "loss": 0.5198, "step": 24762 }, { "epoch": 0.7229861901842282, "grad_norm": 0.524961159018513, "learning_rate": 1.5390105433901058e-05, "loss": 0.6031, "step": 24763 }, { "epoch": 0.7230153864120755, "grad_norm": 0.4955406368589415, "learning_rate": 1.5388483373884836e-05, "loss": 0.5468, "step": 24764 }, { "epoch": 0.7230445826399229, "grad_norm": 0.5191031007400204, "learning_rate": 1.5386861313868614e-05, "loss": 0.531, "step": 24765 }, { "epoch": 0.7230737788677702, "grad_norm": 0.5566870016581367, "learning_rate": 1.5385239253852393e-05, "loss": 0.6487, "step": 24766 }, { "epoch": 0.7231029750956176, "grad_norm": 0.5217834921903255, "learning_rate": 1.538361719383617e-05, "loss": 0.5946, "step": 24767 }, { "epoch": 0.723132171323465, "grad_norm": 0.531818263097796, "learning_rate": 1.5381995133819953e-05, "loss": 0.5905, "step": 24768 }, { "epoch": 0.7231613675513123, "grad_norm": 0.5249169409857573, "learning_rate": 1.538037307380373e-05, "loss": 0.6252, "step": 24769 }, { "epoch": 0.7231905637791597, "grad_norm": 0.48875180999068785, "learning_rate": 1.537875101378751e-05, "loss": 0.5479, "step": 24770 }, { "epoch": 0.723219760007007, "grad_norm": 0.5283637885812474, "learning_rate": 1.537712895377129e-05, "loss": 0.6204, "step": 24771 }, { "epoch": 0.7232489562348544, "grad_norm": 0.5207676232487526, "learning_rate": 1.537550689375507e-05, "loss": 0.5754, "step": 24772 }, { "epoch": 0.7232781524627018, "grad_norm": 0.5152006021818033, "learning_rate": 1.5373884833738848e-05, "loss": 0.5272, "step": 24773 }, { "epoch": 0.7233073486905491, "grad_norm": 0.543149131975423, "learning_rate": 1.537226277372263e-05, "loss": 0.5986, "step": 24774 }, { "epoch": 0.7233365449183965, "grad_norm": 0.5066984952369897, "learning_rate": 1.5370640713706408e-05, "loss": 0.5785, "step": 24775 }, { "epoch": 0.7233657411462439, "grad_norm": 0.5329527284381536, "learning_rate": 1.5369018653690187e-05, "loss": 0.6684, "step": 24776 }, { "epoch": 0.7233949373740912, "grad_norm": 0.5121618034738579, "learning_rate": 1.5367396593673965e-05, "loss": 0.5808, "step": 24777 }, { "epoch": 0.7234241336019386, "grad_norm": 0.5058404745449964, "learning_rate": 1.5365774533657747e-05, "loss": 0.5838, "step": 24778 }, { "epoch": 0.7234533298297859, "grad_norm": 0.5505450116535829, "learning_rate": 1.5364152473641525e-05, "loss": 0.6057, "step": 24779 }, { "epoch": 0.7234825260576333, "grad_norm": 0.5023776558847494, "learning_rate": 1.5362530413625307e-05, "loss": 0.5918, "step": 24780 }, { "epoch": 0.7235117222854807, "grad_norm": 0.5301515677651301, "learning_rate": 1.5360908353609085e-05, "loss": 0.5963, "step": 24781 }, { "epoch": 0.723540918513328, "grad_norm": 0.5296066145171557, "learning_rate": 1.5359286293592864e-05, "loss": 0.5934, "step": 24782 }, { "epoch": 0.7235701147411754, "grad_norm": 0.5220265083107886, "learning_rate": 1.5357664233576642e-05, "loss": 0.6131, "step": 24783 }, { "epoch": 0.7235993109690229, "grad_norm": 0.48332864167999967, "learning_rate": 1.5356042173560424e-05, "loss": 0.5263, "step": 24784 }, { "epoch": 0.7236285071968702, "grad_norm": 0.5116781236378193, "learning_rate": 1.5354420113544202e-05, "loss": 0.5723, "step": 24785 }, { "epoch": 0.7236577034247176, "grad_norm": 0.510326889709316, "learning_rate": 1.535279805352798e-05, "loss": 0.5467, "step": 24786 }, { "epoch": 0.7236868996525649, "grad_norm": 0.4970212865466831, "learning_rate": 1.535117599351176e-05, "loss": 0.5006, "step": 24787 }, { "epoch": 0.7237160958804123, "grad_norm": 0.5000376886833382, "learning_rate": 1.534955393349554e-05, "loss": 0.5191, "step": 24788 }, { "epoch": 0.7237452921082597, "grad_norm": 0.504602565751102, "learning_rate": 1.534793187347932e-05, "loss": 0.5668, "step": 24789 }, { "epoch": 0.723774488336107, "grad_norm": 0.5019055055446152, "learning_rate": 1.53463098134631e-05, "loss": 0.5283, "step": 24790 }, { "epoch": 0.7238036845639544, "grad_norm": 0.5527653459518159, "learning_rate": 1.534468775344688e-05, "loss": 0.6276, "step": 24791 }, { "epoch": 0.7238328807918017, "grad_norm": 0.5359817081314215, "learning_rate": 1.5343065693430657e-05, "loss": 0.6035, "step": 24792 }, { "epoch": 0.7238620770196491, "grad_norm": 0.5120598317318792, "learning_rate": 1.5341443633414436e-05, "loss": 0.5765, "step": 24793 }, { "epoch": 0.7238912732474965, "grad_norm": 0.5204612961238769, "learning_rate": 1.5339821573398218e-05, "loss": 0.5397, "step": 24794 }, { "epoch": 0.7239204694753438, "grad_norm": 0.5810046264999076, "learning_rate": 1.5338199513381996e-05, "loss": 0.7436, "step": 24795 }, { "epoch": 0.7239496657031912, "grad_norm": 0.5284036698908897, "learning_rate": 1.5336577453365774e-05, "loss": 0.6023, "step": 24796 }, { "epoch": 0.7239788619310386, "grad_norm": 0.5314457732483961, "learning_rate": 1.5334955393349553e-05, "loss": 0.6027, "step": 24797 }, { "epoch": 0.7240080581588859, "grad_norm": 0.5416042113065916, "learning_rate": 1.5333333333333334e-05, "loss": 0.6189, "step": 24798 }, { "epoch": 0.7240372543867333, "grad_norm": 0.5399113205216599, "learning_rate": 1.5331711273317116e-05, "loss": 0.6226, "step": 24799 }, { "epoch": 0.7240664506145806, "grad_norm": 0.5920628000268313, "learning_rate": 1.5330089213300895e-05, "loss": 0.6866, "step": 24800 }, { "epoch": 0.724095646842428, "grad_norm": 0.5453351637331016, "learning_rate": 1.5328467153284673e-05, "loss": 0.6106, "step": 24801 }, { "epoch": 0.7241248430702754, "grad_norm": 0.5054084742024602, "learning_rate": 1.532684509326845e-05, "loss": 0.5332, "step": 24802 }, { "epoch": 0.7241540392981227, "grad_norm": 0.4769542094440199, "learning_rate": 1.532522303325223e-05, "loss": 0.5194, "step": 24803 }, { "epoch": 0.7241832355259701, "grad_norm": 0.49255775565208615, "learning_rate": 1.532360097323601e-05, "loss": 0.5774, "step": 24804 }, { "epoch": 0.7242124317538174, "grad_norm": 0.5382403520614764, "learning_rate": 1.532197891321979e-05, "loss": 0.5961, "step": 24805 }, { "epoch": 0.7242416279816648, "grad_norm": 0.5473469586827552, "learning_rate": 1.5320356853203568e-05, "loss": 0.6427, "step": 24806 }, { "epoch": 0.7242708242095122, "grad_norm": 0.5358403457429618, "learning_rate": 1.5318734793187347e-05, "loss": 0.5926, "step": 24807 }, { "epoch": 0.7243000204373595, "grad_norm": 0.5027861527552859, "learning_rate": 1.531711273317113e-05, "loss": 0.583, "step": 24808 }, { "epoch": 0.7243292166652069, "grad_norm": 0.5516087198532981, "learning_rate": 1.531549067315491e-05, "loss": 0.6501, "step": 24809 }, { "epoch": 0.7243584128930542, "grad_norm": 0.5084150153275052, "learning_rate": 1.531386861313869e-05, "loss": 0.5668, "step": 24810 }, { "epoch": 0.7243876091209016, "grad_norm": 0.5571198597317771, "learning_rate": 1.5312246553122467e-05, "loss": 0.6524, "step": 24811 }, { "epoch": 0.724416805348749, "grad_norm": 0.5655903451658998, "learning_rate": 1.5310624493106245e-05, "loss": 0.6503, "step": 24812 }, { "epoch": 0.7244460015765963, "grad_norm": 0.513957071816197, "learning_rate": 1.5309002433090024e-05, "loss": 0.5918, "step": 24813 }, { "epoch": 0.7244751978044437, "grad_norm": 0.5186962134372057, "learning_rate": 1.5307380373073805e-05, "loss": 0.5923, "step": 24814 }, { "epoch": 0.724504394032291, "grad_norm": 0.5890753049085707, "learning_rate": 1.5305758313057584e-05, "loss": 0.6546, "step": 24815 }, { "epoch": 0.7245335902601384, "grad_norm": 0.5192311665098063, "learning_rate": 1.5304136253041362e-05, "loss": 0.5725, "step": 24816 }, { "epoch": 0.7245627864879858, "grad_norm": 0.5346462020394274, "learning_rate": 1.530251419302514e-05, "loss": 0.6272, "step": 24817 }, { "epoch": 0.7245919827158331, "grad_norm": 0.5198138961040709, "learning_rate": 1.5300892133008922e-05, "loss": 0.5999, "step": 24818 }, { "epoch": 0.7246211789436805, "grad_norm": 0.503739889034316, "learning_rate": 1.5299270072992704e-05, "loss": 0.5775, "step": 24819 }, { "epoch": 0.7246503751715279, "grad_norm": 0.5814430625100457, "learning_rate": 1.5297648012976482e-05, "loss": 0.6846, "step": 24820 }, { "epoch": 0.7246795713993752, "grad_norm": 0.5035916392813068, "learning_rate": 1.529602595296026e-05, "loss": 0.5726, "step": 24821 }, { "epoch": 0.7247087676272226, "grad_norm": 0.5291456112398303, "learning_rate": 1.529440389294404e-05, "loss": 0.58, "step": 24822 }, { "epoch": 0.7247379638550699, "grad_norm": 0.5283687135892597, "learning_rate": 1.5292781832927818e-05, "loss": 0.612, "step": 24823 }, { "epoch": 0.7247671600829173, "grad_norm": 0.5265023024970742, "learning_rate": 1.52911597729116e-05, "loss": 0.626, "step": 24824 }, { "epoch": 0.7247963563107647, "grad_norm": 0.5409900649644036, "learning_rate": 1.5289537712895378e-05, "loss": 0.5786, "step": 24825 }, { "epoch": 0.724825552538612, "grad_norm": 0.5419913109796559, "learning_rate": 1.5287915652879156e-05, "loss": 0.6193, "step": 24826 }, { "epoch": 0.7248547487664594, "grad_norm": 0.5347911942014171, "learning_rate": 1.5286293592862934e-05, "loss": 0.6265, "step": 24827 }, { "epoch": 0.7248839449943068, "grad_norm": 0.4979147324848625, "learning_rate": 1.5284671532846716e-05, "loss": 0.5519, "step": 24828 }, { "epoch": 0.7249131412221541, "grad_norm": 0.5596608739096857, "learning_rate": 1.5283049472830498e-05, "loss": 0.5822, "step": 24829 }, { "epoch": 0.7249423374500015, "grad_norm": 0.49578465064401406, "learning_rate": 1.5281427412814276e-05, "loss": 0.5557, "step": 24830 }, { "epoch": 0.7249715336778488, "grad_norm": 0.543393929340236, "learning_rate": 1.5279805352798055e-05, "loss": 0.6119, "step": 24831 }, { "epoch": 0.7250007299056962, "grad_norm": 0.5459134867052818, "learning_rate": 1.5278183292781833e-05, "loss": 0.6135, "step": 24832 }, { "epoch": 0.7250299261335436, "grad_norm": 0.5059279051931477, "learning_rate": 1.527656123276561e-05, "loss": 0.566, "step": 24833 }, { "epoch": 0.7250591223613909, "grad_norm": 0.5273549900900762, "learning_rate": 1.5274939172749393e-05, "loss": 0.5974, "step": 24834 }, { "epoch": 0.7250883185892383, "grad_norm": 0.4898302469740009, "learning_rate": 1.527331711273317e-05, "loss": 0.5336, "step": 24835 }, { "epoch": 0.7251175148170856, "grad_norm": 0.48694451772528363, "learning_rate": 1.527169505271695e-05, "loss": 0.5291, "step": 24836 }, { "epoch": 0.725146711044933, "grad_norm": 0.47156010380405405, "learning_rate": 1.5270072992700732e-05, "loss": 0.4897, "step": 24837 }, { "epoch": 0.7251759072727804, "grad_norm": 0.5637122627455798, "learning_rate": 1.526845093268451e-05, "loss": 0.6405, "step": 24838 }, { "epoch": 0.7252051035006277, "grad_norm": 0.5270748941149581, "learning_rate": 1.526682887266829e-05, "loss": 0.6283, "step": 24839 }, { "epoch": 0.7252342997284751, "grad_norm": 0.5592010227144272, "learning_rate": 1.526520681265207e-05, "loss": 0.6289, "step": 24840 }, { "epoch": 0.7252634959563224, "grad_norm": 0.5774273837806329, "learning_rate": 1.526358475263585e-05, "loss": 0.6462, "step": 24841 }, { "epoch": 0.7252926921841698, "grad_norm": 0.5344913338567917, "learning_rate": 1.5261962692619627e-05, "loss": 0.6468, "step": 24842 }, { "epoch": 0.7253218884120172, "grad_norm": 0.525924972181517, "learning_rate": 1.5260340632603405e-05, "loss": 0.592, "step": 24843 }, { "epoch": 0.7253510846398645, "grad_norm": 0.497061106619665, "learning_rate": 1.5258718572587185e-05, "loss": 0.5504, "step": 24844 }, { "epoch": 0.7253802808677119, "grad_norm": 0.5279328669652384, "learning_rate": 1.5257096512570965e-05, "loss": 0.6318, "step": 24845 }, { "epoch": 0.7254094770955593, "grad_norm": 0.5104768763313288, "learning_rate": 1.5255474452554747e-05, "loss": 0.5716, "step": 24846 }, { "epoch": 0.7254386733234066, "grad_norm": 0.5308809622547417, "learning_rate": 1.5253852392538526e-05, "loss": 0.6062, "step": 24847 }, { "epoch": 0.725467869551254, "grad_norm": 0.498628359982638, "learning_rate": 1.5252230332522304e-05, "loss": 0.5536, "step": 24848 }, { "epoch": 0.7254970657791013, "grad_norm": 0.5224965531372285, "learning_rate": 1.5250608272506084e-05, "loss": 0.6051, "step": 24849 }, { "epoch": 0.7255262620069487, "grad_norm": 0.5085965713120681, "learning_rate": 1.5248986212489862e-05, "loss": 0.5611, "step": 24850 }, { "epoch": 0.7255554582347961, "grad_norm": 0.5365980510228507, "learning_rate": 1.5247364152473643e-05, "loss": 0.5619, "step": 24851 }, { "epoch": 0.7255846544626434, "grad_norm": 0.5375751002160766, "learning_rate": 1.5245742092457421e-05, "loss": 0.5795, "step": 24852 }, { "epoch": 0.7256138506904908, "grad_norm": 0.5121403756701363, "learning_rate": 1.5244120032441201e-05, "loss": 0.6115, "step": 24853 }, { "epoch": 0.7256430469183381, "grad_norm": 0.540384448943619, "learning_rate": 1.524249797242498e-05, "loss": 0.6233, "step": 24854 }, { "epoch": 0.7256722431461855, "grad_norm": 0.5086725180176377, "learning_rate": 1.524087591240876e-05, "loss": 0.5385, "step": 24855 }, { "epoch": 0.7257014393740329, "grad_norm": 0.5170215451055891, "learning_rate": 1.5239253852392541e-05, "loss": 0.5818, "step": 24856 }, { "epoch": 0.7257306356018802, "grad_norm": 0.5021330153190547, "learning_rate": 1.523763179237632e-05, "loss": 0.5812, "step": 24857 }, { "epoch": 0.7257598318297276, "grad_norm": 0.5265931280294637, "learning_rate": 1.5236009732360098e-05, "loss": 0.6026, "step": 24858 }, { "epoch": 0.725789028057575, "grad_norm": 0.5175218001360404, "learning_rate": 1.5234387672343878e-05, "loss": 0.5529, "step": 24859 }, { "epoch": 0.7258182242854223, "grad_norm": 0.5205554268362101, "learning_rate": 1.5232765612327656e-05, "loss": 0.6081, "step": 24860 }, { "epoch": 0.7258474205132697, "grad_norm": 0.5332884190317052, "learning_rate": 1.5231143552311436e-05, "loss": 0.579, "step": 24861 }, { "epoch": 0.725876616741117, "grad_norm": 0.5083573236897729, "learning_rate": 1.5229521492295215e-05, "loss": 0.5875, "step": 24862 }, { "epoch": 0.7259058129689644, "grad_norm": 0.5520287673589004, "learning_rate": 1.5227899432278995e-05, "loss": 0.6494, "step": 24863 }, { "epoch": 0.7259350091968118, "grad_norm": 0.4775089748358585, "learning_rate": 1.5226277372262773e-05, "loss": 0.4395, "step": 24864 }, { "epoch": 0.7259642054246591, "grad_norm": 0.5845879372637913, "learning_rate": 1.5224655312246555e-05, "loss": 0.6744, "step": 24865 }, { "epoch": 0.7259934016525065, "grad_norm": 0.5337984711448346, "learning_rate": 1.5223033252230335e-05, "loss": 0.5867, "step": 24866 }, { "epoch": 0.7260225978803538, "grad_norm": 0.515503100886421, "learning_rate": 1.5221411192214113e-05, "loss": 0.6107, "step": 24867 }, { "epoch": 0.7260517941082012, "grad_norm": 0.5334245827293855, "learning_rate": 1.5219789132197892e-05, "loss": 0.5932, "step": 24868 }, { "epoch": 0.7260809903360486, "grad_norm": 0.5311446847185107, "learning_rate": 1.5218167072181672e-05, "loss": 0.5287, "step": 24869 }, { "epoch": 0.7261101865638959, "grad_norm": 0.4867289024475023, "learning_rate": 1.521654501216545e-05, "loss": 0.4864, "step": 24870 }, { "epoch": 0.7261393827917433, "grad_norm": 0.48942958185986346, "learning_rate": 1.521492295214923e-05, "loss": 0.5547, "step": 24871 }, { "epoch": 0.7261685790195906, "grad_norm": 0.4965788711872024, "learning_rate": 1.5213300892133009e-05, "loss": 0.5321, "step": 24872 }, { "epoch": 0.726197775247438, "grad_norm": 0.4751119518639608, "learning_rate": 1.5211678832116789e-05, "loss": 0.5281, "step": 24873 }, { "epoch": 0.7262269714752854, "grad_norm": 0.497468078349842, "learning_rate": 1.5210056772100567e-05, "loss": 0.5191, "step": 24874 }, { "epoch": 0.7262561677031327, "grad_norm": 0.5473612094469186, "learning_rate": 1.5208434712084349e-05, "loss": 0.6328, "step": 24875 }, { "epoch": 0.7262853639309801, "grad_norm": 0.5097690126354781, "learning_rate": 1.5206812652068127e-05, "loss": 0.5752, "step": 24876 }, { "epoch": 0.7263145601588274, "grad_norm": 0.5050350852744333, "learning_rate": 1.5205190592051907e-05, "loss": 0.5369, "step": 24877 }, { "epoch": 0.7263437563866748, "grad_norm": 0.5052259030342279, "learning_rate": 1.5203568532035686e-05, "loss": 0.5529, "step": 24878 }, { "epoch": 0.7263729526145222, "grad_norm": 0.5130200777993137, "learning_rate": 1.5201946472019466e-05, "loss": 0.5992, "step": 24879 }, { "epoch": 0.7264021488423695, "grad_norm": 0.5543024890868982, "learning_rate": 1.5200324412003244e-05, "loss": 0.6696, "step": 24880 }, { "epoch": 0.7264313450702169, "grad_norm": 0.48913598855211615, "learning_rate": 1.5198702351987024e-05, "loss": 0.5457, "step": 24881 }, { "epoch": 0.7264605412980643, "grad_norm": 0.5300527992576107, "learning_rate": 1.5197080291970803e-05, "loss": 0.6363, "step": 24882 }, { "epoch": 0.7264897375259116, "grad_norm": 0.48625843917880524, "learning_rate": 1.5195458231954583e-05, "loss": 0.5464, "step": 24883 }, { "epoch": 0.726518933753759, "grad_norm": 0.5146670020322035, "learning_rate": 1.5193836171938364e-05, "loss": 0.5916, "step": 24884 }, { "epoch": 0.7265481299816063, "grad_norm": 0.5199001601146287, "learning_rate": 1.5192214111922143e-05, "loss": 0.5743, "step": 24885 }, { "epoch": 0.7265773262094537, "grad_norm": 0.5452033238268436, "learning_rate": 1.5190592051905921e-05, "loss": 0.6277, "step": 24886 }, { "epoch": 0.7266065224373011, "grad_norm": 0.5394743986725646, "learning_rate": 1.5188969991889701e-05, "loss": 0.632, "step": 24887 }, { "epoch": 0.7266357186651484, "grad_norm": 0.4909091066910248, "learning_rate": 1.518734793187348e-05, "loss": 0.5484, "step": 24888 }, { "epoch": 0.7266649148929958, "grad_norm": 0.4903175416995841, "learning_rate": 1.518572587185726e-05, "loss": 0.559, "step": 24889 }, { "epoch": 0.7266941111208431, "grad_norm": 0.5765560833813019, "learning_rate": 1.5184103811841038e-05, "loss": 0.6383, "step": 24890 }, { "epoch": 0.7267233073486905, "grad_norm": 0.5447854551200199, "learning_rate": 1.5182481751824818e-05, "loss": 0.6234, "step": 24891 }, { "epoch": 0.7267525035765379, "grad_norm": 0.5771598615976915, "learning_rate": 1.5180859691808596e-05, "loss": 0.6612, "step": 24892 }, { "epoch": 0.7267816998043852, "grad_norm": 0.557655281770944, "learning_rate": 1.5179237631792375e-05, "loss": 0.6765, "step": 24893 }, { "epoch": 0.7268108960322326, "grad_norm": 0.5247587331123675, "learning_rate": 1.5177615571776158e-05, "loss": 0.6391, "step": 24894 }, { "epoch": 0.72684009226008, "grad_norm": 0.49807821739611025, "learning_rate": 1.5175993511759937e-05, "loss": 0.569, "step": 24895 }, { "epoch": 0.7268692884879273, "grad_norm": 0.5325201716124138, "learning_rate": 1.5174371451743715e-05, "loss": 0.6217, "step": 24896 }, { "epoch": 0.7268984847157747, "grad_norm": 0.48816197653833276, "learning_rate": 1.5172749391727495e-05, "loss": 0.5176, "step": 24897 }, { "epoch": 0.726927680943622, "grad_norm": 0.491290101849878, "learning_rate": 1.5171127331711274e-05, "loss": 0.538, "step": 24898 }, { "epoch": 0.7269568771714694, "grad_norm": 0.5211132042805873, "learning_rate": 1.5169505271695054e-05, "loss": 0.568, "step": 24899 }, { "epoch": 0.7269860733993168, "grad_norm": 0.510921395270463, "learning_rate": 1.5167883211678832e-05, "loss": 0.5814, "step": 24900 }, { "epoch": 0.7270152696271641, "grad_norm": 0.5111790258173176, "learning_rate": 1.5166261151662612e-05, "loss": 0.5476, "step": 24901 }, { "epoch": 0.7270444658550115, "grad_norm": 0.5086215293651516, "learning_rate": 1.516463909164639e-05, "loss": 0.5469, "step": 24902 }, { "epoch": 0.7270736620828588, "grad_norm": 0.5302296433393001, "learning_rate": 1.5163017031630172e-05, "loss": 0.6172, "step": 24903 }, { "epoch": 0.7271028583107062, "grad_norm": 0.5421340579439237, "learning_rate": 1.5161394971613952e-05, "loss": 0.6326, "step": 24904 }, { "epoch": 0.7271320545385537, "grad_norm": 0.5196807095185221, "learning_rate": 1.515977291159773e-05, "loss": 0.5983, "step": 24905 }, { "epoch": 0.727161250766401, "grad_norm": 0.5551691463186688, "learning_rate": 1.5158150851581509e-05, "loss": 0.6649, "step": 24906 }, { "epoch": 0.7271904469942484, "grad_norm": 0.5514608790633502, "learning_rate": 1.5156528791565289e-05, "loss": 0.6456, "step": 24907 }, { "epoch": 0.7272196432220958, "grad_norm": 0.4845016167557373, "learning_rate": 1.5154906731549067e-05, "loss": 0.5327, "step": 24908 }, { "epoch": 0.7272488394499431, "grad_norm": 0.4901880843171922, "learning_rate": 1.5153284671532847e-05, "loss": 0.5234, "step": 24909 }, { "epoch": 0.7272780356777905, "grad_norm": 0.505198695606939, "learning_rate": 1.5151662611516626e-05, "loss": 0.5353, "step": 24910 }, { "epoch": 0.7273072319056378, "grad_norm": 0.48377558110113533, "learning_rate": 1.5150040551500406e-05, "loss": 0.5313, "step": 24911 }, { "epoch": 0.7273364281334852, "grad_norm": 0.5356130031002405, "learning_rate": 1.5148418491484188e-05, "loss": 0.6095, "step": 24912 }, { "epoch": 0.7273656243613326, "grad_norm": 0.5508606869586318, "learning_rate": 1.5146796431467966e-05, "loss": 0.623, "step": 24913 }, { "epoch": 0.7273948205891799, "grad_norm": 0.5381443363239389, "learning_rate": 1.5145174371451744e-05, "loss": 0.6396, "step": 24914 }, { "epoch": 0.7274240168170273, "grad_norm": 0.5458030580735251, "learning_rate": 1.5143552311435525e-05, "loss": 0.5753, "step": 24915 }, { "epoch": 0.7274532130448746, "grad_norm": 0.4954634834786254, "learning_rate": 1.5141930251419303e-05, "loss": 0.5595, "step": 24916 }, { "epoch": 0.727482409272722, "grad_norm": 0.5213703806422869, "learning_rate": 1.5140308191403083e-05, "loss": 0.51, "step": 24917 }, { "epoch": 0.7275116055005694, "grad_norm": 0.5157730632995934, "learning_rate": 1.5138686131386861e-05, "loss": 0.5529, "step": 24918 }, { "epoch": 0.7275408017284167, "grad_norm": 0.5216262957117551, "learning_rate": 1.5137064071370641e-05, "loss": 0.6015, "step": 24919 }, { "epoch": 0.7275699979562641, "grad_norm": 0.567237062912874, "learning_rate": 1.513544201135442e-05, "loss": 0.6753, "step": 24920 }, { "epoch": 0.7275991941841115, "grad_norm": 0.4973715050084073, "learning_rate": 1.5133819951338198e-05, "loss": 0.5574, "step": 24921 }, { "epoch": 0.7276283904119588, "grad_norm": 0.49605027600825063, "learning_rate": 1.5132197891321982e-05, "loss": 0.559, "step": 24922 }, { "epoch": 0.7276575866398062, "grad_norm": 0.5106156996692072, "learning_rate": 1.513057583130576e-05, "loss": 0.5756, "step": 24923 }, { "epoch": 0.7276867828676535, "grad_norm": 0.5425247561640156, "learning_rate": 1.5128953771289538e-05, "loss": 0.6582, "step": 24924 }, { "epoch": 0.7277159790955009, "grad_norm": 0.5096474224648193, "learning_rate": 1.5127331711273318e-05, "loss": 0.6218, "step": 24925 }, { "epoch": 0.7277451753233483, "grad_norm": 0.5640441794982872, "learning_rate": 1.5125709651257097e-05, "loss": 0.5973, "step": 24926 }, { "epoch": 0.7277743715511956, "grad_norm": 0.4872135094701039, "learning_rate": 1.5124087591240877e-05, "loss": 0.5374, "step": 24927 }, { "epoch": 0.727803567779043, "grad_norm": 0.5243951611705219, "learning_rate": 1.5122465531224655e-05, "loss": 0.6046, "step": 24928 }, { "epoch": 0.7278327640068903, "grad_norm": 0.5139079557407868, "learning_rate": 1.5120843471208435e-05, "loss": 0.5876, "step": 24929 }, { "epoch": 0.7278619602347377, "grad_norm": 0.5567234380961491, "learning_rate": 1.5119221411192214e-05, "loss": 0.6553, "step": 24930 }, { "epoch": 0.7278911564625851, "grad_norm": 0.48507203221330564, "learning_rate": 1.5117599351175995e-05, "loss": 0.4945, "step": 24931 }, { "epoch": 0.7279203526904324, "grad_norm": 0.5485099903592705, "learning_rate": 1.5115977291159775e-05, "loss": 0.6014, "step": 24932 }, { "epoch": 0.7279495489182798, "grad_norm": 0.5138426728031881, "learning_rate": 1.5114355231143554e-05, "loss": 0.5757, "step": 24933 }, { "epoch": 0.7279787451461271, "grad_norm": 0.5302074948364645, "learning_rate": 1.5112733171127332e-05, "loss": 0.5849, "step": 24934 }, { "epoch": 0.7280079413739745, "grad_norm": 0.5192841688192368, "learning_rate": 1.5111111111111112e-05, "loss": 0.5815, "step": 24935 }, { "epoch": 0.7280371376018219, "grad_norm": 0.5516667970711426, "learning_rate": 1.510948905109489e-05, "loss": 0.6228, "step": 24936 }, { "epoch": 0.7280663338296692, "grad_norm": 0.4940851298870547, "learning_rate": 1.510786699107867e-05, "loss": 0.5396, "step": 24937 }, { "epoch": 0.7280955300575166, "grad_norm": 0.5084452972862359, "learning_rate": 1.5106244931062449e-05, "loss": 0.5728, "step": 24938 }, { "epoch": 0.728124726285364, "grad_norm": 0.4879992372033486, "learning_rate": 1.510462287104623e-05, "loss": 0.4839, "step": 24939 }, { "epoch": 0.7281539225132113, "grad_norm": 0.5077037320469152, "learning_rate": 1.5103000811030008e-05, "loss": 0.5905, "step": 24940 }, { "epoch": 0.7281831187410587, "grad_norm": 0.5396846345464535, "learning_rate": 1.510137875101379e-05, "loss": 0.6512, "step": 24941 }, { "epoch": 0.728212314968906, "grad_norm": 0.564683021433975, "learning_rate": 1.5099756690997568e-05, "loss": 0.6075, "step": 24942 }, { "epoch": 0.7282415111967534, "grad_norm": 0.5752890192884, "learning_rate": 1.5098134630981348e-05, "loss": 0.7283, "step": 24943 }, { "epoch": 0.7282707074246008, "grad_norm": 0.5043864897193308, "learning_rate": 1.5096512570965126e-05, "loss": 0.5654, "step": 24944 }, { "epoch": 0.7282999036524481, "grad_norm": 0.47366485290399973, "learning_rate": 1.5094890510948906e-05, "loss": 0.4914, "step": 24945 }, { "epoch": 0.7283290998802955, "grad_norm": 0.5904198348521749, "learning_rate": 1.5093268450932685e-05, "loss": 0.7003, "step": 24946 }, { "epoch": 0.7283582961081428, "grad_norm": 0.5048387800057291, "learning_rate": 1.5091646390916465e-05, "loss": 0.5211, "step": 24947 }, { "epoch": 0.7283874923359902, "grad_norm": 0.5277728020815596, "learning_rate": 1.5090024330900243e-05, "loss": 0.6228, "step": 24948 }, { "epoch": 0.7284166885638376, "grad_norm": 0.5884728339738965, "learning_rate": 1.5088402270884023e-05, "loss": 0.6576, "step": 24949 }, { "epoch": 0.7284458847916849, "grad_norm": 0.53077254194349, "learning_rate": 1.5086780210867805e-05, "loss": 0.5956, "step": 24950 }, { "epoch": 0.7284750810195323, "grad_norm": 0.5691403418607559, "learning_rate": 1.5085158150851583e-05, "loss": 0.6829, "step": 24951 }, { "epoch": 0.7285042772473797, "grad_norm": 0.5622905371176068, "learning_rate": 1.5083536090835362e-05, "loss": 0.6565, "step": 24952 }, { "epoch": 0.728533473475227, "grad_norm": 0.48632876617449217, "learning_rate": 1.5081914030819142e-05, "loss": 0.5319, "step": 24953 }, { "epoch": 0.7285626697030744, "grad_norm": 0.51237149510148, "learning_rate": 1.508029197080292e-05, "loss": 0.584, "step": 24954 }, { "epoch": 0.7285918659309217, "grad_norm": 0.4905935837694441, "learning_rate": 1.50786699107867e-05, "loss": 0.5626, "step": 24955 }, { "epoch": 0.7286210621587691, "grad_norm": 0.5178998870775287, "learning_rate": 1.5077047850770478e-05, "loss": 0.5879, "step": 24956 }, { "epoch": 0.7286502583866165, "grad_norm": 0.5048131188461124, "learning_rate": 1.5075425790754259e-05, "loss": 0.5628, "step": 24957 }, { "epoch": 0.7286794546144638, "grad_norm": 0.5445298697048558, "learning_rate": 1.5073803730738037e-05, "loss": 0.6126, "step": 24958 }, { "epoch": 0.7287086508423112, "grad_norm": 0.523858639881809, "learning_rate": 1.5072181670721815e-05, "loss": 0.6046, "step": 24959 }, { "epoch": 0.7287378470701585, "grad_norm": 0.49498835297670835, "learning_rate": 1.5070559610705599e-05, "loss": 0.5285, "step": 24960 }, { "epoch": 0.7287670432980059, "grad_norm": 0.5604303856260597, "learning_rate": 1.5068937550689377e-05, "loss": 0.6384, "step": 24961 }, { "epoch": 0.7287962395258533, "grad_norm": 0.521774618084868, "learning_rate": 1.5067315490673156e-05, "loss": 0.6253, "step": 24962 }, { "epoch": 0.7288254357537006, "grad_norm": 0.5103881563594768, "learning_rate": 1.5065693430656936e-05, "loss": 0.5577, "step": 24963 }, { "epoch": 0.728854631981548, "grad_norm": 0.5120437640451506, "learning_rate": 1.5064071370640714e-05, "loss": 0.6063, "step": 24964 }, { "epoch": 0.7288838282093953, "grad_norm": 0.5248510672097295, "learning_rate": 1.5062449310624494e-05, "loss": 0.6168, "step": 24965 }, { "epoch": 0.7289130244372427, "grad_norm": 0.5509756916103715, "learning_rate": 1.5060827250608272e-05, "loss": 0.6469, "step": 24966 }, { "epoch": 0.7289422206650901, "grad_norm": 0.5142937851018279, "learning_rate": 1.5059205190592052e-05, "loss": 0.5727, "step": 24967 }, { "epoch": 0.7289714168929374, "grad_norm": 0.5515582185572736, "learning_rate": 1.505758313057583e-05, "loss": 0.6364, "step": 24968 }, { "epoch": 0.7290006131207848, "grad_norm": 0.5682895853374997, "learning_rate": 1.5055961070559613e-05, "loss": 0.6649, "step": 24969 }, { "epoch": 0.7290298093486322, "grad_norm": 0.5322805441698293, "learning_rate": 1.5054339010543391e-05, "loss": 0.6465, "step": 24970 }, { "epoch": 0.7290590055764795, "grad_norm": 0.5028542609655128, "learning_rate": 1.5052716950527171e-05, "loss": 0.5248, "step": 24971 }, { "epoch": 0.7290882018043269, "grad_norm": 0.4987547622801925, "learning_rate": 1.505109489051095e-05, "loss": 0.5112, "step": 24972 }, { "epoch": 0.7291173980321742, "grad_norm": 0.4944301526506465, "learning_rate": 1.504947283049473e-05, "loss": 0.5396, "step": 24973 }, { "epoch": 0.7291465942600216, "grad_norm": 0.5136175818695062, "learning_rate": 1.5047850770478508e-05, "loss": 0.5496, "step": 24974 }, { "epoch": 0.729175790487869, "grad_norm": 0.504970173286425, "learning_rate": 1.5046228710462288e-05, "loss": 0.5469, "step": 24975 }, { "epoch": 0.7292049867157163, "grad_norm": 0.5390229249841428, "learning_rate": 1.5044606650446066e-05, "loss": 0.6092, "step": 24976 }, { "epoch": 0.7292341829435637, "grad_norm": 0.47369778863050893, "learning_rate": 1.5042984590429846e-05, "loss": 0.4823, "step": 24977 }, { "epoch": 0.729263379171411, "grad_norm": 0.5600972001136659, "learning_rate": 1.5041362530413625e-05, "loss": 0.6618, "step": 24978 }, { "epoch": 0.7292925753992584, "grad_norm": 0.5101255938807392, "learning_rate": 1.5039740470397407e-05, "loss": 0.6022, "step": 24979 }, { "epoch": 0.7293217716271058, "grad_norm": 0.49092579014145704, "learning_rate": 1.5038118410381185e-05, "loss": 0.5121, "step": 24980 }, { "epoch": 0.7293509678549531, "grad_norm": 0.48885356792768175, "learning_rate": 1.5036496350364965e-05, "loss": 0.5432, "step": 24981 }, { "epoch": 0.7293801640828005, "grad_norm": 0.477766901692584, "learning_rate": 1.5034874290348743e-05, "loss": 0.5025, "step": 24982 }, { "epoch": 0.7294093603106478, "grad_norm": 0.5048524635155266, "learning_rate": 1.5033252230332523e-05, "loss": 0.5725, "step": 24983 }, { "epoch": 0.7294385565384952, "grad_norm": 0.5483144150408911, "learning_rate": 1.5031630170316302e-05, "loss": 0.5984, "step": 24984 }, { "epoch": 0.7294677527663426, "grad_norm": 0.4871141447452081, "learning_rate": 1.5030008110300082e-05, "loss": 0.5286, "step": 24985 }, { "epoch": 0.7294969489941899, "grad_norm": 0.5138719485524168, "learning_rate": 1.502838605028386e-05, "loss": 0.573, "step": 24986 }, { "epoch": 0.7295261452220373, "grad_norm": 0.528420644065099, "learning_rate": 1.5026763990267639e-05, "loss": 0.5988, "step": 24987 }, { "epoch": 0.7295553414498847, "grad_norm": 0.5212892414503484, "learning_rate": 1.5025141930251422e-05, "loss": 0.576, "step": 24988 }, { "epoch": 0.729584537677732, "grad_norm": 0.5376727613666018, "learning_rate": 1.50235198702352e-05, "loss": 0.6696, "step": 24989 }, { "epoch": 0.7296137339055794, "grad_norm": 0.5596141146671653, "learning_rate": 1.5021897810218979e-05, "loss": 0.6421, "step": 24990 }, { "epoch": 0.7296429301334267, "grad_norm": 0.5122880777560395, "learning_rate": 1.5020275750202759e-05, "loss": 0.5966, "step": 24991 }, { "epoch": 0.7296721263612741, "grad_norm": 0.536770326404974, "learning_rate": 1.5018653690186537e-05, "loss": 0.634, "step": 24992 }, { "epoch": 0.7297013225891215, "grad_norm": 0.5245458778036857, "learning_rate": 1.5017031630170317e-05, "loss": 0.5757, "step": 24993 }, { "epoch": 0.7297305188169688, "grad_norm": 0.5454007151101473, "learning_rate": 1.5015409570154096e-05, "loss": 0.6132, "step": 24994 }, { "epoch": 0.7297597150448162, "grad_norm": 0.5230463879970295, "learning_rate": 1.5013787510137876e-05, "loss": 0.586, "step": 24995 }, { "epoch": 0.7297889112726635, "grad_norm": 0.5420856448855954, "learning_rate": 1.5012165450121654e-05, "loss": 0.6046, "step": 24996 }, { "epoch": 0.7298181075005109, "grad_norm": 0.529450579948636, "learning_rate": 1.5010543390105436e-05, "loss": 0.5995, "step": 24997 }, { "epoch": 0.7298473037283583, "grad_norm": 0.49226721864861434, "learning_rate": 1.5008921330089214e-05, "loss": 0.5068, "step": 24998 }, { "epoch": 0.7298764999562056, "grad_norm": 0.5193917866468194, "learning_rate": 1.5007299270072994e-05, "loss": 0.5903, "step": 24999 }, { "epoch": 0.729905696184053, "grad_norm": 0.5434575462212672, "learning_rate": 1.5005677210056773e-05, "loss": 0.6555, "step": 25000 }, { "epoch": 0.7299348924119003, "grad_norm": 0.4962187011436665, "learning_rate": 1.5004055150040553e-05, "loss": 0.55, "step": 25001 }, { "epoch": 0.7299640886397477, "grad_norm": 0.5145021520100201, "learning_rate": 1.5002433090024331e-05, "loss": 0.5936, "step": 25002 }, { "epoch": 0.7299932848675951, "grad_norm": 0.48907022585515925, "learning_rate": 1.5000811030008111e-05, "loss": 0.5447, "step": 25003 }, { "epoch": 0.7300224810954424, "grad_norm": 0.5382761870366544, "learning_rate": 1.499918896999189e-05, "loss": 0.6265, "step": 25004 }, { "epoch": 0.7300516773232898, "grad_norm": 0.5143390196911702, "learning_rate": 1.499756690997567e-05, "loss": 0.5626, "step": 25005 }, { "epoch": 0.7300808735511372, "grad_norm": 0.5285770957108165, "learning_rate": 1.4995944849959448e-05, "loss": 0.636, "step": 25006 }, { "epoch": 0.7301100697789845, "grad_norm": 0.5379588319137386, "learning_rate": 1.499432278994323e-05, "loss": 0.6376, "step": 25007 }, { "epoch": 0.7301392660068319, "grad_norm": 0.5034877389576206, "learning_rate": 1.4992700729927008e-05, "loss": 0.5813, "step": 25008 }, { "epoch": 0.7301684622346792, "grad_norm": 0.5319004501941254, "learning_rate": 1.4991078669910788e-05, "loss": 0.5636, "step": 25009 }, { "epoch": 0.7301976584625266, "grad_norm": 0.5891013903179452, "learning_rate": 1.4989456609894567e-05, "loss": 0.6882, "step": 25010 }, { "epoch": 0.730226854690374, "grad_norm": 0.4663674486961688, "learning_rate": 1.4987834549878347e-05, "loss": 0.4739, "step": 25011 }, { "epoch": 0.7302560509182213, "grad_norm": 0.5618051912366199, "learning_rate": 1.4986212489862125e-05, "loss": 0.6887, "step": 25012 }, { "epoch": 0.7302852471460687, "grad_norm": 0.520671137925785, "learning_rate": 1.4984590429845905e-05, "loss": 0.6318, "step": 25013 }, { "epoch": 0.730314443373916, "grad_norm": 0.5314076816094941, "learning_rate": 1.4982968369829683e-05, "loss": 0.5935, "step": 25014 }, { "epoch": 0.7303436396017634, "grad_norm": 0.5315604939539093, "learning_rate": 1.4981346309813462e-05, "loss": 0.6094, "step": 25015 }, { "epoch": 0.7303728358296108, "grad_norm": 0.45264620763113217, "learning_rate": 1.4979724249797245e-05, "loss": 0.4524, "step": 25016 }, { "epoch": 0.7304020320574581, "grad_norm": 0.510537035875441, "learning_rate": 1.4978102189781024e-05, "loss": 0.5708, "step": 25017 }, { "epoch": 0.7304312282853055, "grad_norm": 0.562759367412109, "learning_rate": 1.4976480129764802e-05, "loss": 0.6775, "step": 25018 }, { "epoch": 0.7304604245131529, "grad_norm": 0.5096322534825767, "learning_rate": 1.4974858069748582e-05, "loss": 0.5599, "step": 25019 }, { "epoch": 0.7304896207410002, "grad_norm": 0.5495425904716488, "learning_rate": 1.497323600973236e-05, "loss": 0.6039, "step": 25020 }, { "epoch": 0.7305188169688476, "grad_norm": 0.5266455113401168, "learning_rate": 1.497161394971614e-05, "loss": 0.5553, "step": 25021 }, { "epoch": 0.7305480131966949, "grad_norm": 0.541651586695769, "learning_rate": 1.4969991889699919e-05, "loss": 0.6745, "step": 25022 }, { "epoch": 0.7305772094245423, "grad_norm": 0.49791358323795387, "learning_rate": 1.4968369829683699e-05, "loss": 0.5334, "step": 25023 }, { "epoch": 0.7306064056523897, "grad_norm": 0.5415310104028951, "learning_rate": 1.4966747769667477e-05, "loss": 0.6212, "step": 25024 }, { "epoch": 0.7306356018802371, "grad_norm": 0.5068679178601544, "learning_rate": 1.4965125709651256e-05, "loss": 0.5787, "step": 25025 }, { "epoch": 0.7306647981080845, "grad_norm": 0.5137704186900646, "learning_rate": 1.496350364963504e-05, "loss": 0.5924, "step": 25026 }, { "epoch": 0.7306939943359319, "grad_norm": 0.4888694639764962, "learning_rate": 1.4961881589618818e-05, "loss": 0.5313, "step": 25027 }, { "epoch": 0.7307231905637792, "grad_norm": 0.5310116816999548, "learning_rate": 1.4960259529602596e-05, "loss": 0.5857, "step": 25028 }, { "epoch": 0.7307523867916266, "grad_norm": 0.5247526235194249, "learning_rate": 1.4958637469586376e-05, "loss": 0.6142, "step": 25029 }, { "epoch": 0.7307815830194739, "grad_norm": 0.5022673521931741, "learning_rate": 1.4957015409570154e-05, "loss": 0.5507, "step": 25030 }, { "epoch": 0.7308107792473213, "grad_norm": 0.5491347333271006, "learning_rate": 1.4955393349553934e-05, "loss": 0.6469, "step": 25031 }, { "epoch": 0.7308399754751687, "grad_norm": 0.5346041466335811, "learning_rate": 1.4953771289537713e-05, "loss": 0.5979, "step": 25032 }, { "epoch": 0.730869171703016, "grad_norm": 0.5306400100308918, "learning_rate": 1.4952149229521493e-05, "loss": 0.6078, "step": 25033 }, { "epoch": 0.7308983679308634, "grad_norm": 0.5640195493532016, "learning_rate": 1.4950527169505271e-05, "loss": 0.5889, "step": 25034 }, { "epoch": 0.7309275641587107, "grad_norm": 0.5344450120249241, "learning_rate": 1.4948905109489053e-05, "loss": 0.6009, "step": 25035 }, { "epoch": 0.7309567603865581, "grad_norm": 0.47950448189129663, "learning_rate": 1.4947283049472831e-05, "loss": 0.5315, "step": 25036 }, { "epoch": 0.7309859566144055, "grad_norm": 0.560651746958295, "learning_rate": 1.4945660989456611e-05, "loss": 0.6368, "step": 25037 }, { "epoch": 0.7310151528422528, "grad_norm": 0.5324985847657941, "learning_rate": 1.494403892944039e-05, "loss": 0.6399, "step": 25038 }, { "epoch": 0.7310443490701002, "grad_norm": 0.5271814963655325, "learning_rate": 1.494241686942417e-05, "loss": 0.5948, "step": 25039 }, { "epoch": 0.7310735452979475, "grad_norm": 0.5825286345646881, "learning_rate": 1.4940794809407948e-05, "loss": 0.6805, "step": 25040 }, { "epoch": 0.7311027415257949, "grad_norm": 0.5047489241853296, "learning_rate": 1.4939172749391728e-05, "loss": 0.5653, "step": 25041 }, { "epoch": 0.7311319377536423, "grad_norm": 0.574488770537013, "learning_rate": 1.4937550689375507e-05, "loss": 0.6746, "step": 25042 }, { "epoch": 0.7311611339814896, "grad_norm": 0.5352172159188265, "learning_rate": 1.4935928629359285e-05, "loss": 0.5878, "step": 25043 }, { "epoch": 0.731190330209337, "grad_norm": 0.5481194259546122, "learning_rate": 1.4934306569343065e-05, "loss": 0.6484, "step": 25044 }, { "epoch": 0.7312195264371844, "grad_norm": 0.5035033759951917, "learning_rate": 1.4932684509326847e-05, "loss": 0.5369, "step": 25045 }, { "epoch": 0.7312487226650317, "grad_norm": 0.5113063326653998, "learning_rate": 1.4931062449310625e-05, "loss": 0.5963, "step": 25046 }, { "epoch": 0.7312779188928791, "grad_norm": 0.48307557002260554, "learning_rate": 1.4929440389294405e-05, "loss": 0.527, "step": 25047 }, { "epoch": 0.7313071151207264, "grad_norm": 0.5197517046104506, "learning_rate": 1.4927818329278184e-05, "loss": 0.5707, "step": 25048 }, { "epoch": 0.7313363113485738, "grad_norm": 0.5732827348152146, "learning_rate": 1.4926196269261964e-05, "loss": 0.6803, "step": 25049 }, { "epoch": 0.7313655075764212, "grad_norm": 0.4811223554983274, "learning_rate": 1.4924574209245742e-05, "loss": 0.5203, "step": 25050 }, { "epoch": 0.7313947038042685, "grad_norm": 0.5146026641345194, "learning_rate": 1.4922952149229522e-05, "loss": 0.5534, "step": 25051 }, { "epoch": 0.7314239000321159, "grad_norm": 0.5687551752710298, "learning_rate": 1.49213300892133e-05, "loss": 0.654, "step": 25052 }, { "epoch": 0.7314530962599632, "grad_norm": 0.502651291324754, "learning_rate": 1.4919708029197079e-05, "loss": 0.5733, "step": 25053 }, { "epoch": 0.7314822924878106, "grad_norm": 0.559973104723245, "learning_rate": 1.4918085969180862e-05, "loss": 0.5228, "step": 25054 }, { "epoch": 0.731511488715658, "grad_norm": 0.5105255230502991, "learning_rate": 1.4916463909164641e-05, "loss": 0.5587, "step": 25055 }, { "epoch": 0.7315406849435053, "grad_norm": 0.4988133501494853, "learning_rate": 1.491484184914842e-05, "loss": 0.5716, "step": 25056 }, { "epoch": 0.7315698811713527, "grad_norm": 0.5186616377340136, "learning_rate": 1.49132197891322e-05, "loss": 0.6168, "step": 25057 }, { "epoch": 0.7315990773992, "grad_norm": 0.5186947451254735, "learning_rate": 1.4911597729115978e-05, "loss": 0.5469, "step": 25058 }, { "epoch": 0.7316282736270474, "grad_norm": 0.5041137020619691, "learning_rate": 1.4909975669099758e-05, "loss": 0.5611, "step": 25059 }, { "epoch": 0.7316574698548948, "grad_norm": 0.5004762871639993, "learning_rate": 1.4908353609083536e-05, "loss": 0.5586, "step": 25060 }, { "epoch": 0.7316866660827421, "grad_norm": 0.5365435414163422, "learning_rate": 1.4906731549067316e-05, "loss": 0.6461, "step": 25061 }, { "epoch": 0.7317158623105895, "grad_norm": 0.5171288571331553, "learning_rate": 1.4905109489051095e-05, "loss": 0.6238, "step": 25062 }, { "epoch": 0.7317450585384369, "grad_norm": 0.5022374087742036, "learning_rate": 1.4903487429034876e-05, "loss": 0.4948, "step": 25063 }, { "epoch": 0.7317742547662842, "grad_norm": 0.514032213840948, "learning_rate": 1.4901865369018655e-05, "loss": 0.5539, "step": 25064 }, { "epoch": 0.7318034509941316, "grad_norm": 0.48606065196857007, "learning_rate": 1.4900243309002435e-05, "loss": 0.5151, "step": 25065 }, { "epoch": 0.7318326472219789, "grad_norm": 0.49910056350596466, "learning_rate": 1.4898621248986213e-05, "loss": 0.5404, "step": 25066 }, { "epoch": 0.7318618434498263, "grad_norm": 0.5304114098728059, "learning_rate": 1.4896999188969993e-05, "loss": 0.5951, "step": 25067 }, { "epoch": 0.7318910396776737, "grad_norm": 0.49383692567980786, "learning_rate": 1.4895377128953772e-05, "loss": 0.5185, "step": 25068 }, { "epoch": 0.731920235905521, "grad_norm": 0.5156165551993619, "learning_rate": 1.4893755068937552e-05, "loss": 0.5934, "step": 25069 }, { "epoch": 0.7319494321333684, "grad_norm": 0.6220994062285089, "learning_rate": 1.489213300892133e-05, "loss": 0.6302, "step": 25070 }, { "epoch": 0.7319786283612157, "grad_norm": 0.5516613741535193, "learning_rate": 1.4890510948905108e-05, "loss": 0.6685, "step": 25071 }, { "epoch": 0.7320078245890631, "grad_norm": 0.5073935240548633, "learning_rate": 1.4888888888888888e-05, "loss": 0.5887, "step": 25072 }, { "epoch": 0.7320370208169105, "grad_norm": 0.4854307668173422, "learning_rate": 1.488726682887267e-05, "loss": 0.5236, "step": 25073 }, { "epoch": 0.7320662170447578, "grad_norm": 0.4980282811412634, "learning_rate": 1.4885644768856449e-05, "loss": 0.5752, "step": 25074 }, { "epoch": 0.7320954132726052, "grad_norm": 0.49770974839560417, "learning_rate": 1.4884022708840229e-05, "loss": 0.5411, "step": 25075 }, { "epoch": 0.7321246095004526, "grad_norm": 0.5839426422093023, "learning_rate": 1.4882400648824007e-05, "loss": 0.7031, "step": 25076 }, { "epoch": 0.7321538057282999, "grad_norm": 0.48862163327374186, "learning_rate": 1.4880778588807787e-05, "loss": 0.5266, "step": 25077 }, { "epoch": 0.7321830019561473, "grad_norm": 0.5526143410193339, "learning_rate": 1.4879156528791565e-05, "loss": 0.6438, "step": 25078 }, { "epoch": 0.7322121981839946, "grad_norm": 0.5401656998217559, "learning_rate": 1.4877534468775346e-05, "loss": 0.6529, "step": 25079 }, { "epoch": 0.732241394411842, "grad_norm": 0.505545046420485, "learning_rate": 1.4875912408759124e-05, "loss": 0.5834, "step": 25080 }, { "epoch": 0.7322705906396894, "grad_norm": 0.5316057948786579, "learning_rate": 1.4874290348742902e-05, "loss": 0.6001, "step": 25081 }, { "epoch": 0.7322997868675367, "grad_norm": 0.5168695322754296, "learning_rate": 1.4872668288726686e-05, "loss": 0.5442, "step": 25082 }, { "epoch": 0.7323289830953841, "grad_norm": 0.531461398675186, "learning_rate": 1.4871046228710464e-05, "loss": 0.6033, "step": 25083 }, { "epoch": 0.7323581793232314, "grad_norm": 0.5587546447690293, "learning_rate": 1.4869424168694242e-05, "loss": 0.6639, "step": 25084 }, { "epoch": 0.7323873755510788, "grad_norm": 0.4937554925864886, "learning_rate": 1.4867802108678023e-05, "loss": 0.5501, "step": 25085 }, { "epoch": 0.7324165717789262, "grad_norm": 0.5387117714343287, "learning_rate": 1.4866180048661801e-05, "loss": 0.6108, "step": 25086 }, { "epoch": 0.7324457680067735, "grad_norm": 0.5270364844479819, "learning_rate": 1.4864557988645581e-05, "loss": 0.6109, "step": 25087 }, { "epoch": 0.7324749642346209, "grad_norm": 0.5468597770748105, "learning_rate": 1.486293592862936e-05, "loss": 0.6482, "step": 25088 }, { "epoch": 0.7325041604624682, "grad_norm": 0.5015925104768522, "learning_rate": 1.486131386861314e-05, "loss": 0.5572, "step": 25089 }, { "epoch": 0.7325333566903156, "grad_norm": 0.5000864896752636, "learning_rate": 1.4859691808596918e-05, "loss": 0.551, "step": 25090 }, { "epoch": 0.732562552918163, "grad_norm": 0.4955455055071357, "learning_rate": 1.4858069748580696e-05, "loss": 0.5736, "step": 25091 }, { "epoch": 0.7325917491460103, "grad_norm": 0.5124871615280893, "learning_rate": 1.4856447688564478e-05, "loss": 0.6041, "step": 25092 }, { "epoch": 0.7326209453738577, "grad_norm": 0.47212066361500743, "learning_rate": 1.4854825628548258e-05, "loss": 0.4943, "step": 25093 }, { "epoch": 0.732650141601705, "grad_norm": 0.4931519773577633, "learning_rate": 1.4853203568532036e-05, "loss": 0.5022, "step": 25094 }, { "epoch": 0.7326793378295524, "grad_norm": 0.5086502622400763, "learning_rate": 1.4851581508515816e-05, "loss": 0.5521, "step": 25095 }, { "epoch": 0.7327085340573998, "grad_norm": 0.5403899958913857, "learning_rate": 1.4849959448499595e-05, "loss": 0.6792, "step": 25096 }, { "epoch": 0.7327377302852471, "grad_norm": 0.4833463205118353, "learning_rate": 1.4848337388483375e-05, "loss": 0.5175, "step": 25097 }, { "epoch": 0.7327669265130945, "grad_norm": 0.5408566822921768, "learning_rate": 1.4846715328467153e-05, "loss": 0.6262, "step": 25098 }, { "epoch": 0.7327961227409419, "grad_norm": 0.5521603442570365, "learning_rate": 1.4845093268450933e-05, "loss": 0.6694, "step": 25099 }, { "epoch": 0.7328253189687892, "grad_norm": 0.5042904988561111, "learning_rate": 1.4843471208434712e-05, "loss": 0.5738, "step": 25100 }, { "epoch": 0.7328545151966366, "grad_norm": 0.5440600480495722, "learning_rate": 1.4841849148418493e-05, "loss": 0.6528, "step": 25101 }, { "epoch": 0.7328837114244839, "grad_norm": 0.5453613900211626, "learning_rate": 1.4840227088402272e-05, "loss": 0.6096, "step": 25102 }, { "epoch": 0.7329129076523313, "grad_norm": 0.46119713165123805, "learning_rate": 1.4838605028386052e-05, "loss": 0.486, "step": 25103 }, { "epoch": 0.7329421038801787, "grad_norm": 0.5263558813734045, "learning_rate": 1.483698296836983e-05, "loss": 0.6179, "step": 25104 }, { "epoch": 0.732971300108026, "grad_norm": 0.5268619367833884, "learning_rate": 1.483536090835361e-05, "loss": 0.591, "step": 25105 }, { "epoch": 0.7330004963358734, "grad_norm": 0.5251732424230169, "learning_rate": 1.4833738848337389e-05, "loss": 0.6687, "step": 25106 }, { "epoch": 0.7330296925637207, "grad_norm": 0.5347157870021106, "learning_rate": 1.4832116788321169e-05, "loss": 0.6233, "step": 25107 }, { "epoch": 0.7330588887915681, "grad_norm": 0.4854387734854489, "learning_rate": 1.4830494728304947e-05, "loss": 0.5446, "step": 25108 }, { "epoch": 0.7330880850194155, "grad_norm": 0.547319734444069, "learning_rate": 1.4828872668288726e-05, "loss": 0.5808, "step": 25109 }, { "epoch": 0.7331172812472628, "grad_norm": 0.5750924476288817, "learning_rate": 1.4827250608272506e-05, "loss": 0.6817, "step": 25110 }, { "epoch": 0.7331464774751102, "grad_norm": 0.5376870861997967, "learning_rate": 1.4825628548256287e-05, "loss": 0.6309, "step": 25111 }, { "epoch": 0.7331756737029576, "grad_norm": 0.5199217671922762, "learning_rate": 1.4824006488240066e-05, "loss": 0.5727, "step": 25112 }, { "epoch": 0.7332048699308049, "grad_norm": 0.5401298438048041, "learning_rate": 1.4822384428223846e-05, "loss": 0.5539, "step": 25113 }, { "epoch": 0.7332340661586523, "grad_norm": 0.5202954565698185, "learning_rate": 1.4820762368207624e-05, "loss": 0.5834, "step": 25114 }, { "epoch": 0.7332632623864996, "grad_norm": 0.5177102986043757, "learning_rate": 1.4819140308191404e-05, "loss": 0.5919, "step": 25115 }, { "epoch": 0.733292458614347, "grad_norm": 0.5461054550865464, "learning_rate": 1.4817518248175183e-05, "loss": 0.628, "step": 25116 }, { "epoch": 0.7333216548421944, "grad_norm": 0.5239876692697888, "learning_rate": 1.4815896188158963e-05, "loss": 0.5925, "step": 25117 }, { "epoch": 0.7333508510700417, "grad_norm": 0.5040920718097377, "learning_rate": 1.4814274128142741e-05, "loss": 0.5861, "step": 25118 }, { "epoch": 0.7333800472978891, "grad_norm": 0.5075107904372498, "learning_rate": 1.481265206812652e-05, "loss": 0.5804, "step": 25119 }, { "epoch": 0.7334092435257364, "grad_norm": 0.5044966213773979, "learning_rate": 1.4811030008110301e-05, "loss": 0.5875, "step": 25120 }, { "epoch": 0.7334384397535838, "grad_norm": 0.5307075153809799, "learning_rate": 1.4809407948094081e-05, "loss": 0.6291, "step": 25121 }, { "epoch": 0.7334676359814312, "grad_norm": 0.551840483229981, "learning_rate": 1.480778588807786e-05, "loss": 0.6552, "step": 25122 }, { "epoch": 0.7334968322092785, "grad_norm": 0.5301602421015434, "learning_rate": 1.480616382806164e-05, "loss": 0.6139, "step": 25123 }, { "epoch": 0.7335260284371259, "grad_norm": 0.526421935974306, "learning_rate": 1.4804541768045418e-05, "loss": 0.6081, "step": 25124 }, { "epoch": 0.7335552246649732, "grad_norm": 0.5077187182293411, "learning_rate": 1.4802919708029198e-05, "loss": 0.5639, "step": 25125 }, { "epoch": 0.7335844208928206, "grad_norm": 0.5157482178517536, "learning_rate": 1.4801297648012977e-05, "loss": 0.5912, "step": 25126 }, { "epoch": 0.733613617120668, "grad_norm": 0.5098361685913351, "learning_rate": 1.4799675587996757e-05, "loss": 0.5883, "step": 25127 }, { "epoch": 0.7336428133485153, "grad_norm": 0.48943795113339267, "learning_rate": 1.4798053527980535e-05, "loss": 0.5207, "step": 25128 }, { "epoch": 0.7336720095763627, "grad_norm": 0.4695284903986368, "learning_rate": 1.4796431467964313e-05, "loss": 0.465, "step": 25129 }, { "epoch": 0.73370120580421, "grad_norm": 0.5305210810301494, "learning_rate": 1.4794809407948095e-05, "loss": 0.6129, "step": 25130 }, { "epoch": 0.7337304020320574, "grad_norm": 0.4981576502202949, "learning_rate": 1.4793187347931875e-05, "loss": 0.5016, "step": 25131 }, { "epoch": 0.7337595982599048, "grad_norm": 0.5487320664093785, "learning_rate": 1.4791565287915654e-05, "loss": 0.618, "step": 25132 }, { "epoch": 0.7337887944877521, "grad_norm": 0.5283484419026729, "learning_rate": 1.4789943227899434e-05, "loss": 0.6176, "step": 25133 }, { "epoch": 0.7338179907155995, "grad_norm": 0.5558256365022265, "learning_rate": 1.4788321167883212e-05, "loss": 0.6975, "step": 25134 }, { "epoch": 0.7338471869434469, "grad_norm": 0.528356649969243, "learning_rate": 1.4786699107866992e-05, "loss": 0.588, "step": 25135 }, { "epoch": 0.7338763831712942, "grad_norm": 0.5440599717671857, "learning_rate": 1.478507704785077e-05, "loss": 0.6522, "step": 25136 }, { "epoch": 0.7339055793991416, "grad_norm": 0.5217744507850908, "learning_rate": 1.4783454987834549e-05, "loss": 0.6139, "step": 25137 }, { "epoch": 0.7339347756269889, "grad_norm": 0.5115143779943679, "learning_rate": 1.4781832927818329e-05, "loss": 0.588, "step": 25138 }, { "epoch": 0.7339639718548363, "grad_norm": 0.5007649634495508, "learning_rate": 1.478021086780211e-05, "loss": 0.5942, "step": 25139 }, { "epoch": 0.7339931680826837, "grad_norm": 0.47941095352299523, "learning_rate": 1.4778588807785889e-05, "loss": 0.5067, "step": 25140 }, { "epoch": 0.734022364310531, "grad_norm": 0.5741304771696518, "learning_rate": 1.4776966747769669e-05, "loss": 0.6423, "step": 25141 }, { "epoch": 0.7340515605383784, "grad_norm": 0.5220967764572239, "learning_rate": 1.4775344687753447e-05, "loss": 0.5761, "step": 25142 }, { "epoch": 0.7340807567662258, "grad_norm": 0.5246149524173562, "learning_rate": 1.4773722627737228e-05, "loss": 0.5899, "step": 25143 }, { "epoch": 0.7341099529940731, "grad_norm": 0.5139511418274507, "learning_rate": 1.4772100567721006e-05, "loss": 0.572, "step": 25144 }, { "epoch": 0.7341391492219205, "grad_norm": 0.5134308399313104, "learning_rate": 1.4770478507704786e-05, "loss": 0.5266, "step": 25145 }, { "epoch": 0.734168345449768, "grad_norm": 0.5133639121591222, "learning_rate": 1.4768856447688564e-05, "loss": 0.6092, "step": 25146 }, { "epoch": 0.7341975416776153, "grad_norm": 0.5622230692944247, "learning_rate": 1.4767234387672343e-05, "loss": 0.6231, "step": 25147 }, { "epoch": 0.7342267379054627, "grad_norm": 0.4989549932233646, "learning_rate": 1.4765612327656126e-05, "loss": 0.5136, "step": 25148 }, { "epoch": 0.73425593413331, "grad_norm": 0.48387705725482283, "learning_rate": 1.4763990267639905e-05, "loss": 0.4991, "step": 25149 }, { "epoch": 0.7342851303611574, "grad_norm": 0.5739081232681581, "learning_rate": 1.4762368207623683e-05, "loss": 0.6525, "step": 25150 }, { "epoch": 0.7343143265890048, "grad_norm": 0.5214761957295776, "learning_rate": 1.4760746147607463e-05, "loss": 0.5956, "step": 25151 }, { "epoch": 0.7343435228168521, "grad_norm": 0.5714564733193477, "learning_rate": 1.4759124087591241e-05, "loss": 0.6768, "step": 25152 }, { "epoch": 0.7343727190446995, "grad_norm": 0.5779458993596747, "learning_rate": 1.4757502027575021e-05, "loss": 0.7104, "step": 25153 }, { "epoch": 0.7344019152725468, "grad_norm": 0.5083114719656752, "learning_rate": 1.47558799675588e-05, "loss": 0.5494, "step": 25154 }, { "epoch": 0.7344311115003942, "grad_norm": 0.4902553536538642, "learning_rate": 1.475425790754258e-05, "loss": 0.5456, "step": 25155 }, { "epoch": 0.7344603077282416, "grad_norm": 0.5660482895436384, "learning_rate": 1.4752635847526358e-05, "loss": 0.566, "step": 25156 }, { "epoch": 0.7344895039560889, "grad_norm": 0.5420359587375149, "learning_rate": 1.4751013787510137e-05, "loss": 0.6413, "step": 25157 }, { "epoch": 0.7345187001839363, "grad_norm": 0.5420578560233588, "learning_rate": 1.4749391727493918e-05, "loss": 0.5646, "step": 25158 }, { "epoch": 0.7345478964117836, "grad_norm": 0.5561710994373456, "learning_rate": 1.4747769667477698e-05, "loss": 0.5966, "step": 25159 }, { "epoch": 0.734577092639631, "grad_norm": 0.5478142375643552, "learning_rate": 1.4746147607461477e-05, "loss": 0.647, "step": 25160 }, { "epoch": 0.7346062888674784, "grad_norm": 0.513496125158498, "learning_rate": 1.4744525547445257e-05, "loss": 0.5819, "step": 25161 }, { "epoch": 0.7346354850953257, "grad_norm": 0.5258722184603646, "learning_rate": 1.4742903487429035e-05, "loss": 0.6109, "step": 25162 }, { "epoch": 0.7346646813231731, "grad_norm": 0.5169166410665134, "learning_rate": 1.4741281427412815e-05, "loss": 0.6014, "step": 25163 }, { "epoch": 0.7346938775510204, "grad_norm": 0.49418130810301536, "learning_rate": 1.4739659367396594e-05, "loss": 0.554, "step": 25164 }, { "epoch": 0.7347230737788678, "grad_norm": 0.5251168689677873, "learning_rate": 1.4738037307380372e-05, "loss": 0.5807, "step": 25165 }, { "epoch": 0.7347522700067152, "grad_norm": 0.517262363832404, "learning_rate": 1.4736415247364152e-05, "loss": 0.6128, "step": 25166 }, { "epoch": 0.7347814662345625, "grad_norm": 0.4896173423494219, "learning_rate": 1.4734793187347934e-05, "loss": 0.5168, "step": 25167 }, { "epoch": 0.7348106624624099, "grad_norm": 0.5475807234284975, "learning_rate": 1.4733171127331712e-05, "loss": 0.6128, "step": 25168 }, { "epoch": 0.7348398586902573, "grad_norm": 0.5151735405333673, "learning_rate": 1.4731549067315492e-05, "loss": 0.5649, "step": 25169 }, { "epoch": 0.7348690549181046, "grad_norm": 0.5386368740632438, "learning_rate": 1.472992700729927e-05, "loss": 0.5926, "step": 25170 }, { "epoch": 0.734898251145952, "grad_norm": 0.5421494410927764, "learning_rate": 1.472830494728305e-05, "loss": 0.6091, "step": 25171 }, { "epoch": 0.7349274473737993, "grad_norm": 0.5363179279498019, "learning_rate": 1.472668288726683e-05, "loss": 0.6325, "step": 25172 }, { "epoch": 0.7349566436016467, "grad_norm": 0.5373153778322219, "learning_rate": 1.472506082725061e-05, "loss": 0.5997, "step": 25173 }, { "epoch": 0.7349858398294941, "grad_norm": 0.5210711807063733, "learning_rate": 1.4723438767234388e-05, "loss": 0.5829, "step": 25174 }, { "epoch": 0.7350150360573414, "grad_norm": 0.4976924196334283, "learning_rate": 1.4721816707218166e-05, "loss": 0.5271, "step": 25175 }, { "epoch": 0.7350442322851888, "grad_norm": 0.5679889682157117, "learning_rate": 1.4720194647201946e-05, "loss": 0.6765, "step": 25176 }, { "epoch": 0.7350734285130361, "grad_norm": 0.5209914429281216, "learning_rate": 1.4718572587185728e-05, "loss": 0.6145, "step": 25177 }, { "epoch": 0.7351026247408835, "grad_norm": 0.5477793605700726, "learning_rate": 1.4716950527169506e-05, "loss": 0.6231, "step": 25178 }, { "epoch": 0.7351318209687309, "grad_norm": 0.5528674712673122, "learning_rate": 1.4715328467153286e-05, "loss": 0.6168, "step": 25179 }, { "epoch": 0.7351610171965782, "grad_norm": 0.5581526720041085, "learning_rate": 1.4713706407137065e-05, "loss": 0.6748, "step": 25180 }, { "epoch": 0.7351902134244256, "grad_norm": 0.5363201485586659, "learning_rate": 1.4712084347120845e-05, "loss": 0.6447, "step": 25181 }, { "epoch": 0.735219409652273, "grad_norm": 0.49715516662427345, "learning_rate": 1.4710462287104623e-05, "loss": 0.5646, "step": 25182 }, { "epoch": 0.7352486058801203, "grad_norm": 0.5396177314537759, "learning_rate": 1.4708840227088403e-05, "loss": 0.6536, "step": 25183 }, { "epoch": 0.7352778021079677, "grad_norm": 0.48711913702558596, "learning_rate": 1.4707218167072182e-05, "loss": 0.5153, "step": 25184 }, { "epoch": 0.735306998335815, "grad_norm": 0.5616098945433284, "learning_rate": 1.470559610705596e-05, "loss": 0.624, "step": 25185 }, { "epoch": 0.7353361945636624, "grad_norm": 0.5311246969979978, "learning_rate": 1.4703974047039742e-05, "loss": 0.5347, "step": 25186 }, { "epoch": 0.7353653907915098, "grad_norm": 0.5189139761275406, "learning_rate": 1.4702351987023522e-05, "loss": 0.6177, "step": 25187 }, { "epoch": 0.7353945870193571, "grad_norm": 0.5695850463106261, "learning_rate": 1.47007299270073e-05, "loss": 0.665, "step": 25188 }, { "epoch": 0.7354237832472045, "grad_norm": 0.5113448654306856, "learning_rate": 1.469910786699108e-05, "loss": 0.595, "step": 25189 }, { "epoch": 0.7354529794750518, "grad_norm": 0.5277695465565314, "learning_rate": 1.4697485806974859e-05, "loss": 0.5788, "step": 25190 }, { "epoch": 0.7354821757028992, "grad_norm": 0.5573482854316527, "learning_rate": 1.4695863746958639e-05, "loss": 0.6531, "step": 25191 }, { "epoch": 0.7355113719307466, "grad_norm": 0.5157165464441387, "learning_rate": 1.4694241686942417e-05, "loss": 0.5805, "step": 25192 }, { "epoch": 0.7355405681585939, "grad_norm": 0.5472410783517695, "learning_rate": 1.4692619626926195e-05, "loss": 0.6718, "step": 25193 }, { "epoch": 0.7355697643864413, "grad_norm": 0.49299146042630815, "learning_rate": 1.4690997566909975e-05, "loss": 0.5808, "step": 25194 }, { "epoch": 0.7355989606142886, "grad_norm": 0.6233083464653485, "learning_rate": 1.4689375506893754e-05, "loss": 0.6318, "step": 25195 }, { "epoch": 0.735628156842136, "grad_norm": 0.5328692679289523, "learning_rate": 1.4687753446877536e-05, "loss": 0.6263, "step": 25196 }, { "epoch": 0.7356573530699834, "grad_norm": 0.5301000690588971, "learning_rate": 1.4686131386861316e-05, "loss": 0.5866, "step": 25197 }, { "epoch": 0.7356865492978307, "grad_norm": 0.5094078606937137, "learning_rate": 1.4684509326845094e-05, "loss": 0.5429, "step": 25198 }, { "epoch": 0.7357157455256781, "grad_norm": 0.5192414007022387, "learning_rate": 1.4682887266828874e-05, "loss": 0.5807, "step": 25199 }, { "epoch": 0.7357449417535254, "grad_norm": 0.5210261482511737, "learning_rate": 1.4681265206812652e-05, "loss": 0.5945, "step": 25200 }, { "epoch": 0.7357741379813728, "grad_norm": 0.6060134152920508, "learning_rate": 1.4679643146796433e-05, "loss": 0.7136, "step": 25201 }, { "epoch": 0.7358033342092202, "grad_norm": 0.5595708419634567, "learning_rate": 1.4678021086780211e-05, "loss": 0.662, "step": 25202 }, { "epoch": 0.7358325304370675, "grad_norm": 0.5027551502246852, "learning_rate": 1.467639902676399e-05, "loss": 0.5305, "step": 25203 }, { "epoch": 0.7358617266649149, "grad_norm": 0.5324444699305426, "learning_rate": 1.467477696674777e-05, "loss": 0.634, "step": 25204 }, { "epoch": 0.7358909228927623, "grad_norm": 0.49504023550522513, "learning_rate": 1.4673154906731551e-05, "loss": 0.5438, "step": 25205 }, { "epoch": 0.7359201191206096, "grad_norm": 0.5276355612903777, "learning_rate": 1.467153284671533e-05, "loss": 0.5733, "step": 25206 }, { "epoch": 0.735949315348457, "grad_norm": 0.5098208972632738, "learning_rate": 1.466991078669911e-05, "loss": 0.5687, "step": 25207 }, { "epoch": 0.7359785115763043, "grad_norm": 0.49397418294460715, "learning_rate": 1.4668288726682888e-05, "loss": 0.5305, "step": 25208 }, { "epoch": 0.7360077078041517, "grad_norm": 0.5030107812711905, "learning_rate": 1.4666666666666668e-05, "loss": 0.5613, "step": 25209 }, { "epoch": 0.7360369040319991, "grad_norm": 0.5414679842910349, "learning_rate": 1.4665044606650446e-05, "loss": 0.6389, "step": 25210 }, { "epoch": 0.7360661002598464, "grad_norm": 0.5577903857725001, "learning_rate": 1.4663422546634226e-05, "loss": 0.6493, "step": 25211 }, { "epoch": 0.7360952964876938, "grad_norm": 0.556628888190251, "learning_rate": 1.4661800486618005e-05, "loss": 0.6421, "step": 25212 }, { "epoch": 0.7361244927155411, "grad_norm": 0.5028682401667487, "learning_rate": 1.4660178426601783e-05, "loss": 0.5422, "step": 25213 }, { "epoch": 0.7361536889433885, "grad_norm": 0.5468480514528696, "learning_rate": 1.4658556366585565e-05, "loss": 0.6594, "step": 25214 }, { "epoch": 0.7361828851712359, "grad_norm": 0.5110206308570842, "learning_rate": 1.4656934306569345e-05, "loss": 0.5771, "step": 25215 }, { "epoch": 0.7362120813990832, "grad_norm": 0.49238322795697526, "learning_rate": 1.4655312246553123e-05, "loss": 0.5359, "step": 25216 }, { "epoch": 0.7362412776269306, "grad_norm": 0.5361086994474956, "learning_rate": 1.4653690186536903e-05, "loss": 0.6254, "step": 25217 }, { "epoch": 0.736270473854778, "grad_norm": 0.5051514300832772, "learning_rate": 1.4652068126520682e-05, "loss": 0.5961, "step": 25218 }, { "epoch": 0.7362996700826253, "grad_norm": 0.5231743048464678, "learning_rate": 1.4650446066504462e-05, "loss": 0.5949, "step": 25219 }, { "epoch": 0.7363288663104727, "grad_norm": 0.5219631745969008, "learning_rate": 1.464882400648824e-05, "loss": 0.6197, "step": 25220 }, { "epoch": 0.73635806253832, "grad_norm": 0.5090593801663684, "learning_rate": 1.464720194647202e-05, "loss": 0.555, "step": 25221 }, { "epoch": 0.7363872587661674, "grad_norm": 0.554842104304709, "learning_rate": 1.4645579886455799e-05, "loss": 0.6098, "step": 25222 }, { "epoch": 0.7364164549940148, "grad_norm": 0.5419775068719592, "learning_rate": 1.4643957826439577e-05, "loss": 0.6611, "step": 25223 }, { "epoch": 0.7364456512218621, "grad_norm": 0.4963647340481351, "learning_rate": 1.4642335766423359e-05, "loss": 0.5566, "step": 25224 }, { "epoch": 0.7364748474497095, "grad_norm": 0.5202574220427487, "learning_rate": 1.4640713706407139e-05, "loss": 0.5764, "step": 25225 }, { "epoch": 0.7365040436775568, "grad_norm": 0.5014172042556281, "learning_rate": 1.4639091646390917e-05, "loss": 0.5762, "step": 25226 }, { "epoch": 0.7365332399054042, "grad_norm": 0.5125197312607221, "learning_rate": 1.4637469586374697e-05, "loss": 0.561, "step": 25227 }, { "epoch": 0.7365624361332516, "grad_norm": 0.5170608756551442, "learning_rate": 1.4635847526358476e-05, "loss": 0.6267, "step": 25228 }, { "epoch": 0.7365916323610989, "grad_norm": 0.5063272141527604, "learning_rate": 1.4634225466342256e-05, "loss": 0.5499, "step": 25229 }, { "epoch": 0.7366208285889463, "grad_norm": 0.5770609666030868, "learning_rate": 1.4632603406326034e-05, "loss": 0.7092, "step": 25230 }, { "epoch": 0.7366500248167936, "grad_norm": 0.5266989802456007, "learning_rate": 1.4630981346309813e-05, "loss": 0.5948, "step": 25231 }, { "epoch": 0.736679221044641, "grad_norm": 0.5006570571590795, "learning_rate": 1.4629359286293593e-05, "loss": 0.6071, "step": 25232 }, { "epoch": 0.7367084172724884, "grad_norm": 0.4832095311427375, "learning_rate": 1.4627737226277374e-05, "loss": 0.5277, "step": 25233 }, { "epoch": 0.7367376135003357, "grad_norm": 0.4817563542941245, "learning_rate": 1.4626115166261153e-05, "loss": 0.5597, "step": 25234 }, { "epoch": 0.7367668097281831, "grad_norm": 0.5158709468707897, "learning_rate": 1.4624493106244933e-05, "loss": 0.584, "step": 25235 }, { "epoch": 0.7367960059560305, "grad_norm": 0.5418269337039398, "learning_rate": 1.4622871046228711e-05, "loss": 0.6481, "step": 25236 }, { "epoch": 0.7368252021838778, "grad_norm": 0.5197308979244657, "learning_rate": 1.4621248986212491e-05, "loss": 0.6167, "step": 25237 }, { "epoch": 0.7368543984117252, "grad_norm": 0.5194605702415007, "learning_rate": 1.461962692619627e-05, "loss": 0.6038, "step": 25238 }, { "epoch": 0.7368835946395725, "grad_norm": 0.5301091383979707, "learning_rate": 1.461800486618005e-05, "loss": 0.5966, "step": 25239 }, { "epoch": 0.7369127908674199, "grad_norm": 0.49280481203799315, "learning_rate": 1.4616382806163828e-05, "loss": 0.5323, "step": 25240 }, { "epoch": 0.7369419870952673, "grad_norm": 0.5676561705979106, "learning_rate": 1.4614760746147606e-05, "loss": 0.6628, "step": 25241 }, { "epoch": 0.7369711833231146, "grad_norm": 0.5736983405399264, "learning_rate": 1.4613138686131387e-05, "loss": 0.6628, "step": 25242 }, { "epoch": 0.737000379550962, "grad_norm": 0.4692313583222024, "learning_rate": 1.4611516626115168e-05, "loss": 0.4706, "step": 25243 }, { "epoch": 0.7370295757788093, "grad_norm": 0.5359733094535312, "learning_rate": 1.4609894566098947e-05, "loss": 0.5811, "step": 25244 }, { "epoch": 0.7370587720066567, "grad_norm": 0.4751299580742647, "learning_rate": 1.4608272506082727e-05, "loss": 0.5219, "step": 25245 }, { "epoch": 0.7370879682345041, "grad_norm": 0.5391543425317146, "learning_rate": 1.4606650446066505e-05, "loss": 0.6508, "step": 25246 }, { "epoch": 0.7371171644623514, "grad_norm": 0.5012065717685973, "learning_rate": 1.4605028386050285e-05, "loss": 0.587, "step": 25247 }, { "epoch": 0.7371463606901988, "grad_norm": 0.49824618957696754, "learning_rate": 1.4603406326034064e-05, "loss": 0.59, "step": 25248 }, { "epoch": 0.7371755569180461, "grad_norm": 0.5285808960539391, "learning_rate": 1.4601784266017844e-05, "loss": 0.5928, "step": 25249 }, { "epoch": 0.7372047531458935, "grad_norm": 0.5445525018021947, "learning_rate": 1.4600162206001622e-05, "loss": 0.6251, "step": 25250 }, { "epoch": 0.7372339493737409, "grad_norm": 0.5274000773662346, "learning_rate": 1.45985401459854e-05, "loss": 0.6221, "step": 25251 }, { "epoch": 0.7372631456015882, "grad_norm": 0.5531405661947579, "learning_rate": 1.4596918085969182e-05, "loss": 0.6438, "step": 25252 }, { "epoch": 0.7372923418294356, "grad_norm": 0.5213105339452347, "learning_rate": 1.4595296025952962e-05, "loss": 0.6072, "step": 25253 }, { "epoch": 0.737321538057283, "grad_norm": 0.49473896814652707, "learning_rate": 1.459367396593674e-05, "loss": 0.4863, "step": 25254 }, { "epoch": 0.7373507342851303, "grad_norm": 0.4972984967856031, "learning_rate": 1.459205190592052e-05, "loss": 0.5428, "step": 25255 }, { "epoch": 0.7373799305129777, "grad_norm": 0.586075469001789, "learning_rate": 1.4590429845904299e-05, "loss": 0.6798, "step": 25256 }, { "epoch": 0.737409126740825, "grad_norm": 0.5477666280560483, "learning_rate": 1.4588807785888079e-05, "loss": 0.571, "step": 25257 }, { "epoch": 0.7374383229686724, "grad_norm": 0.589257056621938, "learning_rate": 1.4587185725871857e-05, "loss": 0.7029, "step": 25258 }, { "epoch": 0.7374675191965198, "grad_norm": 0.5178138172534745, "learning_rate": 1.4585563665855636e-05, "loss": 0.6044, "step": 25259 }, { "epoch": 0.7374967154243671, "grad_norm": 0.5232593196087092, "learning_rate": 1.4583941605839416e-05, "loss": 0.6017, "step": 25260 }, { "epoch": 0.7375259116522145, "grad_norm": 0.5623262075927781, "learning_rate": 1.4582319545823194e-05, "loss": 0.655, "step": 25261 }, { "epoch": 0.7375551078800618, "grad_norm": 0.49959936888176665, "learning_rate": 1.4580697485806976e-05, "loss": 0.5391, "step": 25262 }, { "epoch": 0.7375843041079092, "grad_norm": 0.5317798917056612, "learning_rate": 1.4579075425790756e-05, "loss": 0.6037, "step": 25263 }, { "epoch": 0.7376135003357566, "grad_norm": 0.5241156029577329, "learning_rate": 1.4577453365774534e-05, "loss": 0.5821, "step": 25264 }, { "epoch": 0.7376426965636039, "grad_norm": 0.4909955967896082, "learning_rate": 1.4575831305758315e-05, "loss": 0.5428, "step": 25265 }, { "epoch": 0.7376718927914513, "grad_norm": 0.5231590459066605, "learning_rate": 1.4574209245742093e-05, "loss": 0.613, "step": 25266 }, { "epoch": 0.7377010890192988, "grad_norm": 0.5364878591882841, "learning_rate": 1.4572587185725873e-05, "loss": 0.638, "step": 25267 }, { "epoch": 0.7377302852471461, "grad_norm": 0.5566944641745113, "learning_rate": 1.4570965125709651e-05, "loss": 0.5952, "step": 25268 }, { "epoch": 0.7377594814749935, "grad_norm": 0.538298468213414, "learning_rate": 1.456934306569343e-05, "loss": 0.58, "step": 25269 }, { "epoch": 0.7377886777028408, "grad_norm": 0.5163546752844567, "learning_rate": 1.456772100567721e-05, "loss": 0.6094, "step": 25270 }, { "epoch": 0.7378178739306882, "grad_norm": 0.5344031142814771, "learning_rate": 1.4566098945660992e-05, "loss": 0.6321, "step": 25271 }, { "epoch": 0.7378470701585356, "grad_norm": 0.5357925651550924, "learning_rate": 1.456447688564477e-05, "loss": 0.5926, "step": 25272 }, { "epoch": 0.7378762663863829, "grad_norm": 0.5232882520765909, "learning_rate": 1.456285482562855e-05, "loss": 0.5901, "step": 25273 }, { "epoch": 0.7379054626142303, "grad_norm": 0.5451006302575439, "learning_rate": 1.4561232765612328e-05, "loss": 0.6363, "step": 25274 }, { "epoch": 0.7379346588420777, "grad_norm": 0.487886082833358, "learning_rate": 1.4559610705596108e-05, "loss": 0.5332, "step": 25275 }, { "epoch": 0.737963855069925, "grad_norm": 0.5105562894979196, "learning_rate": 1.4557988645579887e-05, "loss": 0.584, "step": 25276 }, { "epoch": 0.7379930512977724, "grad_norm": 0.5503011996713953, "learning_rate": 1.4556366585563667e-05, "loss": 0.6496, "step": 25277 }, { "epoch": 0.7380222475256197, "grad_norm": 0.5389719319612414, "learning_rate": 1.4554744525547445e-05, "loss": 0.6321, "step": 25278 }, { "epoch": 0.7380514437534671, "grad_norm": 0.5165682891723496, "learning_rate": 1.4553122465531224e-05, "loss": 0.5721, "step": 25279 }, { "epoch": 0.7380806399813145, "grad_norm": 0.5565311682018315, "learning_rate": 1.4551500405515004e-05, "loss": 0.6074, "step": 25280 }, { "epoch": 0.7381098362091618, "grad_norm": 0.555077897698223, "learning_rate": 1.4549878345498785e-05, "loss": 0.6048, "step": 25281 }, { "epoch": 0.7381390324370092, "grad_norm": 0.5655885321058824, "learning_rate": 1.4548256285482564e-05, "loss": 0.6169, "step": 25282 }, { "epoch": 0.7381682286648565, "grad_norm": 0.5505063078622766, "learning_rate": 1.4546634225466344e-05, "loss": 0.648, "step": 25283 }, { "epoch": 0.7381974248927039, "grad_norm": 0.5099879444683193, "learning_rate": 1.4545012165450122e-05, "loss": 0.57, "step": 25284 }, { "epoch": 0.7382266211205513, "grad_norm": 0.49313389294867377, "learning_rate": 1.4543390105433902e-05, "loss": 0.5188, "step": 25285 }, { "epoch": 0.7382558173483986, "grad_norm": 0.5228124886602313, "learning_rate": 1.454176804541768e-05, "loss": 0.611, "step": 25286 }, { "epoch": 0.738285013576246, "grad_norm": 0.55641858782623, "learning_rate": 1.4540145985401459e-05, "loss": 0.6697, "step": 25287 }, { "epoch": 0.7383142098040933, "grad_norm": 0.5511024550349455, "learning_rate": 1.4538523925385239e-05, "loss": 0.637, "step": 25288 }, { "epoch": 0.7383434060319407, "grad_norm": 0.4816329900964534, "learning_rate": 1.4536901865369018e-05, "loss": 0.5163, "step": 25289 }, { "epoch": 0.7383726022597881, "grad_norm": 0.47453974598986437, "learning_rate": 1.45352798053528e-05, "loss": 0.4949, "step": 25290 }, { "epoch": 0.7384017984876354, "grad_norm": 0.5489531920742478, "learning_rate": 1.453365774533658e-05, "loss": 0.5985, "step": 25291 }, { "epoch": 0.7384309947154828, "grad_norm": 0.5026550452412603, "learning_rate": 1.4532035685320358e-05, "loss": 0.5576, "step": 25292 }, { "epoch": 0.7384601909433302, "grad_norm": 0.5118276819365075, "learning_rate": 1.4530413625304138e-05, "loss": 0.568, "step": 25293 }, { "epoch": 0.7384893871711775, "grad_norm": 0.4751199191805327, "learning_rate": 1.4528791565287916e-05, "loss": 0.501, "step": 25294 }, { "epoch": 0.7385185833990249, "grad_norm": 0.5123099303963354, "learning_rate": 1.4527169505271696e-05, "loss": 0.5778, "step": 25295 }, { "epoch": 0.7385477796268722, "grad_norm": 0.5467283842636158, "learning_rate": 1.4525547445255475e-05, "loss": 0.6095, "step": 25296 }, { "epoch": 0.7385769758547196, "grad_norm": 0.5061718895167512, "learning_rate": 1.4523925385239253e-05, "loss": 0.5795, "step": 25297 }, { "epoch": 0.738606172082567, "grad_norm": 0.521919884689092, "learning_rate": 1.4522303325223033e-05, "loss": 0.6259, "step": 25298 }, { "epoch": 0.7386353683104143, "grad_norm": 0.5406591620455254, "learning_rate": 1.4520681265206815e-05, "loss": 0.6368, "step": 25299 }, { "epoch": 0.7386645645382617, "grad_norm": 0.5398771978434356, "learning_rate": 1.4519059205190593e-05, "loss": 0.5939, "step": 25300 }, { "epoch": 0.738693760766109, "grad_norm": 0.5385553491059174, "learning_rate": 1.4517437145174373e-05, "loss": 0.6293, "step": 25301 }, { "epoch": 0.7387229569939564, "grad_norm": 0.4818874095560532, "learning_rate": 1.4515815085158152e-05, "loss": 0.5491, "step": 25302 }, { "epoch": 0.7387521532218038, "grad_norm": 0.5425975242224084, "learning_rate": 1.4514193025141932e-05, "loss": 0.6224, "step": 25303 }, { "epoch": 0.7387813494496511, "grad_norm": 0.5107745410966622, "learning_rate": 1.451257096512571e-05, "loss": 0.5648, "step": 25304 }, { "epoch": 0.7388105456774985, "grad_norm": 0.5203973244450205, "learning_rate": 1.451094890510949e-05, "loss": 0.5769, "step": 25305 }, { "epoch": 0.7388397419053458, "grad_norm": 0.5107213100700494, "learning_rate": 1.4509326845093269e-05, "loss": 0.5738, "step": 25306 }, { "epoch": 0.7388689381331932, "grad_norm": 0.4855106867227883, "learning_rate": 1.4507704785077047e-05, "loss": 0.5074, "step": 25307 }, { "epoch": 0.7388981343610406, "grad_norm": 0.5288639679335643, "learning_rate": 1.4506082725060827e-05, "loss": 0.5558, "step": 25308 }, { "epoch": 0.7389273305888879, "grad_norm": 0.49923955878125154, "learning_rate": 1.4504460665044609e-05, "loss": 0.5723, "step": 25309 }, { "epoch": 0.7389565268167353, "grad_norm": 0.48209143753613115, "learning_rate": 1.4502838605028387e-05, "loss": 0.5068, "step": 25310 }, { "epoch": 0.7389857230445827, "grad_norm": 0.5345190783093013, "learning_rate": 1.4501216545012167e-05, "loss": 0.6141, "step": 25311 }, { "epoch": 0.73901491927243, "grad_norm": 0.5266034581592439, "learning_rate": 1.4499594484995946e-05, "loss": 0.5975, "step": 25312 }, { "epoch": 0.7390441155002774, "grad_norm": 0.53221484668359, "learning_rate": 1.4497972424979726e-05, "loss": 0.6045, "step": 25313 }, { "epoch": 0.7390733117281247, "grad_norm": 0.5271546489248384, "learning_rate": 1.4496350364963504e-05, "loss": 0.6279, "step": 25314 }, { "epoch": 0.7391025079559721, "grad_norm": 0.5347657901824673, "learning_rate": 1.4494728304947282e-05, "loss": 0.6301, "step": 25315 }, { "epoch": 0.7391317041838195, "grad_norm": 0.5175346401630149, "learning_rate": 1.4493106244931062e-05, "loss": 0.5795, "step": 25316 }, { "epoch": 0.7391609004116668, "grad_norm": 0.5268562195966565, "learning_rate": 1.449148418491484e-05, "loss": 0.5866, "step": 25317 }, { "epoch": 0.7391900966395142, "grad_norm": 0.5239615590985871, "learning_rate": 1.4489862124898623e-05, "loss": 0.5701, "step": 25318 }, { "epoch": 0.7392192928673615, "grad_norm": 0.5325806939689884, "learning_rate": 1.4488240064882403e-05, "loss": 0.614, "step": 25319 }, { "epoch": 0.7392484890952089, "grad_norm": 0.5515714486878458, "learning_rate": 1.4486618004866181e-05, "loss": 0.6259, "step": 25320 }, { "epoch": 0.7392776853230563, "grad_norm": 0.5003342954293002, "learning_rate": 1.4484995944849961e-05, "loss": 0.5616, "step": 25321 }, { "epoch": 0.7393068815509036, "grad_norm": 0.5134324350945706, "learning_rate": 1.448337388483374e-05, "loss": 0.6096, "step": 25322 }, { "epoch": 0.739336077778751, "grad_norm": 0.512851743776869, "learning_rate": 1.448175182481752e-05, "loss": 0.5913, "step": 25323 }, { "epoch": 0.7393652740065983, "grad_norm": 0.5060117250437081, "learning_rate": 1.4480129764801298e-05, "loss": 0.546, "step": 25324 }, { "epoch": 0.7393944702344457, "grad_norm": 0.5278730893833615, "learning_rate": 1.4478507704785076e-05, "loss": 0.5832, "step": 25325 }, { "epoch": 0.7394236664622931, "grad_norm": 0.4865138514220854, "learning_rate": 1.4476885644768856e-05, "loss": 0.5439, "step": 25326 }, { "epoch": 0.7394528626901404, "grad_norm": 0.5054784141397419, "learning_rate": 1.4475263584752635e-05, "loss": 0.5705, "step": 25327 }, { "epoch": 0.7394820589179878, "grad_norm": 0.5032047709706695, "learning_rate": 1.4473641524736416e-05, "loss": 0.5562, "step": 25328 }, { "epoch": 0.7395112551458352, "grad_norm": 0.5100714471010863, "learning_rate": 1.4472019464720197e-05, "loss": 0.5446, "step": 25329 }, { "epoch": 0.7395404513736825, "grad_norm": 0.5618323752510086, "learning_rate": 1.4470397404703975e-05, "loss": 0.6128, "step": 25330 }, { "epoch": 0.7395696476015299, "grad_norm": 0.4951070149523489, "learning_rate": 1.4468775344687755e-05, "loss": 0.5638, "step": 25331 }, { "epoch": 0.7395988438293772, "grad_norm": 0.514869233848421, "learning_rate": 1.4467153284671533e-05, "loss": 0.5839, "step": 25332 }, { "epoch": 0.7396280400572246, "grad_norm": 0.5221462867092703, "learning_rate": 1.4465531224655313e-05, "loss": 0.5698, "step": 25333 }, { "epoch": 0.739657236285072, "grad_norm": 0.5212286795940215, "learning_rate": 1.4463909164639092e-05, "loss": 0.6056, "step": 25334 }, { "epoch": 0.7396864325129193, "grad_norm": 0.5378022503394733, "learning_rate": 1.446228710462287e-05, "loss": 0.6227, "step": 25335 }, { "epoch": 0.7397156287407667, "grad_norm": 0.5275529420768619, "learning_rate": 1.446066504460665e-05, "loss": 0.6431, "step": 25336 }, { "epoch": 0.739744824968614, "grad_norm": 0.49930622959902254, "learning_rate": 1.4459042984590432e-05, "loss": 0.5389, "step": 25337 }, { "epoch": 0.7397740211964614, "grad_norm": 0.5052336082500548, "learning_rate": 1.445742092457421e-05, "loss": 0.57, "step": 25338 }, { "epoch": 0.7398032174243088, "grad_norm": 0.538032392556834, "learning_rate": 1.445579886455799e-05, "loss": 0.636, "step": 25339 }, { "epoch": 0.7398324136521561, "grad_norm": 0.5472402704041156, "learning_rate": 1.4454176804541769e-05, "loss": 0.618, "step": 25340 }, { "epoch": 0.7398616098800035, "grad_norm": 0.5358374713533014, "learning_rate": 1.4452554744525549e-05, "loss": 0.6596, "step": 25341 }, { "epoch": 0.7398908061078509, "grad_norm": 0.5363631094413626, "learning_rate": 1.4450932684509327e-05, "loss": 0.6213, "step": 25342 }, { "epoch": 0.7399200023356982, "grad_norm": 0.5285846680593136, "learning_rate": 1.4449310624493107e-05, "loss": 0.6088, "step": 25343 }, { "epoch": 0.7399491985635456, "grad_norm": 0.4771958780840931, "learning_rate": 1.4447688564476886e-05, "loss": 0.4898, "step": 25344 }, { "epoch": 0.7399783947913929, "grad_norm": 0.5238008063841948, "learning_rate": 1.4446066504460664e-05, "loss": 0.564, "step": 25345 }, { "epoch": 0.7400075910192403, "grad_norm": 0.5213071852657585, "learning_rate": 1.4444444444444444e-05, "loss": 0.591, "step": 25346 }, { "epoch": 0.7400367872470877, "grad_norm": 0.49474338704564663, "learning_rate": 1.4442822384428226e-05, "loss": 0.5491, "step": 25347 }, { "epoch": 0.740065983474935, "grad_norm": 0.5336553550496479, "learning_rate": 1.4441200324412004e-05, "loss": 0.6074, "step": 25348 }, { "epoch": 0.7400951797027824, "grad_norm": 0.555794984233532, "learning_rate": 1.4439578264395784e-05, "loss": 0.6598, "step": 25349 }, { "epoch": 0.7401243759306297, "grad_norm": 0.5081368479285936, "learning_rate": 1.4437956204379563e-05, "loss": 0.5647, "step": 25350 }, { "epoch": 0.7401535721584771, "grad_norm": 0.5031659417820559, "learning_rate": 1.4436334144363343e-05, "loss": 0.5277, "step": 25351 }, { "epoch": 0.7401827683863245, "grad_norm": 0.5379318453446349, "learning_rate": 1.4434712084347121e-05, "loss": 0.6235, "step": 25352 }, { "epoch": 0.7402119646141718, "grad_norm": 0.48027503769901997, "learning_rate": 1.44330900243309e-05, "loss": 0.5546, "step": 25353 }, { "epoch": 0.7402411608420192, "grad_norm": 0.5080580805086354, "learning_rate": 1.443146796431468e-05, "loss": 0.5769, "step": 25354 }, { "epoch": 0.7402703570698665, "grad_norm": 0.5246502200774837, "learning_rate": 1.4429845904298458e-05, "loss": 0.5467, "step": 25355 }, { "epoch": 0.7402995532977139, "grad_norm": 0.5578313397455164, "learning_rate": 1.442822384428224e-05, "loss": 0.6317, "step": 25356 }, { "epoch": 0.7403287495255613, "grad_norm": 0.5110716122766887, "learning_rate": 1.442660178426602e-05, "loss": 0.5602, "step": 25357 }, { "epoch": 0.7403579457534086, "grad_norm": 0.5081424076169981, "learning_rate": 1.4424979724249798e-05, "loss": 0.5651, "step": 25358 }, { "epoch": 0.740387141981256, "grad_norm": 0.5408428381072581, "learning_rate": 1.4423357664233578e-05, "loss": 0.6069, "step": 25359 }, { "epoch": 0.7404163382091034, "grad_norm": 0.5172667965562486, "learning_rate": 1.4421735604217357e-05, "loss": 0.5537, "step": 25360 }, { "epoch": 0.7404455344369507, "grad_norm": 0.5446942620296246, "learning_rate": 1.4420113544201137e-05, "loss": 0.6243, "step": 25361 }, { "epoch": 0.7404747306647981, "grad_norm": 0.5157246872439238, "learning_rate": 1.4418491484184915e-05, "loss": 0.5823, "step": 25362 }, { "epoch": 0.7405039268926454, "grad_norm": 0.4958142002204919, "learning_rate": 1.4416869424168693e-05, "loss": 0.5003, "step": 25363 }, { "epoch": 0.7405331231204928, "grad_norm": 0.5326209633791766, "learning_rate": 1.4415247364152473e-05, "loss": 0.586, "step": 25364 }, { "epoch": 0.7405623193483402, "grad_norm": 0.5149115407376132, "learning_rate": 1.4413625304136255e-05, "loss": 0.5363, "step": 25365 }, { "epoch": 0.7405915155761875, "grad_norm": 0.5387328472842481, "learning_rate": 1.4412003244120034e-05, "loss": 0.5956, "step": 25366 }, { "epoch": 0.7406207118040349, "grad_norm": 0.5263708560620075, "learning_rate": 1.4410381184103814e-05, "loss": 0.5805, "step": 25367 }, { "epoch": 0.7406499080318822, "grad_norm": 0.5081686786596687, "learning_rate": 1.4408759124087592e-05, "loss": 0.5931, "step": 25368 }, { "epoch": 0.7406791042597296, "grad_norm": 0.5084394153858875, "learning_rate": 1.4407137064071372e-05, "loss": 0.5564, "step": 25369 }, { "epoch": 0.740708300487577, "grad_norm": 0.5209711230334163, "learning_rate": 1.440551500405515e-05, "loss": 0.6122, "step": 25370 }, { "epoch": 0.7407374967154243, "grad_norm": 0.4749574453818287, "learning_rate": 1.440389294403893e-05, "loss": 0.5208, "step": 25371 }, { "epoch": 0.7407666929432717, "grad_norm": 0.53294704151231, "learning_rate": 1.4402270884022709e-05, "loss": 0.584, "step": 25372 }, { "epoch": 0.740795889171119, "grad_norm": 0.512679793436983, "learning_rate": 1.4400648824006487e-05, "loss": 0.5548, "step": 25373 }, { "epoch": 0.7408250853989664, "grad_norm": 0.5662270108900784, "learning_rate": 1.4399026763990267e-05, "loss": 0.6567, "step": 25374 }, { "epoch": 0.7408542816268138, "grad_norm": 0.5597804185313213, "learning_rate": 1.4397404703974049e-05, "loss": 0.6859, "step": 25375 }, { "epoch": 0.7408834778546611, "grad_norm": 0.551285013279072, "learning_rate": 1.4395782643957828e-05, "loss": 0.6175, "step": 25376 }, { "epoch": 0.7409126740825085, "grad_norm": 0.5435625823224837, "learning_rate": 1.4394160583941608e-05, "loss": 0.6307, "step": 25377 }, { "epoch": 0.7409418703103559, "grad_norm": 0.5279318821322798, "learning_rate": 1.4392538523925386e-05, "loss": 0.6236, "step": 25378 }, { "epoch": 0.7409710665382032, "grad_norm": 0.4906796030603292, "learning_rate": 1.4390916463909166e-05, "loss": 0.5202, "step": 25379 }, { "epoch": 0.7410002627660506, "grad_norm": 0.5168682507887745, "learning_rate": 1.4389294403892944e-05, "loss": 0.5792, "step": 25380 }, { "epoch": 0.7410294589938979, "grad_norm": 0.5123302325030152, "learning_rate": 1.4387672343876723e-05, "loss": 0.6164, "step": 25381 }, { "epoch": 0.7410586552217453, "grad_norm": 0.5046058579659789, "learning_rate": 1.4386050283860503e-05, "loss": 0.5761, "step": 25382 }, { "epoch": 0.7410878514495927, "grad_norm": 0.5445182279855231, "learning_rate": 1.4384428223844281e-05, "loss": 0.6237, "step": 25383 }, { "epoch": 0.74111704767744, "grad_norm": 0.520724376189669, "learning_rate": 1.4382806163828063e-05, "loss": 0.6079, "step": 25384 }, { "epoch": 0.7411462439052874, "grad_norm": 0.5143364298833144, "learning_rate": 1.4381184103811843e-05, "loss": 0.5825, "step": 25385 }, { "epoch": 0.7411754401331347, "grad_norm": 0.5473630792958838, "learning_rate": 1.4379562043795621e-05, "loss": 0.6149, "step": 25386 }, { "epoch": 0.7412046363609822, "grad_norm": 0.512200644517339, "learning_rate": 1.4377939983779401e-05, "loss": 0.5841, "step": 25387 }, { "epoch": 0.7412338325888296, "grad_norm": 0.49105551600605674, "learning_rate": 1.437631792376318e-05, "loss": 0.5493, "step": 25388 }, { "epoch": 0.7412630288166769, "grad_norm": 0.5118009615921645, "learning_rate": 1.437469586374696e-05, "loss": 0.5718, "step": 25389 }, { "epoch": 0.7412922250445243, "grad_norm": 0.5342607334293406, "learning_rate": 1.4373073803730738e-05, "loss": 0.6495, "step": 25390 }, { "epoch": 0.7413214212723717, "grad_norm": 0.5357741173214101, "learning_rate": 1.4371451743714517e-05, "loss": 0.6522, "step": 25391 }, { "epoch": 0.741350617500219, "grad_norm": 0.46626956553704885, "learning_rate": 1.4369829683698297e-05, "loss": 0.5017, "step": 25392 }, { "epoch": 0.7413798137280664, "grad_norm": 0.5450640116923728, "learning_rate": 1.4368207623682075e-05, "loss": 0.624, "step": 25393 }, { "epoch": 0.7414090099559137, "grad_norm": 0.5208058896237504, "learning_rate": 1.4366585563665857e-05, "loss": 0.5118, "step": 25394 }, { "epoch": 0.7414382061837611, "grad_norm": 0.5342706831983902, "learning_rate": 1.4364963503649637e-05, "loss": 0.6258, "step": 25395 }, { "epoch": 0.7414674024116085, "grad_norm": 0.560398533671098, "learning_rate": 1.4363341443633415e-05, "loss": 0.676, "step": 25396 }, { "epoch": 0.7414965986394558, "grad_norm": 0.49936754325259153, "learning_rate": 1.4361719383617195e-05, "loss": 0.5227, "step": 25397 }, { "epoch": 0.7415257948673032, "grad_norm": 0.5284912113034366, "learning_rate": 1.4360097323600974e-05, "loss": 0.586, "step": 25398 }, { "epoch": 0.7415549910951506, "grad_norm": 0.5126959007043194, "learning_rate": 1.4358475263584754e-05, "loss": 0.5749, "step": 25399 }, { "epoch": 0.7415841873229979, "grad_norm": 0.4807859990050095, "learning_rate": 1.4356853203568532e-05, "loss": 0.5191, "step": 25400 }, { "epoch": 0.7416133835508453, "grad_norm": 0.5332338490668674, "learning_rate": 1.435523114355231e-05, "loss": 0.6114, "step": 25401 }, { "epoch": 0.7416425797786926, "grad_norm": 0.5235922003668559, "learning_rate": 1.435360908353609e-05, "loss": 0.5744, "step": 25402 }, { "epoch": 0.74167177600654, "grad_norm": 0.5562440117566464, "learning_rate": 1.4351987023519872e-05, "loss": 0.6097, "step": 25403 }, { "epoch": 0.7417009722343874, "grad_norm": 0.5366629840326193, "learning_rate": 1.435036496350365e-05, "loss": 0.6516, "step": 25404 }, { "epoch": 0.7417301684622347, "grad_norm": 0.5158369457854154, "learning_rate": 1.4348742903487431e-05, "loss": 0.5871, "step": 25405 }, { "epoch": 0.7417593646900821, "grad_norm": 0.5188072684743567, "learning_rate": 1.434712084347121e-05, "loss": 0.6052, "step": 25406 }, { "epoch": 0.7417885609179294, "grad_norm": 0.5430855058931062, "learning_rate": 1.434549878345499e-05, "loss": 0.6263, "step": 25407 }, { "epoch": 0.7418177571457768, "grad_norm": 0.5544189649769854, "learning_rate": 1.4343876723438768e-05, "loss": 0.6301, "step": 25408 }, { "epoch": 0.7418469533736242, "grad_norm": 0.49664209842772405, "learning_rate": 1.4342254663422546e-05, "loss": 0.5244, "step": 25409 }, { "epoch": 0.7418761496014715, "grad_norm": 0.5454113740341926, "learning_rate": 1.4340632603406326e-05, "loss": 0.6248, "step": 25410 }, { "epoch": 0.7419053458293189, "grad_norm": 0.5448925343481829, "learning_rate": 1.4339010543390104e-05, "loss": 0.6036, "step": 25411 }, { "epoch": 0.7419345420571662, "grad_norm": 0.5341514258828522, "learning_rate": 1.4337388483373885e-05, "loss": 0.6335, "step": 25412 }, { "epoch": 0.7419637382850136, "grad_norm": 0.5298917909162337, "learning_rate": 1.4335766423357666e-05, "loss": 0.5519, "step": 25413 }, { "epoch": 0.741992934512861, "grad_norm": 0.522889906583786, "learning_rate": 1.4334144363341445e-05, "loss": 0.6005, "step": 25414 }, { "epoch": 0.7420221307407083, "grad_norm": 0.5628637728000816, "learning_rate": 1.4332522303325225e-05, "loss": 0.6289, "step": 25415 }, { "epoch": 0.7420513269685557, "grad_norm": 0.48918793767546287, "learning_rate": 1.4330900243309003e-05, "loss": 0.5317, "step": 25416 }, { "epoch": 0.742080523196403, "grad_norm": 0.5444328683255973, "learning_rate": 1.4329278183292783e-05, "loss": 0.5665, "step": 25417 }, { "epoch": 0.7421097194242504, "grad_norm": 0.4894933379539887, "learning_rate": 1.4327656123276562e-05, "loss": 0.5262, "step": 25418 }, { "epoch": 0.7421389156520978, "grad_norm": 0.5174974613554298, "learning_rate": 1.432603406326034e-05, "loss": 0.5606, "step": 25419 }, { "epoch": 0.7421681118799451, "grad_norm": 0.5349883706975536, "learning_rate": 1.432441200324412e-05, "loss": 0.5894, "step": 25420 }, { "epoch": 0.7421973081077925, "grad_norm": 0.5236745639358867, "learning_rate": 1.4322789943227898e-05, "loss": 0.6067, "step": 25421 }, { "epoch": 0.7422265043356399, "grad_norm": 0.5167091848560877, "learning_rate": 1.432116788321168e-05, "loss": 0.5888, "step": 25422 }, { "epoch": 0.7422557005634872, "grad_norm": 0.5097951810575023, "learning_rate": 1.431954582319546e-05, "loss": 0.5781, "step": 25423 }, { "epoch": 0.7422848967913346, "grad_norm": 0.45746053111526014, "learning_rate": 1.4317923763179239e-05, "loss": 0.4692, "step": 25424 }, { "epoch": 0.7423140930191819, "grad_norm": 0.476353791572092, "learning_rate": 1.4316301703163019e-05, "loss": 0.5076, "step": 25425 }, { "epoch": 0.7423432892470293, "grad_norm": 0.5569776464755214, "learning_rate": 1.4314679643146797e-05, "loss": 0.5512, "step": 25426 }, { "epoch": 0.7423724854748767, "grad_norm": 0.5314557750875134, "learning_rate": 1.4313057583130577e-05, "loss": 0.5839, "step": 25427 }, { "epoch": 0.742401681702724, "grad_norm": 0.49338778228240676, "learning_rate": 1.4311435523114355e-05, "loss": 0.5586, "step": 25428 }, { "epoch": 0.7424308779305714, "grad_norm": 0.4950471357055509, "learning_rate": 1.4309813463098134e-05, "loss": 0.5274, "step": 25429 }, { "epoch": 0.7424600741584187, "grad_norm": 0.547461558316366, "learning_rate": 1.4308191403081914e-05, "loss": 0.6369, "step": 25430 }, { "epoch": 0.7424892703862661, "grad_norm": 0.4861638981307792, "learning_rate": 1.4306569343065692e-05, "loss": 0.5069, "step": 25431 }, { "epoch": 0.7425184666141135, "grad_norm": 0.5066181641475648, "learning_rate": 1.4304947283049474e-05, "loss": 0.5548, "step": 25432 }, { "epoch": 0.7425476628419608, "grad_norm": 0.5053711192432372, "learning_rate": 1.4303325223033254e-05, "loss": 0.5402, "step": 25433 }, { "epoch": 0.7425768590698082, "grad_norm": 0.5582763668943187, "learning_rate": 1.4301703163017033e-05, "loss": 0.6506, "step": 25434 }, { "epoch": 0.7426060552976556, "grad_norm": 0.5466967508276483, "learning_rate": 1.4300081103000813e-05, "loss": 0.6565, "step": 25435 }, { "epoch": 0.7426352515255029, "grad_norm": 0.5379804310960994, "learning_rate": 1.4298459042984591e-05, "loss": 0.6354, "step": 25436 }, { "epoch": 0.7426644477533503, "grad_norm": 0.5190648106141954, "learning_rate": 1.429683698296837e-05, "loss": 0.6129, "step": 25437 }, { "epoch": 0.7426936439811976, "grad_norm": 0.5513775724556685, "learning_rate": 1.429521492295215e-05, "loss": 0.6176, "step": 25438 }, { "epoch": 0.742722840209045, "grad_norm": 0.5499140772828343, "learning_rate": 1.4293592862935928e-05, "loss": 0.616, "step": 25439 }, { "epoch": 0.7427520364368924, "grad_norm": 0.5101493964326081, "learning_rate": 1.4291970802919708e-05, "loss": 0.5366, "step": 25440 }, { "epoch": 0.7427812326647397, "grad_norm": 0.5639742743645244, "learning_rate": 1.429034874290349e-05, "loss": 0.6693, "step": 25441 }, { "epoch": 0.7428104288925871, "grad_norm": 0.5332038454836029, "learning_rate": 1.4288726682887268e-05, "loss": 0.6193, "step": 25442 }, { "epoch": 0.7428396251204344, "grad_norm": 0.5357038513120868, "learning_rate": 1.4287104622871048e-05, "loss": 0.5992, "step": 25443 }, { "epoch": 0.7428688213482818, "grad_norm": 0.5302827908553311, "learning_rate": 1.4285482562854826e-05, "loss": 0.6117, "step": 25444 }, { "epoch": 0.7428980175761292, "grad_norm": 0.537183767038856, "learning_rate": 1.4283860502838606e-05, "loss": 0.6118, "step": 25445 }, { "epoch": 0.7429272138039765, "grad_norm": 0.5338417666254291, "learning_rate": 1.4282238442822385e-05, "loss": 0.5807, "step": 25446 }, { "epoch": 0.7429564100318239, "grad_norm": 0.5096022074533427, "learning_rate": 1.4280616382806163e-05, "loss": 0.5533, "step": 25447 }, { "epoch": 0.7429856062596712, "grad_norm": 0.48871046405650775, "learning_rate": 1.4278994322789943e-05, "loss": 0.5309, "step": 25448 }, { "epoch": 0.7430148024875186, "grad_norm": 1.0861610939051045, "learning_rate": 1.4277372262773722e-05, "loss": 0.6074, "step": 25449 }, { "epoch": 0.743043998715366, "grad_norm": 0.5066038311500541, "learning_rate": 1.4275750202757503e-05, "loss": 0.5518, "step": 25450 }, { "epoch": 0.7430731949432133, "grad_norm": 0.4944217554974377, "learning_rate": 1.4274128142741283e-05, "loss": 0.5612, "step": 25451 }, { "epoch": 0.7431023911710607, "grad_norm": 0.5172642672640259, "learning_rate": 1.4272506082725062e-05, "loss": 0.5677, "step": 25452 }, { "epoch": 0.7431315873989081, "grad_norm": 0.5487739846560051, "learning_rate": 1.4270884022708842e-05, "loss": 0.6458, "step": 25453 }, { "epoch": 0.7431607836267554, "grad_norm": 0.4909517479844462, "learning_rate": 1.426926196269262e-05, "loss": 0.5056, "step": 25454 }, { "epoch": 0.7431899798546028, "grad_norm": 0.5374071627923157, "learning_rate": 1.42676399026764e-05, "loss": 0.6566, "step": 25455 }, { "epoch": 0.7432191760824501, "grad_norm": 0.5426451020642581, "learning_rate": 1.4266017842660179e-05, "loss": 0.6514, "step": 25456 }, { "epoch": 0.7432483723102975, "grad_norm": 0.5322256691170834, "learning_rate": 1.4264395782643957e-05, "loss": 0.6141, "step": 25457 }, { "epoch": 0.7432775685381449, "grad_norm": 0.5165690325631997, "learning_rate": 1.4262773722627737e-05, "loss": 0.578, "step": 25458 }, { "epoch": 0.7433067647659922, "grad_norm": 0.47326922542867245, "learning_rate": 1.4261151662611516e-05, "loss": 0.5091, "step": 25459 }, { "epoch": 0.7433359609938396, "grad_norm": 0.4711919141549188, "learning_rate": 1.4259529602595297e-05, "loss": 0.4855, "step": 25460 }, { "epoch": 0.743365157221687, "grad_norm": 0.5150626630082613, "learning_rate": 1.4257907542579077e-05, "loss": 0.6027, "step": 25461 }, { "epoch": 0.7433943534495343, "grad_norm": 0.5500219243286834, "learning_rate": 1.4256285482562856e-05, "loss": 0.6252, "step": 25462 }, { "epoch": 0.7434235496773817, "grad_norm": 0.5200879921364744, "learning_rate": 1.4254663422546636e-05, "loss": 0.5615, "step": 25463 }, { "epoch": 0.743452745905229, "grad_norm": 0.5625281464304862, "learning_rate": 1.4253041362530414e-05, "loss": 0.6544, "step": 25464 }, { "epoch": 0.7434819421330764, "grad_norm": 0.5174259814850107, "learning_rate": 1.4251419302514194e-05, "loss": 0.5863, "step": 25465 }, { "epoch": 0.7435111383609238, "grad_norm": 0.5160475742375867, "learning_rate": 1.4249797242497973e-05, "loss": 0.5455, "step": 25466 }, { "epoch": 0.7435403345887711, "grad_norm": 0.4887939679035098, "learning_rate": 1.4248175182481751e-05, "loss": 0.5395, "step": 25467 }, { "epoch": 0.7435695308166185, "grad_norm": 0.5222822596450999, "learning_rate": 1.4246553122465531e-05, "loss": 0.6039, "step": 25468 }, { "epoch": 0.7435987270444658, "grad_norm": 0.519804116191622, "learning_rate": 1.4244931062449313e-05, "loss": 0.5935, "step": 25469 }, { "epoch": 0.7436279232723132, "grad_norm": 0.5225073951406498, "learning_rate": 1.4243309002433091e-05, "loss": 0.6031, "step": 25470 }, { "epoch": 0.7436571195001606, "grad_norm": 0.5413560954230497, "learning_rate": 1.4241686942416871e-05, "loss": 0.6242, "step": 25471 }, { "epoch": 0.7436863157280079, "grad_norm": 0.5211104337359408, "learning_rate": 1.424006488240065e-05, "loss": 0.5994, "step": 25472 }, { "epoch": 0.7437155119558553, "grad_norm": 0.5199201076401374, "learning_rate": 1.423844282238443e-05, "loss": 0.6064, "step": 25473 }, { "epoch": 0.7437447081837026, "grad_norm": 0.5554175618454045, "learning_rate": 1.4236820762368208e-05, "loss": 0.6173, "step": 25474 }, { "epoch": 0.74377390441155, "grad_norm": 0.4992348635578531, "learning_rate": 1.4235198702351986e-05, "loss": 0.5421, "step": 25475 }, { "epoch": 0.7438031006393974, "grad_norm": 0.5154795118077796, "learning_rate": 1.4233576642335767e-05, "loss": 0.5723, "step": 25476 }, { "epoch": 0.7438322968672447, "grad_norm": 0.5169945874916154, "learning_rate": 1.4231954582319545e-05, "loss": 0.5512, "step": 25477 }, { "epoch": 0.7438614930950921, "grad_norm": 0.5141910474229617, "learning_rate": 1.4230332522303325e-05, "loss": 0.5985, "step": 25478 }, { "epoch": 0.7438906893229394, "grad_norm": 0.5298738491086323, "learning_rate": 1.4228710462287107e-05, "loss": 0.5711, "step": 25479 }, { "epoch": 0.7439198855507868, "grad_norm": 0.5681156002852895, "learning_rate": 1.4227088402270885e-05, "loss": 0.6791, "step": 25480 }, { "epoch": 0.7439490817786342, "grad_norm": 0.5361417806326617, "learning_rate": 1.4225466342254665e-05, "loss": 0.632, "step": 25481 }, { "epoch": 0.7439782780064815, "grad_norm": 0.5267359068930993, "learning_rate": 1.4223844282238444e-05, "loss": 0.5978, "step": 25482 }, { "epoch": 0.7440074742343289, "grad_norm": 0.516638495535901, "learning_rate": 1.4222222222222224e-05, "loss": 0.5876, "step": 25483 }, { "epoch": 0.7440366704621763, "grad_norm": 0.5524882688391692, "learning_rate": 1.4220600162206002e-05, "loss": 0.6214, "step": 25484 }, { "epoch": 0.7440658666900236, "grad_norm": 0.5256966289446622, "learning_rate": 1.421897810218978e-05, "loss": 0.5865, "step": 25485 }, { "epoch": 0.744095062917871, "grad_norm": 0.517983079402187, "learning_rate": 1.421735604217356e-05, "loss": 0.5858, "step": 25486 }, { "epoch": 0.7441242591457183, "grad_norm": 0.5135251727006185, "learning_rate": 1.4215733982157339e-05, "loss": 0.544, "step": 25487 }, { "epoch": 0.7441534553735657, "grad_norm": 0.5417769835981348, "learning_rate": 1.421411192214112e-05, "loss": 0.5945, "step": 25488 }, { "epoch": 0.7441826516014131, "grad_norm": 0.5584379036763988, "learning_rate": 1.42124898621249e-05, "loss": 0.5936, "step": 25489 }, { "epoch": 0.7442118478292604, "grad_norm": 0.5226732382595318, "learning_rate": 1.4210867802108679e-05, "loss": 0.5796, "step": 25490 }, { "epoch": 0.7442410440571078, "grad_norm": 0.5174524003250592, "learning_rate": 1.4209245742092459e-05, "loss": 0.6138, "step": 25491 }, { "epoch": 0.7442702402849551, "grad_norm": 0.5804205943346812, "learning_rate": 1.4207623682076237e-05, "loss": 0.649, "step": 25492 }, { "epoch": 0.7442994365128025, "grad_norm": 0.5715483058198875, "learning_rate": 1.4206001622060018e-05, "loss": 0.6734, "step": 25493 }, { "epoch": 0.7443286327406499, "grad_norm": 0.5470407289573157, "learning_rate": 1.4204379562043796e-05, "loss": 0.6731, "step": 25494 }, { "epoch": 0.7443578289684972, "grad_norm": 0.5542808448392524, "learning_rate": 1.4202757502027574e-05, "loss": 0.6524, "step": 25495 }, { "epoch": 0.7443870251963446, "grad_norm": 0.5367976164409329, "learning_rate": 1.4201135442011354e-05, "loss": 0.5799, "step": 25496 }, { "epoch": 0.744416221424192, "grad_norm": 0.5406357788979359, "learning_rate": 1.4199513381995133e-05, "loss": 0.6016, "step": 25497 }, { "epoch": 0.7444454176520393, "grad_norm": 0.5459143696544281, "learning_rate": 1.4197891321978915e-05, "loss": 0.604, "step": 25498 }, { "epoch": 0.7444746138798867, "grad_norm": 0.5106528643117199, "learning_rate": 1.4196269261962695e-05, "loss": 0.5548, "step": 25499 }, { "epoch": 0.744503810107734, "grad_norm": 0.5461431524799474, "learning_rate": 1.4194647201946473e-05, "loss": 0.619, "step": 25500 }, { "epoch": 0.7445330063355814, "grad_norm": 0.4880401059653556, "learning_rate": 1.4193025141930253e-05, "loss": 0.5341, "step": 25501 }, { "epoch": 0.7445622025634288, "grad_norm": 0.5432386524911476, "learning_rate": 1.4191403081914031e-05, "loss": 0.6385, "step": 25502 }, { "epoch": 0.7445913987912761, "grad_norm": 0.531845086317139, "learning_rate": 1.418978102189781e-05, "loss": 0.592, "step": 25503 }, { "epoch": 0.7446205950191235, "grad_norm": 0.487913287221126, "learning_rate": 1.418815896188159e-05, "loss": 0.4986, "step": 25504 }, { "epoch": 0.7446497912469708, "grad_norm": 0.5511864079624416, "learning_rate": 1.4186536901865368e-05, "loss": 0.6106, "step": 25505 }, { "epoch": 0.7446789874748182, "grad_norm": 0.5325951143003365, "learning_rate": 1.4184914841849148e-05, "loss": 0.6066, "step": 25506 }, { "epoch": 0.7447081837026656, "grad_norm": 0.5013211370341311, "learning_rate": 1.418329278183293e-05, "loss": 0.5248, "step": 25507 }, { "epoch": 0.744737379930513, "grad_norm": 0.5444266430871231, "learning_rate": 1.4181670721816708e-05, "loss": 0.6135, "step": 25508 }, { "epoch": 0.7447665761583604, "grad_norm": 0.4710700398364878, "learning_rate": 1.4180048661800488e-05, "loss": 0.5004, "step": 25509 }, { "epoch": 0.7447957723862078, "grad_norm": 0.5508551454515993, "learning_rate": 1.4178426601784267e-05, "loss": 0.6091, "step": 25510 }, { "epoch": 0.7448249686140551, "grad_norm": 0.5393173841044763, "learning_rate": 1.4176804541768047e-05, "loss": 0.631, "step": 25511 }, { "epoch": 0.7448541648419025, "grad_norm": 0.4948993169993416, "learning_rate": 1.4175182481751825e-05, "loss": 0.5064, "step": 25512 }, { "epoch": 0.7448833610697498, "grad_norm": 0.5934896483416511, "learning_rate": 1.4173560421735604e-05, "loss": 0.6317, "step": 25513 }, { "epoch": 0.7449125572975972, "grad_norm": 0.5645240951197852, "learning_rate": 1.4171938361719384e-05, "loss": 0.6547, "step": 25514 }, { "epoch": 0.7449417535254446, "grad_norm": 0.4965026607661528, "learning_rate": 1.4170316301703162e-05, "loss": 0.5257, "step": 25515 }, { "epoch": 0.7449709497532919, "grad_norm": 0.5635151992059514, "learning_rate": 1.4168694241686944e-05, "loss": 0.6397, "step": 25516 }, { "epoch": 0.7450001459811393, "grad_norm": 0.49840586140527937, "learning_rate": 1.4167072181670724e-05, "loss": 0.5492, "step": 25517 }, { "epoch": 0.7450293422089866, "grad_norm": 0.4944141146133801, "learning_rate": 1.4165450121654502e-05, "loss": 0.5441, "step": 25518 }, { "epoch": 0.745058538436834, "grad_norm": 0.5015384022413737, "learning_rate": 1.4163828061638282e-05, "loss": 0.5601, "step": 25519 }, { "epoch": 0.7450877346646814, "grad_norm": 0.554655430169559, "learning_rate": 1.416220600162206e-05, "loss": 0.6879, "step": 25520 }, { "epoch": 0.7451169308925287, "grad_norm": 0.5203358212253526, "learning_rate": 1.416058394160584e-05, "loss": 0.579, "step": 25521 }, { "epoch": 0.7451461271203761, "grad_norm": 0.49643216999985645, "learning_rate": 1.415896188158962e-05, "loss": 0.5888, "step": 25522 }, { "epoch": 0.7451753233482235, "grad_norm": 0.4696890461668216, "learning_rate": 1.4157339821573398e-05, "loss": 0.5231, "step": 25523 }, { "epoch": 0.7452045195760708, "grad_norm": 0.49824073730772134, "learning_rate": 1.4155717761557178e-05, "loss": 0.5266, "step": 25524 }, { "epoch": 0.7452337158039182, "grad_norm": 0.5164443260287033, "learning_rate": 1.4154095701540956e-05, "loss": 0.5394, "step": 25525 }, { "epoch": 0.7452629120317655, "grad_norm": 0.5149501807073019, "learning_rate": 1.4152473641524738e-05, "loss": 0.5922, "step": 25526 }, { "epoch": 0.7452921082596129, "grad_norm": 0.5289114990475791, "learning_rate": 1.4150851581508518e-05, "loss": 0.6314, "step": 25527 }, { "epoch": 0.7453213044874603, "grad_norm": 0.5249809410228112, "learning_rate": 1.4149229521492296e-05, "loss": 0.6033, "step": 25528 }, { "epoch": 0.7453505007153076, "grad_norm": 0.47561943785419236, "learning_rate": 1.4147607461476076e-05, "loss": 0.5288, "step": 25529 }, { "epoch": 0.745379696943155, "grad_norm": 0.4906599155670002, "learning_rate": 1.4145985401459855e-05, "loss": 0.552, "step": 25530 }, { "epoch": 0.7454088931710023, "grad_norm": 0.48653510191827404, "learning_rate": 1.4144363341443633e-05, "loss": 0.5288, "step": 25531 }, { "epoch": 0.7454380893988497, "grad_norm": 0.5370105907947039, "learning_rate": 1.4142741281427413e-05, "loss": 0.6398, "step": 25532 }, { "epoch": 0.7454672856266971, "grad_norm": 0.5475695040789145, "learning_rate": 1.4141119221411191e-05, "loss": 0.6287, "step": 25533 }, { "epoch": 0.7454964818545444, "grad_norm": 0.5217291702331529, "learning_rate": 1.4139497161394972e-05, "loss": 0.6367, "step": 25534 }, { "epoch": 0.7455256780823918, "grad_norm": 0.5222121104037826, "learning_rate": 1.4137875101378753e-05, "loss": 0.6155, "step": 25535 }, { "epoch": 0.7455548743102391, "grad_norm": 0.49978364159841265, "learning_rate": 1.4136253041362532e-05, "loss": 0.54, "step": 25536 }, { "epoch": 0.7455840705380865, "grad_norm": 0.5267003243055904, "learning_rate": 1.4134630981346312e-05, "loss": 0.6245, "step": 25537 }, { "epoch": 0.7456132667659339, "grad_norm": 0.49708040063906467, "learning_rate": 1.413300892133009e-05, "loss": 0.5369, "step": 25538 }, { "epoch": 0.7456424629937812, "grad_norm": 0.5501315080062769, "learning_rate": 1.413138686131387e-05, "loss": 0.6444, "step": 25539 }, { "epoch": 0.7456716592216286, "grad_norm": 0.5683135638523278, "learning_rate": 1.4129764801297649e-05, "loss": 0.6085, "step": 25540 }, { "epoch": 0.745700855449476, "grad_norm": 0.5362370887556747, "learning_rate": 1.4128142741281427e-05, "loss": 0.5904, "step": 25541 }, { "epoch": 0.7457300516773233, "grad_norm": 0.5198040074638391, "learning_rate": 1.4126520681265207e-05, "loss": 0.5964, "step": 25542 }, { "epoch": 0.7457592479051707, "grad_norm": 0.5099064754707704, "learning_rate": 1.4124898621248985e-05, "loss": 0.5656, "step": 25543 }, { "epoch": 0.745788444133018, "grad_norm": 0.5390626217081719, "learning_rate": 1.4123276561232765e-05, "loss": 0.6392, "step": 25544 }, { "epoch": 0.7458176403608654, "grad_norm": 0.5473711405223846, "learning_rate": 1.4121654501216547e-05, "loss": 0.6029, "step": 25545 }, { "epoch": 0.7458468365887128, "grad_norm": 0.551105316117328, "learning_rate": 1.4120032441200326e-05, "loss": 0.6361, "step": 25546 }, { "epoch": 0.7458760328165601, "grad_norm": 0.5671693321549723, "learning_rate": 1.4118410381184106e-05, "loss": 0.6189, "step": 25547 }, { "epoch": 0.7459052290444075, "grad_norm": 0.5678117952643478, "learning_rate": 1.4116788321167884e-05, "loss": 0.7282, "step": 25548 }, { "epoch": 0.7459344252722548, "grad_norm": 0.5492091899450361, "learning_rate": 1.4115166261151664e-05, "loss": 0.6531, "step": 25549 }, { "epoch": 0.7459636215001022, "grad_norm": 0.5113402756222631, "learning_rate": 1.4113544201135442e-05, "loss": 0.5734, "step": 25550 }, { "epoch": 0.7459928177279496, "grad_norm": 0.5250168422281569, "learning_rate": 1.411192214111922e-05, "loss": 0.5783, "step": 25551 }, { "epoch": 0.7460220139557969, "grad_norm": 0.5130381665510585, "learning_rate": 1.4110300081103001e-05, "loss": 0.5847, "step": 25552 }, { "epoch": 0.7460512101836443, "grad_norm": 0.5535629713759865, "learning_rate": 1.410867802108678e-05, "loss": 0.6488, "step": 25553 }, { "epoch": 0.7460804064114916, "grad_norm": 0.5523174697718355, "learning_rate": 1.4107055961070561e-05, "loss": 0.5264, "step": 25554 }, { "epoch": 0.746109602639339, "grad_norm": 0.5185722230883544, "learning_rate": 1.4105433901054341e-05, "loss": 0.5527, "step": 25555 }, { "epoch": 0.7461387988671864, "grad_norm": 0.5168859562342062, "learning_rate": 1.410381184103812e-05, "loss": 0.5749, "step": 25556 }, { "epoch": 0.7461679950950337, "grad_norm": 0.5670787124113901, "learning_rate": 1.41021897810219e-05, "loss": 0.6144, "step": 25557 }, { "epoch": 0.7461971913228811, "grad_norm": 0.5211309981230151, "learning_rate": 1.4100567721005678e-05, "loss": 0.5825, "step": 25558 }, { "epoch": 0.7462263875507285, "grad_norm": 0.5882812913970744, "learning_rate": 1.4098945660989456e-05, "loss": 0.678, "step": 25559 }, { "epoch": 0.7462555837785758, "grad_norm": 0.5125955000681992, "learning_rate": 1.4097323600973236e-05, "loss": 0.581, "step": 25560 }, { "epoch": 0.7462847800064232, "grad_norm": 0.5027078742836516, "learning_rate": 1.4095701540957015e-05, "loss": 0.5586, "step": 25561 }, { "epoch": 0.7463139762342705, "grad_norm": 0.5113585380692284, "learning_rate": 1.4094079480940795e-05, "loss": 0.609, "step": 25562 }, { "epoch": 0.7463431724621179, "grad_norm": 0.5214812950509811, "learning_rate": 1.4092457420924573e-05, "loss": 0.6106, "step": 25563 }, { "epoch": 0.7463723686899653, "grad_norm": 0.5300632751488372, "learning_rate": 1.4090835360908355e-05, "loss": 0.6377, "step": 25564 }, { "epoch": 0.7464015649178126, "grad_norm": 0.529855192253312, "learning_rate": 1.4089213300892135e-05, "loss": 0.5726, "step": 25565 }, { "epoch": 0.74643076114566, "grad_norm": 0.5576831008402497, "learning_rate": 1.4087591240875913e-05, "loss": 0.6326, "step": 25566 }, { "epoch": 0.7464599573735073, "grad_norm": 0.5058742951991138, "learning_rate": 1.4085969180859693e-05, "loss": 0.622, "step": 25567 }, { "epoch": 0.7464891536013547, "grad_norm": 0.4895358825362258, "learning_rate": 1.4084347120843472e-05, "loss": 0.5334, "step": 25568 }, { "epoch": 0.7465183498292021, "grad_norm": 0.49823510104731966, "learning_rate": 1.408272506082725e-05, "loss": 0.5491, "step": 25569 }, { "epoch": 0.7465475460570494, "grad_norm": 0.5173834752649223, "learning_rate": 1.408110300081103e-05, "loss": 0.5754, "step": 25570 }, { "epoch": 0.7465767422848968, "grad_norm": 0.5085751340809413, "learning_rate": 1.4079480940794809e-05, "loss": 0.5458, "step": 25571 }, { "epoch": 0.7466059385127441, "grad_norm": 0.4931769912846362, "learning_rate": 1.4077858880778589e-05, "loss": 0.5242, "step": 25572 }, { "epoch": 0.7466351347405915, "grad_norm": 0.5471522177582269, "learning_rate": 1.407623682076237e-05, "loss": 0.6122, "step": 25573 }, { "epoch": 0.7466643309684389, "grad_norm": 0.546797883979654, "learning_rate": 1.4074614760746149e-05, "loss": 0.6223, "step": 25574 }, { "epoch": 0.7466935271962862, "grad_norm": 0.5756634803068258, "learning_rate": 1.4072992700729929e-05, "loss": 0.5761, "step": 25575 }, { "epoch": 0.7467227234241336, "grad_norm": 0.4967748672248444, "learning_rate": 1.4071370640713707e-05, "loss": 0.5295, "step": 25576 }, { "epoch": 0.746751919651981, "grad_norm": 0.536577142360865, "learning_rate": 1.4069748580697487e-05, "loss": 0.5752, "step": 25577 }, { "epoch": 0.7467811158798283, "grad_norm": 0.4928444726427896, "learning_rate": 1.4068126520681266e-05, "loss": 0.5633, "step": 25578 }, { "epoch": 0.7468103121076757, "grad_norm": 0.49512794679594563, "learning_rate": 1.4066504460665044e-05, "loss": 0.5774, "step": 25579 }, { "epoch": 0.746839508335523, "grad_norm": 0.5239471017018532, "learning_rate": 1.4064882400648824e-05, "loss": 0.6095, "step": 25580 }, { "epoch": 0.7468687045633704, "grad_norm": 0.524622559679552, "learning_rate": 1.4063260340632603e-05, "loss": 0.5767, "step": 25581 }, { "epoch": 0.7468979007912178, "grad_norm": 0.5397077318323891, "learning_rate": 1.4061638280616383e-05, "loss": 0.5962, "step": 25582 }, { "epoch": 0.7469270970190651, "grad_norm": 0.5206670649265241, "learning_rate": 1.4060016220600164e-05, "loss": 0.5725, "step": 25583 }, { "epoch": 0.7469562932469125, "grad_norm": 0.5479258478298531, "learning_rate": 1.4058394160583943e-05, "loss": 0.6444, "step": 25584 }, { "epoch": 0.7469854894747598, "grad_norm": 0.46015947441701327, "learning_rate": 1.4056772100567723e-05, "loss": 0.507, "step": 25585 }, { "epoch": 0.7470146857026072, "grad_norm": 0.5227954062417208, "learning_rate": 1.4055150040551501e-05, "loss": 0.5636, "step": 25586 }, { "epoch": 0.7470438819304546, "grad_norm": 0.5098062105874746, "learning_rate": 1.405352798053528e-05, "loss": 0.5618, "step": 25587 }, { "epoch": 0.7470730781583019, "grad_norm": 0.5376380156381856, "learning_rate": 1.405190592051906e-05, "loss": 0.6062, "step": 25588 }, { "epoch": 0.7471022743861493, "grad_norm": 0.5084579716870572, "learning_rate": 1.4050283860502838e-05, "loss": 0.5562, "step": 25589 }, { "epoch": 0.7471314706139967, "grad_norm": 0.5114769958937228, "learning_rate": 1.4048661800486618e-05, "loss": 0.5317, "step": 25590 }, { "epoch": 0.747160666841844, "grad_norm": 0.5305513417555622, "learning_rate": 1.4047039740470396e-05, "loss": 0.5903, "step": 25591 }, { "epoch": 0.7471898630696914, "grad_norm": 0.5080226592272103, "learning_rate": 1.4045417680454178e-05, "loss": 0.5481, "step": 25592 }, { "epoch": 0.7472190592975387, "grad_norm": 0.501939420805246, "learning_rate": 1.4043795620437958e-05, "loss": 0.5601, "step": 25593 }, { "epoch": 0.7472482555253861, "grad_norm": 0.5290498041829196, "learning_rate": 1.4042173560421737e-05, "loss": 0.6309, "step": 25594 }, { "epoch": 0.7472774517532335, "grad_norm": 0.4826193300516124, "learning_rate": 1.4040551500405517e-05, "loss": 0.5102, "step": 25595 }, { "epoch": 0.7473066479810808, "grad_norm": 0.5148202011282579, "learning_rate": 1.4038929440389295e-05, "loss": 0.5633, "step": 25596 }, { "epoch": 0.7473358442089282, "grad_norm": 0.5287508054315564, "learning_rate": 1.4037307380373073e-05, "loss": 0.6063, "step": 25597 }, { "epoch": 0.7473650404367755, "grad_norm": 0.5460512535899543, "learning_rate": 1.4035685320356854e-05, "loss": 0.6348, "step": 25598 }, { "epoch": 0.7473942366646229, "grad_norm": 0.584521467908697, "learning_rate": 1.4034063260340632e-05, "loss": 0.6799, "step": 25599 }, { "epoch": 0.7474234328924703, "grad_norm": 0.495262625553884, "learning_rate": 1.4032441200324412e-05, "loss": 0.5392, "step": 25600 }, { "epoch": 0.7474526291203176, "grad_norm": 0.5283750755520946, "learning_rate": 1.4030819140308194e-05, "loss": 0.6082, "step": 25601 }, { "epoch": 0.747481825348165, "grad_norm": 0.5531474136531694, "learning_rate": 1.4029197080291972e-05, "loss": 0.6189, "step": 25602 }, { "epoch": 0.7475110215760123, "grad_norm": 0.5383638749699324, "learning_rate": 1.4027575020275752e-05, "loss": 0.6528, "step": 25603 }, { "epoch": 0.7475402178038597, "grad_norm": 0.5253526654891084, "learning_rate": 1.402595296025953e-05, "loss": 0.5627, "step": 25604 }, { "epoch": 0.7475694140317071, "grad_norm": 0.5499173384093009, "learning_rate": 1.402433090024331e-05, "loss": 0.6065, "step": 25605 }, { "epoch": 0.7475986102595544, "grad_norm": 0.5138060426325136, "learning_rate": 1.4022708840227089e-05, "loss": 0.5957, "step": 25606 }, { "epoch": 0.7476278064874018, "grad_norm": 0.5151457774828077, "learning_rate": 1.4021086780210867e-05, "loss": 0.5849, "step": 25607 }, { "epoch": 0.7476570027152492, "grad_norm": 0.5135937645803088, "learning_rate": 1.4019464720194647e-05, "loss": 0.556, "step": 25608 }, { "epoch": 0.7476861989430965, "grad_norm": 0.5287572783905666, "learning_rate": 1.4017842660178426e-05, "loss": 0.5848, "step": 25609 }, { "epoch": 0.7477153951709439, "grad_norm": 0.5261113826751608, "learning_rate": 1.4016220600162206e-05, "loss": 0.5617, "step": 25610 }, { "epoch": 0.7477445913987912, "grad_norm": 0.49672149053903486, "learning_rate": 1.4014598540145988e-05, "loss": 0.5689, "step": 25611 }, { "epoch": 0.7477737876266386, "grad_norm": 0.56219259404091, "learning_rate": 1.4012976480129766e-05, "loss": 0.6495, "step": 25612 }, { "epoch": 0.747802983854486, "grad_norm": 0.548386845516242, "learning_rate": 1.4011354420113546e-05, "loss": 0.6493, "step": 25613 }, { "epoch": 0.7478321800823333, "grad_norm": 0.5427454065779839, "learning_rate": 1.4009732360097324e-05, "loss": 0.6399, "step": 25614 }, { "epoch": 0.7478613763101807, "grad_norm": 0.49448718648668594, "learning_rate": 1.4008110300081105e-05, "loss": 0.5565, "step": 25615 }, { "epoch": 0.747890572538028, "grad_norm": 0.5113464921957434, "learning_rate": 1.4006488240064883e-05, "loss": 0.5723, "step": 25616 }, { "epoch": 0.7479197687658754, "grad_norm": 0.5228104359574292, "learning_rate": 1.4004866180048661e-05, "loss": 0.6058, "step": 25617 }, { "epoch": 0.7479489649937228, "grad_norm": 0.5073416382369853, "learning_rate": 1.4003244120032441e-05, "loss": 0.5601, "step": 25618 }, { "epoch": 0.7479781612215701, "grad_norm": 0.5340369989972409, "learning_rate": 1.400162206001622e-05, "loss": 0.6024, "step": 25619 }, { "epoch": 0.7480073574494175, "grad_norm": 0.5190811780942426, "learning_rate": 1.4000000000000001e-05, "loss": 0.5741, "step": 25620 }, { "epoch": 0.7480365536772648, "grad_norm": 0.5306493639402755, "learning_rate": 1.3998377939983782e-05, "loss": 0.565, "step": 25621 }, { "epoch": 0.7480657499051122, "grad_norm": 0.5105943772856522, "learning_rate": 1.399675587996756e-05, "loss": 0.559, "step": 25622 }, { "epoch": 0.7480949461329596, "grad_norm": 0.53154255732331, "learning_rate": 1.399513381995134e-05, "loss": 0.6234, "step": 25623 }, { "epoch": 0.7481241423608069, "grad_norm": 0.5374931897984387, "learning_rate": 1.3993511759935118e-05, "loss": 0.5925, "step": 25624 }, { "epoch": 0.7481533385886543, "grad_norm": 0.542244934639271, "learning_rate": 1.3991889699918897e-05, "loss": 0.625, "step": 25625 }, { "epoch": 0.7481825348165017, "grad_norm": 0.5159332743152534, "learning_rate": 1.3990267639902677e-05, "loss": 0.6143, "step": 25626 }, { "epoch": 0.748211731044349, "grad_norm": 0.5028837766239794, "learning_rate": 1.3988645579886455e-05, "loss": 0.5196, "step": 25627 }, { "epoch": 0.7482409272721965, "grad_norm": 0.5028101457954941, "learning_rate": 1.3987023519870235e-05, "loss": 0.5349, "step": 25628 }, { "epoch": 0.7482701235000438, "grad_norm": 0.5149936327993745, "learning_rate": 1.3985401459854014e-05, "loss": 0.5754, "step": 25629 }, { "epoch": 0.7482993197278912, "grad_norm": 0.5458969611109803, "learning_rate": 1.3983779399837795e-05, "loss": 0.6032, "step": 25630 }, { "epoch": 0.7483285159557386, "grad_norm": 0.5525489441022703, "learning_rate": 1.3982157339821575e-05, "loss": 0.614, "step": 25631 }, { "epoch": 0.7483577121835859, "grad_norm": 0.5299494422023152, "learning_rate": 1.3980535279805354e-05, "loss": 0.5909, "step": 25632 }, { "epoch": 0.7483869084114333, "grad_norm": 0.517353772899846, "learning_rate": 1.3978913219789134e-05, "loss": 0.5729, "step": 25633 }, { "epoch": 0.7484161046392807, "grad_norm": 0.5659520642717913, "learning_rate": 1.3977291159772912e-05, "loss": 0.6866, "step": 25634 }, { "epoch": 0.748445300867128, "grad_norm": 0.5349434079939963, "learning_rate": 1.397566909975669e-05, "loss": 0.5616, "step": 25635 }, { "epoch": 0.7484744970949754, "grad_norm": 0.5319341044312432, "learning_rate": 1.397404703974047e-05, "loss": 0.6235, "step": 25636 }, { "epoch": 0.7485036933228227, "grad_norm": 0.49390307708900283, "learning_rate": 1.3972424979724249e-05, "loss": 0.5386, "step": 25637 }, { "epoch": 0.7485328895506701, "grad_norm": 0.5582329876036681, "learning_rate": 1.397080291970803e-05, "loss": 0.6458, "step": 25638 }, { "epoch": 0.7485620857785175, "grad_norm": 0.49077012114716423, "learning_rate": 1.3969180859691811e-05, "loss": 0.5375, "step": 25639 }, { "epoch": 0.7485912820063648, "grad_norm": 0.5118720768208573, "learning_rate": 1.396755879967559e-05, "loss": 0.6086, "step": 25640 }, { "epoch": 0.7486204782342122, "grad_norm": 0.5341141976532824, "learning_rate": 1.396593673965937e-05, "loss": 0.6205, "step": 25641 }, { "epoch": 0.7486496744620595, "grad_norm": 0.5264602591877926, "learning_rate": 1.3964314679643148e-05, "loss": 0.556, "step": 25642 }, { "epoch": 0.7486788706899069, "grad_norm": 0.5332288815234254, "learning_rate": 1.3962692619626928e-05, "loss": 0.6423, "step": 25643 }, { "epoch": 0.7487080669177543, "grad_norm": 0.4980634450705247, "learning_rate": 1.3961070559610706e-05, "loss": 0.5616, "step": 25644 }, { "epoch": 0.7487372631456016, "grad_norm": 0.5366487557955487, "learning_rate": 1.3959448499594485e-05, "loss": 0.5936, "step": 25645 }, { "epoch": 0.748766459373449, "grad_norm": 0.5382108238106166, "learning_rate": 1.3957826439578265e-05, "loss": 0.592, "step": 25646 }, { "epoch": 0.7487956556012964, "grad_norm": 0.5626959110990212, "learning_rate": 1.3956204379562043e-05, "loss": 0.6729, "step": 25647 }, { "epoch": 0.7488248518291437, "grad_norm": 0.4582535561998677, "learning_rate": 1.3954582319545823e-05, "loss": 0.4683, "step": 25648 }, { "epoch": 0.7488540480569911, "grad_norm": 0.5555666048307815, "learning_rate": 1.3952960259529605e-05, "loss": 0.6165, "step": 25649 }, { "epoch": 0.7488832442848384, "grad_norm": 0.5003646057786711, "learning_rate": 1.3951338199513383e-05, "loss": 0.5468, "step": 25650 }, { "epoch": 0.7489124405126858, "grad_norm": 0.5189330816873119, "learning_rate": 1.3949716139497163e-05, "loss": 0.5702, "step": 25651 }, { "epoch": 0.7489416367405332, "grad_norm": 0.5523591649533786, "learning_rate": 1.3948094079480942e-05, "loss": 0.6274, "step": 25652 }, { "epoch": 0.7489708329683805, "grad_norm": 0.519169830637834, "learning_rate": 1.394647201946472e-05, "loss": 0.5765, "step": 25653 }, { "epoch": 0.7490000291962279, "grad_norm": 0.5403963117915087, "learning_rate": 1.39448499594485e-05, "loss": 0.5757, "step": 25654 }, { "epoch": 0.7490292254240752, "grad_norm": 0.5109438768410696, "learning_rate": 1.3943227899432278e-05, "loss": 0.5809, "step": 25655 }, { "epoch": 0.7490584216519226, "grad_norm": 0.5338989065862162, "learning_rate": 1.3941605839416059e-05, "loss": 0.6202, "step": 25656 }, { "epoch": 0.74908761787977, "grad_norm": 0.5665749699338494, "learning_rate": 1.3939983779399837e-05, "loss": 0.6486, "step": 25657 }, { "epoch": 0.7491168141076173, "grad_norm": 0.4978875102952611, "learning_rate": 1.3938361719383619e-05, "loss": 0.5411, "step": 25658 }, { "epoch": 0.7491460103354647, "grad_norm": 0.5254383575351806, "learning_rate": 1.3936739659367399e-05, "loss": 0.5652, "step": 25659 }, { "epoch": 0.749175206563312, "grad_norm": 0.5363629439422745, "learning_rate": 1.3935117599351177e-05, "loss": 0.6049, "step": 25660 }, { "epoch": 0.7492044027911594, "grad_norm": 0.5488386499077322, "learning_rate": 1.3933495539334957e-05, "loss": 0.6718, "step": 25661 }, { "epoch": 0.7492335990190068, "grad_norm": 0.5336152217555297, "learning_rate": 1.3931873479318736e-05, "loss": 0.5905, "step": 25662 }, { "epoch": 0.7492627952468541, "grad_norm": 0.5251502586514863, "learning_rate": 1.3930251419302514e-05, "loss": 0.6143, "step": 25663 }, { "epoch": 0.7492919914747015, "grad_norm": 0.5118491835107242, "learning_rate": 1.3928629359286294e-05, "loss": 0.5747, "step": 25664 }, { "epoch": 0.7493211877025489, "grad_norm": 0.5192836502675574, "learning_rate": 1.3927007299270072e-05, "loss": 0.5686, "step": 25665 }, { "epoch": 0.7493503839303962, "grad_norm": 0.4945452777418516, "learning_rate": 1.3925385239253852e-05, "loss": 0.5311, "step": 25666 }, { "epoch": 0.7493795801582436, "grad_norm": 0.508831399963024, "learning_rate": 1.392376317923763e-05, "loss": 0.5637, "step": 25667 }, { "epoch": 0.7494087763860909, "grad_norm": 0.5230028720854374, "learning_rate": 1.3922141119221413e-05, "loss": 0.6129, "step": 25668 }, { "epoch": 0.7494379726139383, "grad_norm": 0.6089340056595026, "learning_rate": 1.3920519059205193e-05, "loss": 0.7553, "step": 25669 }, { "epoch": 0.7494671688417857, "grad_norm": 0.5233524656462276, "learning_rate": 1.3918896999188971e-05, "loss": 0.5845, "step": 25670 }, { "epoch": 0.749496365069633, "grad_norm": 0.5115681706470142, "learning_rate": 1.3917274939172751e-05, "loss": 0.578, "step": 25671 }, { "epoch": 0.7495255612974804, "grad_norm": 0.48549172020929493, "learning_rate": 1.391565287915653e-05, "loss": 0.5342, "step": 25672 }, { "epoch": 0.7495547575253277, "grad_norm": 0.4955580113411167, "learning_rate": 1.3914030819140308e-05, "loss": 0.5276, "step": 25673 }, { "epoch": 0.7495839537531751, "grad_norm": 0.5262705780424906, "learning_rate": 1.3912408759124088e-05, "loss": 0.5509, "step": 25674 }, { "epoch": 0.7496131499810225, "grad_norm": 0.556600883073265, "learning_rate": 1.3910786699107866e-05, "loss": 0.6373, "step": 25675 }, { "epoch": 0.7496423462088698, "grad_norm": 0.5118583885986664, "learning_rate": 1.3909164639091646e-05, "loss": 0.5475, "step": 25676 }, { "epoch": 0.7496715424367172, "grad_norm": 0.5236108616736227, "learning_rate": 1.3907542579075428e-05, "loss": 0.5754, "step": 25677 }, { "epoch": 0.7497007386645645, "grad_norm": 0.4710358030178118, "learning_rate": 1.3905920519059206e-05, "loss": 0.4849, "step": 25678 }, { "epoch": 0.7497299348924119, "grad_norm": 0.5244108121929348, "learning_rate": 1.3904298459042987e-05, "loss": 0.5885, "step": 25679 }, { "epoch": 0.7497591311202593, "grad_norm": 0.5289383648494768, "learning_rate": 1.3902676399026765e-05, "loss": 0.6012, "step": 25680 }, { "epoch": 0.7497883273481066, "grad_norm": 0.5274779191112462, "learning_rate": 1.3901054339010543e-05, "loss": 0.5482, "step": 25681 }, { "epoch": 0.749817523575954, "grad_norm": 0.509062104468129, "learning_rate": 1.3899432278994323e-05, "loss": 0.5485, "step": 25682 }, { "epoch": 0.7498467198038014, "grad_norm": 0.6200388798848531, "learning_rate": 1.3897810218978102e-05, "loss": 0.5852, "step": 25683 }, { "epoch": 0.7498759160316487, "grad_norm": 0.5248157882420105, "learning_rate": 1.3896188158961882e-05, "loss": 0.5762, "step": 25684 }, { "epoch": 0.7499051122594961, "grad_norm": 0.5062743301481128, "learning_rate": 1.389456609894566e-05, "loss": 0.6024, "step": 25685 }, { "epoch": 0.7499343084873434, "grad_norm": 0.5316819786049957, "learning_rate": 1.3892944038929442e-05, "loss": 0.6448, "step": 25686 }, { "epoch": 0.7499635047151908, "grad_norm": 0.5159649800801941, "learning_rate": 1.3891321978913222e-05, "loss": 0.5108, "step": 25687 }, { "epoch": 0.7499927009430382, "grad_norm": 0.5158574619486617, "learning_rate": 1.3889699918897e-05, "loss": 0.5975, "step": 25688 }, { "epoch": 0.7500218971708855, "grad_norm": 0.5269027985953818, "learning_rate": 1.388807785888078e-05, "loss": 0.5704, "step": 25689 }, { "epoch": 0.7500510933987329, "grad_norm": 0.5155749970949541, "learning_rate": 1.3886455798864559e-05, "loss": 0.5885, "step": 25690 }, { "epoch": 0.7500802896265802, "grad_norm": 0.5338588369853327, "learning_rate": 1.3884833738848337e-05, "loss": 0.6377, "step": 25691 }, { "epoch": 0.7501094858544276, "grad_norm": 0.5686774469716737, "learning_rate": 1.3883211678832117e-05, "loss": 0.6563, "step": 25692 }, { "epoch": 0.750138682082275, "grad_norm": 0.5730818283402425, "learning_rate": 1.3881589618815896e-05, "loss": 0.6635, "step": 25693 }, { "epoch": 0.7501678783101223, "grad_norm": 0.5226487936898346, "learning_rate": 1.3879967558799676e-05, "loss": 0.5999, "step": 25694 }, { "epoch": 0.7501970745379697, "grad_norm": 0.5253356049497883, "learning_rate": 1.3878345498783454e-05, "loss": 0.6071, "step": 25695 }, { "epoch": 0.750226270765817, "grad_norm": 0.5350621921561458, "learning_rate": 1.3876723438767236e-05, "loss": 0.5877, "step": 25696 }, { "epoch": 0.7502554669936644, "grad_norm": 0.5621792864226395, "learning_rate": 1.3875101378751016e-05, "loss": 0.6183, "step": 25697 }, { "epoch": 0.7502846632215118, "grad_norm": 0.5371256315899418, "learning_rate": 1.3873479318734794e-05, "loss": 0.6492, "step": 25698 }, { "epoch": 0.7503138594493591, "grad_norm": 0.5136098607194521, "learning_rate": 1.3871857258718574e-05, "loss": 0.5718, "step": 25699 }, { "epoch": 0.7503430556772065, "grad_norm": 0.5121326906496548, "learning_rate": 1.3870235198702353e-05, "loss": 0.5824, "step": 25700 }, { "epoch": 0.7503722519050539, "grad_norm": 0.5264051247061708, "learning_rate": 1.3868613138686131e-05, "loss": 0.5866, "step": 25701 }, { "epoch": 0.7504014481329012, "grad_norm": 0.5447661221163145, "learning_rate": 1.3866991078669911e-05, "loss": 0.6289, "step": 25702 }, { "epoch": 0.7504306443607486, "grad_norm": 0.5080673576071129, "learning_rate": 1.386536901865369e-05, "loss": 0.5324, "step": 25703 }, { "epoch": 0.7504598405885959, "grad_norm": 0.5135270927890329, "learning_rate": 1.386374695863747e-05, "loss": 0.5587, "step": 25704 }, { "epoch": 0.7504890368164433, "grad_norm": 0.5258385008848621, "learning_rate": 1.3862124898621251e-05, "loss": 0.5854, "step": 25705 }, { "epoch": 0.7505182330442907, "grad_norm": 0.5661670224502832, "learning_rate": 1.386050283860503e-05, "loss": 0.6629, "step": 25706 }, { "epoch": 0.750547429272138, "grad_norm": 0.5008539771149589, "learning_rate": 1.385888077858881e-05, "loss": 0.5578, "step": 25707 }, { "epoch": 0.7505766254999854, "grad_norm": 0.5240615783735274, "learning_rate": 1.3857258718572588e-05, "loss": 0.5704, "step": 25708 }, { "epoch": 0.7506058217278327, "grad_norm": 0.5276849017816463, "learning_rate": 1.3855636658556367e-05, "loss": 0.6111, "step": 25709 }, { "epoch": 0.7506350179556801, "grad_norm": 0.5010548464562257, "learning_rate": 1.3854014598540147e-05, "loss": 0.5387, "step": 25710 }, { "epoch": 0.7506642141835275, "grad_norm": 0.4770364012955883, "learning_rate": 1.3852392538523925e-05, "loss": 0.525, "step": 25711 }, { "epoch": 0.7506934104113748, "grad_norm": 0.5121716662728915, "learning_rate": 1.3850770478507705e-05, "loss": 0.5893, "step": 25712 }, { "epoch": 0.7507226066392222, "grad_norm": 0.498485324329278, "learning_rate": 1.3849148418491483e-05, "loss": 0.5508, "step": 25713 }, { "epoch": 0.7507518028670696, "grad_norm": 0.6052168343643117, "learning_rate": 1.3847526358475264e-05, "loss": 0.7461, "step": 25714 }, { "epoch": 0.7507809990949169, "grad_norm": 0.5273828264258537, "learning_rate": 1.3845904298459045e-05, "loss": 0.5833, "step": 25715 }, { "epoch": 0.7508101953227643, "grad_norm": 0.5134864984930164, "learning_rate": 1.3844282238442824e-05, "loss": 0.5806, "step": 25716 }, { "epoch": 0.7508393915506116, "grad_norm": 0.4773862063576103, "learning_rate": 1.3842660178426604e-05, "loss": 0.5272, "step": 25717 }, { "epoch": 0.750868587778459, "grad_norm": 0.5132981247586045, "learning_rate": 1.3841038118410382e-05, "loss": 0.5554, "step": 25718 }, { "epoch": 0.7508977840063064, "grad_norm": 0.5310921109302957, "learning_rate": 1.383941605839416e-05, "loss": 0.5932, "step": 25719 }, { "epoch": 0.7509269802341537, "grad_norm": 0.540346832746332, "learning_rate": 1.383779399837794e-05, "loss": 0.6156, "step": 25720 }, { "epoch": 0.7509561764620011, "grad_norm": 0.5345249116646786, "learning_rate": 1.3836171938361719e-05, "loss": 0.5708, "step": 25721 }, { "epoch": 0.7509853726898484, "grad_norm": 0.5629085455626268, "learning_rate": 1.3834549878345499e-05, "loss": 0.7033, "step": 25722 }, { "epoch": 0.7510145689176958, "grad_norm": 0.4808755637999681, "learning_rate": 1.3832927818329277e-05, "loss": 0.4916, "step": 25723 }, { "epoch": 0.7510437651455432, "grad_norm": 0.53767137626167, "learning_rate": 1.3831305758313059e-05, "loss": 0.5867, "step": 25724 }, { "epoch": 0.7510729613733905, "grad_norm": 0.5935461216655981, "learning_rate": 1.382968369829684e-05, "loss": 0.6709, "step": 25725 }, { "epoch": 0.7511021576012379, "grad_norm": 0.5499864814879881, "learning_rate": 1.3828061638280618e-05, "loss": 0.6272, "step": 25726 }, { "epoch": 0.7511313538290852, "grad_norm": 0.5371787974739964, "learning_rate": 1.3826439578264398e-05, "loss": 0.5975, "step": 25727 }, { "epoch": 0.7511605500569326, "grad_norm": 0.5650894567234872, "learning_rate": 1.3824817518248176e-05, "loss": 0.6877, "step": 25728 }, { "epoch": 0.75118974628478, "grad_norm": 0.5444693735897845, "learning_rate": 1.3823195458231954e-05, "loss": 0.5804, "step": 25729 }, { "epoch": 0.7512189425126273, "grad_norm": 0.5163058955726064, "learning_rate": 1.3821573398215734e-05, "loss": 0.5908, "step": 25730 }, { "epoch": 0.7512481387404747, "grad_norm": 0.5582141967700706, "learning_rate": 1.3819951338199513e-05, "loss": 0.6676, "step": 25731 }, { "epoch": 0.751277334968322, "grad_norm": 0.5466209993989709, "learning_rate": 1.3818329278183293e-05, "loss": 0.6291, "step": 25732 }, { "epoch": 0.7513065311961694, "grad_norm": 0.49540127982189713, "learning_rate": 1.3816707218167071e-05, "loss": 0.554, "step": 25733 }, { "epoch": 0.7513357274240168, "grad_norm": 0.5371217622115277, "learning_rate": 1.3815085158150853e-05, "loss": 0.5741, "step": 25734 }, { "epoch": 0.7513649236518641, "grad_norm": 0.5010756783685715, "learning_rate": 1.3813463098134633e-05, "loss": 0.5645, "step": 25735 }, { "epoch": 0.7513941198797115, "grad_norm": 0.5164687198256154, "learning_rate": 1.3811841038118411e-05, "loss": 0.588, "step": 25736 }, { "epoch": 0.7514233161075589, "grad_norm": 0.5224568426439357, "learning_rate": 1.3810218978102192e-05, "loss": 0.6032, "step": 25737 }, { "epoch": 0.7514525123354062, "grad_norm": 0.5753055454693985, "learning_rate": 1.380859691808597e-05, "loss": 0.6768, "step": 25738 }, { "epoch": 0.7514817085632536, "grad_norm": 0.5208202862585196, "learning_rate": 1.3806974858069748e-05, "loss": 0.5678, "step": 25739 }, { "epoch": 0.7515109047911009, "grad_norm": 0.5513212813882988, "learning_rate": 1.3805352798053528e-05, "loss": 0.6261, "step": 25740 }, { "epoch": 0.7515401010189483, "grad_norm": 0.5085282249740098, "learning_rate": 1.3803730738037307e-05, "loss": 0.5718, "step": 25741 }, { "epoch": 0.7515692972467957, "grad_norm": 0.4955340178347499, "learning_rate": 1.3802108678021087e-05, "loss": 0.5742, "step": 25742 }, { "epoch": 0.751598493474643, "grad_norm": 0.5029859526041944, "learning_rate": 1.3800486618004869e-05, "loss": 0.5505, "step": 25743 }, { "epoch": 0.7516276897024904, "grad_norm": 0.5943384664106371, "learning_rate": 1.3798864557988647e-05, "loss": 0.6969, "step": 25744 }, { "epoch": 0.7516568859303377, "grad_norm": 0.5275720935151362, "learning_rate": 1.3797242497972427e-05, "loss": 0.5746, "step": 25745 }, { "epoch": 0.7516860821581851, "grad_norm": 0.5222386355655989, "learning_rate": 1.3795620437956205e-05, "loss": 0.5732, "step": 25746 }, { "epoch": 0.7517152783860325, "grad_norm": 0.4856188887507462, "learning_rate": 1.3793998377939984e-05, "loss": 0.4747, "step": 25747 }, { "epoch": 0.7517444746138798, "grad_norm": 0.5671614884966738, "learning_rate": 1.3792376317923764e-05, "loss": 0.6698, "step": 25748 }, { "epoch": 0.7517736708417273, "grad_norm": 0.5487980760392969, "learning_rate": 1.3790754257907542e-05, "loss": 0.6413, "step": 25749 }, { "epoch": 0.7518028670695747, "grad_norm": 0.5290635790119265, "learning_rate": 1.3789132197891322e-05, "loss": 0.571, "step": 25750 }, { "epoch": 0.751832063297422, "grad_norm": 0.49972465757529916, "learning_rate": 1.37875101378751e-05, "loss": 0.5504, "step": 25751 }, { "epoch": 0.7518612595252694, "grad_norm": 0.5549637095200455, "learning_rate": 1.3785888077858882e-05, "loss": 0.6368, "step": 25752 }, { "epoch": 0.7518904557531167, "grad_norm": 0.5553466503313577, "learning_rate": 1.3784266017842662e-05, "loss": 0.6191, "step": 25753 }, { "epoch": 0.7519196519809641, "grad_norm": 0.5355197825560206, "learning_rate": 1.378264395782644e-05, "loss": 0.5962, "step": 25754 }, { "epoch": 0.7519488482088115, "grad_norm": 0.48619739628874464, "learning_rate": 1.3781021897810221e-05, "loss": 0.5016, "step": 25755 }, { "epoch": 0.7519780444366588, "grad_norm": 0.5636149387416344, "learning_rate": 1.3779399837794e-05, "loss": 0.626, "step": 25756 }, { "epoch": 0.7520072406645062, "grad_norm": 0.5201819846039036, "learning_rate": 1.3777777777777778e-05, "loss": 0.5669, "step": 25757 }, { "epoch": 0.7520364368923536, "grad_norm": 0.5315028782836766, "learning_rate": 1.3776155717761558e-05, "loss": 0.6363, "step": 25758 }, { "epoch": 0.7520656331202009, "grad_norm": 0.5244317676130685, "learning_rate": 1.3774533657745336e-05, "loss": 0.5781, "step": 25759 }, { "epoch": 0.7520948293480483, "grad_norm": 0.6466600999643368, "learning_rate": 1.3772911597729116e-05, "loss": 0.6927, "step": 25760 }, { "epoch": 0.7521240255758956, "grad_norm": 0.5356825738237561, "learning_rate": 1.3771289537712895e-05, "loss": 0.6366, "step": 25761 }, { "epoch": 0.752153221803743, "grad_norm": 0.49278691288853516, "learning_rate": 1.3769667477696676e-05, "loss": 0.5186, "step": 25762 }, { "epoch": 0.7521824180315904, "grad_norm": 0.5215407543111309, "learning_rate": 1.3768045417680456e-05, "loss": 0.5761, "step": 25763 }, { "epoch": 0.7522116142594377, "grad_norm": 0.5386088862638577, "learning_rate": 1.3766423357664235e-05, "loss": 0.6364, "step": 25764 }, { "epoch": 0.7522408104872851, "grad_norm": 0.5146210994255366, "learning_rate": 1.3764801297648015e-05, "loss": 0.5919, "step": 25765 }, { "epoch": 0.7522700067151324, "grad_norm": 0.5378599099804098, "learning_rate": 1.3763179237631793e-05, "loss": 0.5932, "step": 25766 }, { "epoch": 0.7522992029429798, "grad_norm": 0.5305774642658401, "learning_rate": 1.3761557177615572e-05, "loss": 0.5984, "step": 25767 }, { "epoch": 0.7523283991708272, "grad_norm": 0.5203222321665252, "learning_rate": 1.3759935117599352e-05, "loss": 0.6118, "step": 25768 }, { "epoch": 0.7523575953986745, "grad_norm": 0.5227357530736153, "learning_rate": 1.375831305758313e-05, "loss": 0.5883, "step": 25769 }, { "epoch": 0.7523867916265219, "grad_norm": 0.5595213026754847, "learning_rate": 1.375669099756691e-05, "loss": 0.6461, "step": 25770 }, { "epoch": 0.7524159878543693, "grad_norm": 0.526775013058693, "learning_rate": 1.3755068937550692e-05, "loss": 0.5569, "step": 25771 }, { "epoch": 0.7524451840822166, "grad_norm": 0.5931772122860035, "learning_rate": 1.375344687753447e-05, "loss": 0.6581, "step": 25772 }, { "epoch": 0.752474380310064, "grad_norm": 0.5180125352605435, "learning_rate": 1.375182481751825e-05, "loss": 0.6201, "step": 25773 }, { "epoch": 0.7525035765379113, "grad_norm": 0.5666496221874289, "learning_rate": 1.3750202757502029e-05, "loss": 0.6798, "step": 25774 }, { "epoch": 0.7525327727657587, "grad_norm": 0.5616312525041639, "learning_rate": 1.3748580697485807e-05, "loss": 0.6394, "step": 25775 }, { "epoch": 0.7525619689936061, "grad_norm": 0.4791917347801617, "learning_rate": 1.3746958637469587e-05, "loss": 0.4993, "step": 25776 }, { "epoch": 0.7525911652214534, "grad_norm": 0.5237382845781964, "learning_rate": 1.3745336577453365e-05, "loss": 0.5591, "step": 25777 }, { "epoch": 0.7526203614493008, "grad_norm": 0.5345773487646478, "learning_rate": 1.3743714517437146e-05, "loss": 0.6128, "step": 25778 }, { "epoch": 0.7526495576771481, "grad_norm": 0.5394168279842861, "learning_rate": 1.3742092457420924e-05, "loss": 0.6103, "step": 25779 }, { "epoch": 0.7526787539049955, "grad_norm": 0.5137422175654794, "learning_rate": 1.3740470397404704e-05, "loss": 0.5851, "step": 25780 }, { "epoch": 0.7527079501328429, "grad_norm": 0.527688013228303, "learning_rate": 1.3738848337388486e-05, "loss": 0.5663, "step": 25781 }, { "epoch": 0.7527371463606902, "grad_norm": 0.5979336558877699, "learning_rate": 1.3737226277372264e-05, "loss": 0.5826, "step": 25782 }, { "epoch": 0.7527663425885376, "grad_norm": 0.577063485581885, "learning_rate": 1.3735604217356044e-05, "loss": 0.6196, "step": 25783 }, { "epoch": 0.752795538816385, "grad_norm": 0.5003918179144794, "learning_rate": 1.3733982157339823e-05, "loss": 0.5534, "step": 25784 }, { "epoch": 0.7528247350442323, "grad_norm": 0.5127802628214748, "learning_rate": 1.3732360097323601e-05, "loss": 0.5795, "step": 25785 }, { "epoch": 0.7528539312720797, "grad_norm": 0.5345335135479864, "learning_rate": 1.3730738037307381e-05, "loss": 0.6067, "step": 25786 }, { "epoch": 0.752883127499927, "grad_norm": 0.516835360680095, "learning_rate": 1.372911597729116e-05, "loss": 0.5941, "step": 25787 }, { "epoch": 0.7529123237277744, "grad_norm": 0.501601521458153, "learning_rate": 1.372749391727494e-05, "loss": 0.5314, "step": 25788 }, { "epoch": 0.7529415199556218, "grad_norm": 0.5494358217093409, "learning_rate": 1.3725871857258718e-05, "loss": 0.6246, "step": 25789 }, { "epoch": 0.7529707161834691, "grad_norm": 0.5795723002498135, "learning_rate": 1.37242497972425e-05, "loss": 0.6421, "step": 25790 }, { "epoch": 0.7529999124113165, "grad_norm": 0.507584098648223, "learning_rate": 1.372262773722628e-05, "loss": 0.5358, "step": 25791 }, { "epoch": 0.7530291086391638, "grad_norm": 0.5582431628915957, "learning_rate": 1.3721005677210058e-05, "loss": 0.5656, "step": 25792 }, { "epoch": 0.7530583048670112, "grad_norm": 0.5158735041847091, "learning_rate": 1.3719383617193838e-05, "loss": 0.5494, "step": 25793 }, { "epoch": 0.7530875010948586, "grad_norm": 0.4955568323389384, "learning_rate": 1.3717761557177616e-05, "loss": 0.5474, "step": 25794 }, { "epoch": 0.7531166973227059, "grad_norm": 0.5223572229295359, "learning_rate": 1.3716139497161395e-05, "loss": 0.5733, "step": 25795 }, { "epoch": 0.7531458935505533, "grad_norm": 0.5176210783758731, "learning_rate": 1.3714517437145175e-05, "loss": 0.5678, "step": 25796 }, { "epoch": 0.7531750897784006, "grad_norm": 0.5269255499135309, "learning_rate": 1.3712895377128953e-05, "loss": 0.6079, "step": 25797 }, { "epoch": 0.753204286006248, "grad_norm": 0.5216665262799058, "learning_rate": 1.3711273317112733e-05, "loss": 0.5973, "step": 25798 }, { "epoch": 0.7532334822340954, "grad_norm": 0.4956697165520354, "learning_rate": 1.3709651257096512e-05, "loss": 0.5269, "step": 25799 }, { "epoch": 0.7532626784619427, "grad_norm": 0.5478806246028873, "learning_rate": 1.3708029197080293e-05, "loss": 0.611, "step": 25800 }, { "epoch": 0.7532918746897901, "grad_norm": 0.4909015998500961, "learning_rate": 1.3706407137064074e-05, "loss": 0.5423, "step": 25801 }, { "epoch": 0.7533210709176374, "grad_norm": 0.5463121972410667, "learning_rate": 1.3704785077047852e-05, "loss": 0.6409, "step": 25802 }, { "epoch": 0.7533502671454848, "grad_norm": 0.5096436450588034, "learning_rate": 1.370316301703163e-05, "loss": 0.5478, "step": 25803 }, { "epoch": 0.7533794633733322, "grad_norm": 0.5622322116342972, "learning_rate": 1.370154095701541e-05, "loss": 0.6486, "step": 25804 }, { "epoch": 0.7534086596011795, "grad_norm": 0.5647725070602763, "learning_rate": 1.3699918896999189e-05, "loss": 0.6415, "step": 25805 }, { "epoch": 0.7534378558290269, "grad_norm": 0.5635173722814213, "learning_rate": 1.3698296836982969e-05, "loss": 0.6528, "step": 25806 }, { "epoch": 0.7534670520568743, "grad_norm": 0.5338810465359761, "learning_rate": 1.3696674776966747e-05, "loss": 0.5907, "step": 25807 }, { "epoch": 0.7534962482847216, "grad_norm": 0.5120234278872594, "learning_rate": 1.3695052716950527e-05, "loss": 0.5707, "step": 25808 }, { "epoch": 0.753525444512569, "grad_norm": 0.5065249201092061, "learning_rate": 1.3693430656934309e-05, "loss": 0.5707, "step": 25809 }, { "epoch": 0.7535546407404163, "grad_norm": 0.5485630259455784, "learning_rate": 1.3691808596918087e-05, "loss": 0.6346, "step": 25810 }, { "epoch": 0.7535838369682637, "grad_norm": 0.5856170911751087, "learning_rate": 1.3690186536901867e-05, "loss": 0.6189, "step": 25811 }, { "epoch": 0.7536130331961111, "grad_norm": 0.49543789888346446, "learning_rate": 1.3688564476885646e-05, "loss": 0.5631, "step": 25812 }, { "epoch": 0.7536422294239584, "grad_norm": 0.5103818760823362, "learning_rate": 1.3686942416869424e-05, "loss": 0.5762, "step": 25813 }, { "epoch": 0.7536714256518058, "grad_norm": 0.5372438313304585, "learning_rate": 1.3685320356853204e-05, "loss": 0.6069, "step": 25814 }, { "epoch": 0.7537006218796531, "grad_norm": 0.4813521000103708, "learning_rate": 1.3683698296836983e-05, "loss": 0.5143, "step": 25815 }, { "epoch": 0.7537298181075005, "grad_norm": 0.49646193136204475, "learning_rate": 1.3682076236820763e-05, "loss": 0.5532, "step": 25816 }, { "epoch": 0.7537590143353479, "grad_norm": 0.5034159612366325, "learning_rate": 1.3680454176804541e-05, "loss": 0.537, "step": 25817 }, { "epoch": 0.7537882105631952, "grad_norm": 0.4683235537690803, "learning_rate": 1.3678832116788321e-05, "loss": 0.4758, "step": 25818 }, { "epoch": 0.7538174067910426, "grad_norm": 0.5653281528164713, "learning_rate": 1.3677210056772103e-05, "loss": 0.6512, "step": 25819 }, { "epoch": 0.75384660301889, "grad_norm": 0.5217330512180517, "learning_rate": 1.3675587996755881e-05, "loss": 0.585, "step": 25820 }, { "epoch": 0.7538757992467373, "grad_norm": 0.49883504470731704, "learning_rate": 1.3673965936739661e-05, "loss": 0.5099, "step": 25821 }, { "epoch": 0.7539049954745847, "grad_norm": 0.5015446964604496, "learning_rate": 1.367234387672344e-05, "loss": 0.5523, "step": 25822 }, { "epoch": 0.753934191702432, "grad_norm": 0.5287484267700971, "learning_rate": 1.3670721816707218e-05, "loss": 0.5613, "step": 25823 }, { "epoch": 0.7539633879302794, "grad_norm": 0.49911302462832663, "learning_rate": 1.3669099756690998e-05, "loss": 0.5496, "step": 25824 }, { "epoch": 0.7539925841581268, "grad_norm": 0.5320473555562035, "learning_rate": 1.3667477696674777e-05, "loss": 0.5784, "step": 25825 }, { "epoch": 0.7540217803859741, "grad_norm": 0.5151069487289531, "learning_rate": 1.3665855636658557e-05, "loss": 0.557, "step": 25826 }, { "epoch": 0.7540509766138215, "grad_norm": 0.5464930590144594, "learning_rate": 1.3664233576642335e-05, "loss": 0.5962, "step": 25827 }, { "epoch": 0.7540801728416688, "grad_norm": 0.5227784116284437, "learning_rate": 1.3662611516626117e-05, "loss": 0.5784, "step": 25828 }, { "epoch": 0.7541093690695162, "grad_norm": 0.5258392555012021, "learning_rate": 1.3660989456609897e-05, "loss": 0.5749, "step": 25829 }, { "epoch": 0.7541385652973636, "grad_norm": 0.5584970362359925, "learning_rate": 1.3659367396593675e-05, "loss": 0.6286, "step": 25830 }, { "epoch": 0.7541677615252109, "grad_norm": 0.5143492920250772, "learning_rate": 1.3657745336577454e-05, "loss": 0.5801, "step": 25831 }, { "epoch": 0.7541969577530583, "grad_norm": 0.5116800376615147, "learning_rate": 1.3656123276561234e-05, "loss": 0.5659, "step": 25832 }, { "epoch": 0.7542261539809056, "grad_norm": 0.5168610280058512, "learning_rate": 1.3654501216545012e-05, "loss": 0.5806, "step": 25833 }, { "epoch": 0.754255350208753, "grad_norm": 0.536821465058919, "learning_rate": 1.3652879156528792e-05, "loss": 0.6399, "step": 25834 }, { "epoch": 0.7542845464366004, "grad_norm": 0.5527786573670981, "learning_rate": 1.365125709651257e-05, "loss": 0.6696, "step": 25835 }, { "epoch": 0.7543137426644477, "grad_norm": 0.5485016126400302, "learning_rate": 1.364963503649635e-05, "loss": 0.642, "step": 25836 }, { "epoch": 0.7543429388922951, "grad_norm": 0.46206581560030935, "learning_rate": 1.3648012976480132e-05, "loss": 0.4725, "step": 25837 }, { "epoch": 0.7543721351201425, "grad_norm": 0.5005604960728275, "learning_rate": 1.364639091646391e-05, "loss": 0.581, "step": 25838 }, { "epoch": 0.7544013313479898, "grad_norm": 0.5212632438083278, "learning_rate": 1.364476885644769e-05, "loss": 0.5929, "step": 25839 }, { "epoch": 0.7544305275758372, "grad_norm": 0.5466924163544521, "learning_rate": 1.3643146796431469e-05, "loss": 0.5803, "step": 25840 }, { "epoch": 0.7544597238036845, "grad_norm": 0.5338789280400981, "learning_rate": 1.3641524736415247e-05, "loss": 0.6136, "step": 25841 }, { "epoch": 0.7544889200315319, "grad_norm": 0.5396358068163122, "learning_rate": 1.3639902676399027e-05, "loss": 0.6156, "step": 25842 }, { "epoch": 0.7545181162593793, "grad_norm": 0.539316958426977, "learning_rate": 1.3638280616382806e-05, "loss": 0.6284, "step": 25843 }, { "epoch": 0.7545473124872266, "grad_norm": 0.5307733747976391, "learning_rate": 1.3636658556366586e-05, "loss": 0.6101, "step": 25844 }, { "epoch": 0.754576508715074, "grad_norm": 0.5171417445660879, "learning_rate": 1.3635036496350364e-05, "loss": 0.4864, "step": 25845 }, { "epoch": 0.7546057049429213, "grad_norm": 0.5312293107857496, "learning_rate": 1.3633414436334144e-05, "loss": 0.6002, "step": 25846 }, { "epoch": 0.7546349011707687, "grad_norm": 0.4940775856526878, "learning_rate": 1.3631792376317926e-05, "loss": 0.5471, "step": 25847 }, { "epoch": 0.7546640973986161, "grad_norm": 0.49283586860254436, "learning_rate": 1.3630170316301705e-05, "loss": 0.5248, "step": 25848 }, { "epoch": 0.7546932936264634, "grad_norm": 0.514809605769032, "learning_rate": 1.3628548256285485e-05, "loss": 0.5516, "step": 25849 }, { "epoch": 0.7547224898543108, "grad_norm": 0.4852095491146599, "learning_rate": 1.3626926196269263e-05, "loss": 0.5279, "step": 25850 }, { "epoch": 0.7547516860821581, "grad_norm": 0.5535322523355899, "learning_rate": 1.3625304136253041e-05, "loss": 0.6235, "step": 25851 }, { "epoch": 0.7547808823100055, "grad_norm": 0.50904757461812, "learning_rate": 1.3623682076236821e-05, "loss": 0.535, "step": 25852 }, { "epoch": 0.7548100785378529, "grad_norm": 0.5329693559573374, "learning_rate": 1.36220600162206e-05, "loss": 0.5903, "step": 25853 }, { "epoch": 0.7548392747657002, "grad_norm": 0.48956639948163366, "learning_rate": 1.362043795620438e-05, "loss": 0.5798, "step": 25854 }, { "epoch": 0.7548684709935476, "grad_norm": 0.5342715473385544, "learning_rate": 1.3618815896188158e-05, "loss": 0.6395, "step": 25855 }, { "epoch": 0.754897667221395, "grad_norm": 0.5201621737572453, "learning_rate": 1.361719383617194e-05, "loss": 0.5824, "step": 25856 }, { "epoch": 0.7549268634492423, "grad_norm": 0.5599001027429946, "learning_rate": 1.361557177615572e-05, "loss": 0.7187, "step": 25857 }, { "epoch": 0.7549560596770897, "grad_norm": 0.505161135747041, "learning_rate": 1.3613949716139498e-05, "loss": 0.5227, "step": 25858 }, { "epoch": 0.754985255904937, "grad_norm": 0.5584346439540616, "learning_rate": 1.3612327656123278e-05, "loss": 0.6392, "step": 25859 }, { "epoch": 0.7550144521327844, "grad_norm": 0.5361500151624684, "learning_rate": 1.3610705596107057e-05, "loss": 0.6092, "step": 25860 }, { "epoch": 0.7550436483606318, "grad_norm": 0.5696549656802344, "learning_rate": 1.3609083536090835e-05, "loss": 0.6523, "step": 25861 }, { "epoch": 0.7550728445884791, "grad_norm": 0.5386360040767194, "learning_rate": 1.3607461476074615e-05, "loss": 0.6384, "step": 25862 }, { "epoch": 0.7551020408163265, "grad_norm": 0.4886299876430167, "learning_rate": 1.3605839416058394e-05, "loss": 0.5133, "step": 25863 }, { "epoch": 0.7551312370441738, "grad_norm": 0.4985054862731733, "learning_rate": 1.3604217356042174e-05, "loss": 0.5333, "step": 25864 }, { "epoch": 0.7551604332720212, "grad_norm": 0.5395220056358532, "learning_rate": 1.3602595296025952e-05, "loss": 0.598, "step": 25865 }, { "epoch": 0.7551896294998686, "grad_norm": 0.5306741622059233, "learning_rate": 1.3600973236009734e-05, "loss": 0.5779, "step": 25866 }, { "epoch": 0.7552188257277159, "grad_norm": 0.5396625576306878, "learning_rate": 1.3599351175993514e-05, "loss": 0.6476, "step": 25867 }, { "epoch": 0.7552480219555633, "grad_norm": 0.5117403137609025, "learning_rate": 1.3597729115977292e-05, "loss": 0.5725, "step": 25868 }, { "epoch": 0.7552772181834106, "grad_norm": 0.5155858281636263, "learning_rate": 1.359610705596107e-05, "loss": 0.5716, "step": 25869 }, { "epoch": 0.7553064144112581, "grad_norm": 0.482291634550095, "learning_rate": 1.359448499594485e-05, "loss": 0.5069, "step": 25870 }, { "epoch": 0.7553356106391055, "grad_norm": 0.5335399676938485, "learning_rate": 1.3592862935928629e-05, "loss": 0.6125, "step": 25871 }, { "epoch": 0.7553648068669528, "grad_norm": 0.5158282575316822, "learning_rate": 1.359124087591241e-05, "loss": 0.5999, "step": 25872 }, { "epoch": 0.7553940030948002, "grad_norm": 0.49650485981413117, "learning_rate": 1.3589618815896188e-05, "loss": 0.5126, "step": 25873 }, { "epoch": 0.7554231993226476, "grad_norm": 0.5001170705601717, "learning_rate": 1.3587996755879968e-05, "loss": 0.5126, "step": 25874 }, { "epoch": 0.7554523955504949, "grad_norm": 0.5263106451378529, "learning_rate": 1.358637469586375e-05, "loss": 0.6101, "step": 25875 }, { "epoch": 0.7554815917783423, "grad_norm": 0.5198521585754851, "learning_rate": 1.3584752635847528e-05, "loss": 0.5862, "step": 25876 }, { "epoch": 0.7555107880061896, "grad_norm": 0.5011481540976308, "learning_rate": 1.3583130575831308e-05, "loss": 0.562, "step": 25877 }, { "epoch": 0.755539984234037, "grad_norm": 0.5412809674578819, "learning_rate": 1.3581508515815086e-05, "loss": 0.5745, "step": 25878 }, { "epoch": 0.7555691804618844, "grad_norm": 0.7570102520169266, "learning_rate": 1.3579886455798865e-05, "loss": 0.6203, "step": 25879 }, { "epoch": 0.7555983766897317, "grad_norm": 0.5050647533039326, "learning_rate": 1.3578264395782645e-05, "loss": 0.5557, "step": 25880 }, { "epoch": 0.7556275729175791, "grad_norm": 0.5381549523510345, "learning_rate": 1.3576642335766423e-05, "loss": 0.5778, "step": 25881 }, { "epoch": 0.7556567691454265, "grad_norm": 0.47207760835839996, "learning_rate": 1.3575020275750203e-05, "loss": 0.5009, "step": 25882 }, { "epoch": 0.7556859653732738, "grad_norm": 0.5818451784989064, "learning_rate": 1.3573398215733981e-05, "loss": 0.5775, "step": 25883 }, { "epoch": 0.7557151616011212, "grad_norm": 0.5160761052271909, "learning_rate": 1.3571776155717762e-05, "loss": 0.5788, "step": 25884 }, { "epoch": 0.7557443578289685, "grad_norm": 0.5556393364196046, "learning_rate": 1.3570154095701543e-05, "loss": 0.6107, "step": 25885 }, { "epoch": 0.7557735540568159, "grad_norm": 0.5068110044156244, "learning_rate": 1.3568532035685322e-05, "loss": 0.5531, "step": 25886 }, { "epoch": 0.7558027502846633, "grad_norm": 0.5033333372410648, "learning_rate": 1.3566909975669102e-05, "loss": 0.5607, "step": 25887 }, { "epoch": 0.7558319465125106, "grad_norm": 0.5481279914630928, "learning_rate": 1.356528791565288e-05, "loss": 0.6292, "step": 25888 }, { "epoch": 0.755861142740358, "grad_norm": 0.5297443119252229, "learning_rate": 1.3563665855636659e-05, "loss": 0.6235, "step": 25889 }, { "epoch": 0.7558903389682053, "grad_norm": 0.4996449867447422, "learning_rate": 1.3562043795620439e-05, "loss": 0.5351, "step": 25890 }, { "epoch": 0.7559195351960527, "grad_norm": 0.529931268466673, "learning_rate": 1.3560421735604217e-05, "loss": 0.5799, "step": 25891 }, { "epoch": 0.7559487314239001, "grad_norm": 0.4952143919864478, "learning_rate": 1.3558799675587997e-05, "loss": 0.5365, "step": 25892 }, { "epoch": 0.7559779276517474, "grad_norm": 0.48018136941819567, "learning_rate": 1.3557177615571775e-05, "loss": 0.5148, "step": 25893 }, { "epoch": 0.7560071238795948, "grad_norm": 0.5430183219819213, "learning_rate": 1.3555555555555557e-05, "loss": 0.5944, "step": 25894 }, { "epoch": 0.7560363201074422, "grad_norm": 0.5630466070340414, "learning_rate": 1.3553933495539337e-05, "loss": 0.6223, "step": 25895 }, { "epoch": 0.7560655163352895, "grad_norm": 0.54092172784737, "learning_rate": 1.3552311435523116e-05, "loss": 0.6291, "step": 25896 }, { "epoch": 0.7560947125631369, "grad_norm": 0.5101945127140954, "learning_rate": 1.3550689375506894e-05, "loss": 0.5621, "step": 25897 }, { "epoch": 0.7561239087909842, "grad_norm": 0.542927451730123, "learning_rate": 1.3549067315490674e-05, "loss": 0.6548, "step": 25898 }, { "epoch": 0.7561531050188316, "grad_norm": 0.5051693306604657, "learning_rate": 1.3547445255474452e-05, "loss": 0.4992, "step": 25899 }, { "epoch": 0.756182301246679, "grad_norm": 0.5234666958883462, "learning_rate": 1.3545823195458232e-05, "loss": 0.6054, "step": 25900 }, { "epoch": 0.7562114974745263, "grad_norm": 0.5164782684017688, "learning_rate": 1.3544201135442011e-05, "loss": 0.5669, "step": 25901 }, { "epoch": 0.7562406937023737, "grad_norm": 0.5419286342175949, "learning_rate": 1.3542579075425791e-05, "loss": 0.5688, "step": 25902 }, { "epoch": 0.756269889930221, "grad_norm": 0.48507896242405635, "learning_rate": 1.3540957015409573e-05, "loss": 0.5124, "step": 25903 }, { "epoch": 0.7562990861580684, "grad_norm": 0.5149374525447499, "learning_rate": 1.3539334955393351e-05, "loss": 0.597, "step": 25904 }, { "epoch": 0.7563282823859158, "grad_norm": 0.5594193410470468, "learning_rate": 1.3537712895377131e-05, "loss": 0.6548, "step": 25905 }, { "epoch": 0.7563574786137631, "grad_norm": 0.537938139942027, "learning_rate": 1.353609083536091e-05, "loss": 0.5916, "step": 25906 }, { "epoch": 0.7563866748416105, "grad_norm": 0.5520927274244857, "learning_rate": 1.3534468775344688e-05, "loss": 0.615, "step": 25907 }, { "epoch": 0.7564158710694578, "grad_norm": 0.5412504101799026, "learning_rate": 1.3532846715328468e-05, "loss": 0.6293, "step": 25908 }, { "epoch": 0.7564450672973052, "grad_norm": 0.4980953741933767, "learning_rate": 1.3531224655312246e-05, "loss": 0.5416, "step": 25909 }, { "epoch": 0.7564742635251526, "grad_norm": 0.49470819497845436, "learning_rate": 1.3529602595296026e-05, "loss": 0.5674, "step": 25910 }, { "epoch": 0.7565034597529999, "grad_norm": 0.5187613043640511, "learning_rate": 1.3527980535279805e-05, "loss": 0.6114, "step": 25911 }, { "epoch": 0.7565326559808473, "grad_norm": 0.5146236273154338, "learning_rate": 1.3526358475263585e-05, "loss": 0.5624, "step": 25912 }, { "epoch": 0.7565618522086947, "grad_norm": 0.5461460289444241, "learning_rate": 1.3524736415247367e-05, "loss": 0.6207, "step": 25913 }, { "epoch": 0.756591048436542, "grad_norm": 0.5535320705189993, "learning_rate": 1.3523114355231145e-05, "loss": 0.6164, "step": 25914 }, { "epoch": 0.7566202446643894, "grad_norm": 0.5480336978852016, "learning_rate": 1.3521492295214925e-05, "loss": 0.6769, "step": 25915 }, { "epoch": 0.7566494408922367, "grad_norm": 0.515215324038012, "learning_rate": 1.3519870235198703e-05, "loss": 0.54, "step": 25916 }, { "epoch": 0.7566786371200841, "grad_norm": 0.5199004766852863, "learning_rate": 1.3518248175182482e-05, "loss": 0.5969, "step": 25917 }, { "epoch": 0.7567078333479315, "grad_norm": 0.4959092333249236, "learning_rate": 1.3516626115166262e-05, "loss": 0.5399, "step": 25918 }, { "epoch": 0.7567370295757788, "grad_norm": 0.544903338609418, "learning_rate": 1.351500405515004e-05, "loss": 0.665, "step": 25919 }, { "epoch": 0.7567662258036262, "grad_norm": 0.5048177968438894, "learning_rate": 1.351338199513382e-05, "loss": 0.5659, "step": 25920 }, { "epoch": 0.7567954220314735, "grad_norm": 0.5047161681590262, "learning_rate": 1.3511759935117599e-05, "loss": 0.5534, "step": 25921 }, { "epoch": 0.7568246182593209, "grad_norm": 0.46456074060207136, "learning_rate": 1.351013787510138e-05, "loss": 0.4668, "step": 25922 }, { "epoch": 0.7568538144871683, "grad_norm": 0.5130111775866584, "learning_rate": 1.350851581508516e-05, "loss": 0.5932, "step": 25923 }, { "epoch": 0.7568830107150156, "grad_norm": 0.5726767008085634, "learning_rate": 1.3506893755068939e-05, "loss": 0.6652, "step": 25924 }, { "epoch": 0.756912206942863, "grad_norm": 0.5111611508553645, "learning_rate": 1.3505271695052717e-05, "loss": 0.5532, "step": 25925 }, { "epoch": 0.7569414031707103, "grad_norm": 0.4914875217154345, "learning_rate": 1.3503649635036497e-05, "loss": 0.538, "step": 25926 }, { "epoch": 0.7569705993985577, "grad_norm": 0.4833415869891818, "learning_rate": 1.3502027575020276e-05, "loss": 0.5083, "step": 25927 }, { "epoch": 0.7569997956264051, "grad_norm": 0.5095199040021888, "learning_rate": 1.3500405515004056e-05, "loss": 0.5926, "step": 25928 }, { "epoch": 0.7570289918542524, "grad_norm": 0.5315921123324849, "learning_rate": 1.3498783454987834e-05, "loss": 0.6012, "step": 25929 }, { "epoch": 0.7570581880820998, "grad_norm": 0.5361165549312399, "learning_rate": 1.3497161394971614e-05, "loss": 0.6251, "step": 25930 }, { "epoch": 0.7570873843099472, "grad_norm": 0.53647462010806, "learning_rate": 1.3495539334955393e-05, "loss": 0.6287, "step": 25931 }, { "epoch": 0.7571165805377945, "grad_norm": 0.4850668651503801, "learning_rate": 1.3493917274939174e-05, "loss": 0.4933, "step": 25932 }, { "epoch": 0.7571457767656419, "grad_norm": 0.48059340219747293, "learning_rate": 1.3492295214922954e-05, "loss": 0.5029, "step": 25933 }, { "epoch": 0.7571749729934892, "grad_norm": 0.5371806169061162, "learning_rate": 1.3490673154906733e-05, "loss": 0.6291, "step": 25934 }, { "epoch": 0.7572041692213366, "grad_norm": 0.49743581899034506, "learning_rate": 1.3489051094890511e-05, "loss": 0.5595, "step": 25935 }, { "epoch": 0.757233365449184, "grad_norm": 0.524304000421771, "learning_rate": 1.3487429034874291e-05, "loss": 0.5883, "step": 25936 }, { "epoch": 0.7572625616770313, "grad_norm": 0.5773153140770974, "learning_rate": 1.348580697485807e-05, "loss": 0.6456, "step": 25937 }, { "epoch": 0.7572917579048787, "grad_norm": 0.4954996983521992, "learning_rate": 1.348418491484185e-05, "loss": 0.5115, "step": 25938 }, { "epoch": 0.757320954132726, "grad_norm": 0.5231362941222568, "learning_rate": 1.3482562854825628e-05, "loss": 0.6091, "step": 25939 }, { "epoch": 0.7573501503605734, "grad_norm": 0.49228347393652827, "learning_rate": 1.3480940794809408e-05, "loss": 0.5302, "step": 25940 }, { "epoch": 0.7573793465884208, "grad_norm": 0.5159754367343629, "learning_rate": 1.347931873479319e-05, "loss": 0.6096, "step": 25941 }, { "epoch": 0.7574085428162681, "grad_norm": 0.4776996885493574, "learning_rate": 1.3477696674776968e-05, "loss": 0.5016, "step": 25942 }, { "epoch": 0.7574377390441155, "grad_norm": 0.5219940418897023, "learning_rate": 1.3476074614760748e-05, "loss": 0.5714, "step": 25943 }, { "epoch": 0.7574669352719628, "grad_norm": 0.5753678736851344, "learning_rate": 1.3474452554744527e-05, "loss": 0.629, "step": 25944 }, { "epoch": 0.7574961314998102, "grad_norm": 0.5251229054124289, "learning_rate": 1.3472830494728305e-05, "loss": 0.6067, "step": 25945 }, { "epoch": 0.7575253277276576, "grad_norm": 0.517281419216185, "learning_rate": 1.3471208434712085e-05, "loss": 0.5396, "step": 25946 }, { "epoch": 0.7575545239555049, "grad_norm": 0.5609697562716212, "learning_rate": 1.3469586374695863e-05, "loss": 0.6396, "step": 25947 }, { "epoch": 0.7575837201833523, "grad_norm": 0.5974374898521343, "learning_rate": 1.3467964314679644e-05, "loss": 0.6198, "step": 25948 }, { "epoch": 0.7576129164111997, "grad_norm": 0.504030823481522, "learning_rate": 1.3466342254663422e-05, "loss": 0.5741, "step": 25949 }, { "epoch": 0.757642112639047, "grad_norm": 0.5191306697868764, "learning_rate": 1.3464720194647202e-05, "loss": 0.5607, "step": 25950 }, { "epoch": 0.7576713088668944, "grad_norm": 0.5071507466935636, "learning_rate": 1.3463098134630984e-05, "loss": 0.5641, "step": 25951 }, { "epoch": 0.7577005050947417, "grad_norm": 0.4706800252459722, "learning_rate": 1.3461476074614762e-05, "loss": 0.5263, "step": 25952 }, { "epoch": 0.7577297013225891, "grad_norm": 0.5114587900190333, "learning_rate": 1.345985401459854e-05, "loss": 0.5425, "step": 25953 }, { "epoch": 0.7577588975504365, "grad_norm": 0.5198740947125934, "learning_rate": 1.345823195458232e-05, "loss": 0.5918, "step": 25954 }, { "epoch": 0.7577880937782838, "grad_norm": 0.5389606457243219, "learning_rate": 1.3456609894566099e-05, "loss": 0.6515, "step": 25955 }, { "epoch": 0.7578172900061312, "grad_norm": 0.570340570668549, "learning_rate": 1.3454987834549879e-05, "loss": 0.6212, "step": 25956 }, { "epoch": 0.7578464862339785, "grad_norm": 0.5112955115094243, "learning_rate": 1.3453365774533657e-05, "loss": 0.6085, "step": 25957 }, { "epoch": 0.7578756824618259, "grad_norm": 0.525610990879849, "learning_rate": 1.3451743714517437e-05, "loss": 0.5777, "step": 25958 }, { "epoch": 0.7579048786896733, "grad_norm": 0.5197585762303487, "learning_rate": 1.3450121654501216e-05, "loss": 0.6093, "step": 25959 }, { "epoch": 0.7579340749175206, "grad_norm": 0.5774316269301437, "learning_rate": 1.3448499594484998e-05, "loss": 0.6972, "step": 25960 }, { "epoch": 0.757963271145368, "grad_norm": 0.5222435995685072, "learning_rate": 1.3446877534468778e-05, "loss": 0.5889, "step": 25961 }, { "epoch": 0.7579924673732154, "grad_norm": 0.5251672447572661, "learning_rate": 1.3445255474452556e-05, "loss": 0.5931, "step": 25962 }, { "epoch": 0.7580216636010627, "grad_norm": 0.5338212988517431, "learning_rate": 1.3443633414436334e-05, "loss": 0.6352, "step": 25963 }, { "epoch": 0.7580508598289101, "grad_norm": 0.474590397777602, "learning_rate": 1.3442011354420114e-05, "loss": 0.5184, "step": 25964 }, { "epoch": 0.7580800560567574, "grad_norm": 0.5118024786423289, "learning_rate": 1.3440389294403893e-05, "loss": 0.5268, "step": 25965 }, { "epoch": 0.7581092522846048, "grad_norm": 0.5592681830407048, "learning_rate": 1.3438767234387673e-05, "loss": 0.598, "step": 25966 }, { "epoch": 0.7581384485124522, "grad_norm": 0.5399145886175067, "learning_rate": 1.3437145174371451e-05, "loss": 0.5622, "step": 25967 }, { "epoch": 0.7581676447402995, "grad_norm": 0.5213953436074609, "learning_rate": 1.3435523114355231e-05, "loss": 0.61, "step": 25968 }, { "epoch": 0.7581968409681469, "grad_norm": 0.530205612533283, "learning_rate": 1.343390105433901e-05, "loss": 0.622, "step": 25969 }, { "epoch": 0.7582260371959942, "grad_norm": 0.4955286144360435, "learning_rate": 1.3432278994322791e-05, "loss": 0.5261, "step": 25970 }, { "epoch": 0.7582552334238416, "grad_norm": 0.5465795751513505, "learning_rate": 1.3430656934306572e-05, "loss": 0.5895, "step": 25971 }, { "epoch": 0.758284429651689, "grad_norm": 0.5428041508340604, "learning_rate": 1.342903487429035e-05, "loss": 0.6214, "step": 25972 }, { "epoch": 0.7583136258795363, "grad_norm": 0.49640808069778053, "learning_rate": 1.3427412814274128e-05, "loss": 0.5077, "step": 25973 }, { "epoch": 0.7583428221073837, "grad_norm": 0.5562667591373348, "learning_rate": 1.3425790754257908e-05, "loss": 0.6506, "step": 25974 }, { "epoch": 0.758372018335231, "grad_norm": 0.5641307391281285, "learning_rate": 1.3424168694241687e-05, "loss": 0.6811, "step": 25975 }, { "epoch": 0.7584012145630784, "grad_norm": 0.5092406336935262, "learning_rate": 1.3422546634225467e-05, "loss": 0.5419, "step": 25976 }, { "epoch": 0.7584304107909258, "grad_norm": 0.548430987296926, "learning_rate": 1.3420924574209245e-05, "loss": 0.5917, "step": 25977 }, { "epoch": 0.7584596070187731, "grad_norm": 0.543408216546877, "learning_rate": 1.3419302514193025e-05, "loss": 0.6129, "step": 25978 }, { "epoch": 0.7584888032466205, "grad_norm": 0.5217735882682282, "learning_rate": 1.3417680454176807e-05, "loss": 0.634, "step": 25979 }, { "epoch": 0.7585179994744679, "grad_norm": 0.5212489492918461, "learning_rate": 1.3416058394160585e-05, "loss": 0.5851, "step": 25980 }, { "epoch": 0.7585471957023152, "grad_norm": 0.5581144400936626, "learning_rate": 1.3414436334144365e-05, "loss": 0.5515, "step": 25981 }, { "epoch": 0.7585763919301626, "grad_norm": 0.49923082944551533, "learning_rate": 1.3412814274128144e-05, "loss": 0.5478, "step": 25982 }, { "epoch": 0.7586055881580099, "grad_norm": 0.49951113511162665, "learning_rate": 1.3411192214111922e-05, "loss": 0.5677, "step": 25983 }, { "epoch": 0.7586347843858573, "grad_norm": 0.5370619077632753, "learning_rate": 1.3409570154095702e-05, "loss": 0.5622, "step": 25984 }, { "epoch": 0.7586639806137047, "grad_norm": 0.5302736418626817, "learning_rate": 1.340794809407948e-05, "loss": 0.5696, "step": 25985 }, { "epoch": 0.758693176841552, "grad_norm": 0.5450577445300988, "learning_rate": 1.340632603406326e-05, "loss": 0.6277, "step": 25986 }, { "epoch": 0.7587223730693994, "grad_norm": 0.49296340088131435, "learning_rate": 1.3404703974047039e-05, "loss": 0.5153, "step": 25987 }, { "epoch": 0.7587515692972467, "grad_norm": 0.5208445874445705, "learning_rate": 1.3403081914030821e-05, "loss": 0.6096, "step": 25988 }, { "epoch": 0.7587807655250941, "grad_norm": 0.5262443810814316, "learning_rate": 1.3401459854014601e-05, "loss": 0.5591, "step": 25989 }, { "epoch": 0.7588099617529416, "grad_norm": 0.5296526710349313, "learning_rate": 1.339983779399838e-05, "loss": 0.5956, "step": 25990 }, { "epoch": 0.7588391579807889, "grad_norm": 0.5101843209386494, "learning_rate": 1.3398215733982158e-05, "loss": 0.5832, "step": 25991 }, { "epoch": 0.7588683542086363, "grad_norm": 0.5102961559615573, "learning_rate": 1.3396593673965938e-05, "loss": 0.5836, "step": 25992 }, { "epoch": 0.7588975504364837, "grad_norm": 0.5481585364927056, "learning_rate": 1.3394971613949716e-05, "loss": 0.6262, "step": 25993 }, { "epoch": 0.758926746664331, "grad_norm": 0.5662974494155039, "learning_rate": 1.3393349553933496e-05, "loss": 0.7071, "step": 25994 }, { "epoch": 0.7589559428921784, "grad_norm": 0.5077171451296335, "learning_rate": 1.3391727493917275e-05, "loss": 0.58, "step": 25995 }, { "epoch": 0.7589851391200257, "grad_norm": 0.5404531804291629, "learning_rate": 1.3390105433901055e-05, "loss": 0.6822, "step": 25996 }, { "epoch": 0.7590143353478731, "grad_norm": 0.5248434276284764, "learning_rate": 1.3388483373884833e-05, "loss": 0.5625, "step": 25997 }, { "epoch": 0.7590435315757205, "grad_norm": 0.5225595935425544, "learning_rate": 1.3386861313868615e-05, "loss": 0.572, "step": 25998 }, { "epoch": 0.7590727278035678, "grad_norm": 0.4966476511919443, "learning_rate": 1.3385239253852395e-05, "loss": 0.5434, "step": 25999 }, { "epoch": 0.7591019240314152, "grad_norm": 0.48878798489711783, "learning_rate": 1.3383617193836173e-05, "loss": 0.5321, "step": 26000 }, { "epoch": 0.7591311202592625, "grad_norm": 0.5177914319125367, "learning_rate": 1.3381995133819952e-05, "loss": 0.6284, "step": 26001 }, { "epoch": 0.7591603164871099, "grad_norm": 0.5210207313406173, "learning_rate": 1.3380373073803732e-05, "loss": 0.5776, "step": 26002 }, { "epoch": 0.7591895127149573, "grad_norm": 0.5637621971393313, "learning_rate": 1.337875101378751e-05, "loss": 0.6628, "step": 26003 }, { "epoch": 0.7592187089428046, "grad_norm": 0.4824943758861139, "learning_rate": 1.337712895377129e-05, "loss": 0.5011, "step": 26004 }, { "epoch": 0.759247905170652, "grad_norm": 0.5157022497672571, "learning_rate": 1.3375506893755068e-05, "loss": 0.5653, "step": 26005 }, { "epoch": 0.7592771013984994, "grad_norm": 0.5698134977337247, "learning_rate": 1.3373884833738849e-05, "loss": 0.698, "step": 26006 }, { "epoch": 0.7593062976263467, "grad_norm": 0.5183551384269102, "learning_rate": 1.337226277372263e-05, "loss": 0.5673, "step": 26007 }, { "epoch": 0.7593354938541941, "grad_norm": 0.5040684570930942, "learning_rate": 1.3370640713706409e-05, "loss": 0.5762, "step": 26008 }, { "epoch": 0.7593646900820414, "grad_norm": 0.49369296724601824, "learning_rate": 1.3369018653690189e-05, "loss": 0.4841, "step": 26009 }, { "epoch": 0.7593938863098888, "grad_norm": 0.5441477492711474, "learning_rate": 1.3367396593673967e-05, "loss": 0.6096, "step": 26010 }, { "epoch": 0.7594230825377362, "grad_norm": 0.5410575307918625, "learning_rate": 1.3365774533657745e-05, "loss": 0.5911, "step": 26011 }, { "epoch": 0.7594522787655835, "grad_norm": 0.5280184275210442, "learning_rate": 1.3364152473641526e-05, "loss": 0.6013, "step": 26012 }, { "epoch": 0.7594814749934309, "grad_norm": 0.5672496300633953, "learning_rate": 1.3362530413625304e-05, "loss": 0.683, "step": 26013 }, { "epoch": 0.7595106712212782, "grad_norm": 0.46274562791669455, "learning_rate": 1.3360908353609084e-05, "loss": 0.4921, "step": 26014 }, { "epoch": 0.7595398674491256, "grad_norm": 0.5160872563663121, "learning_rate": 1.3359286293592862e-05, "loss": 0.5599, "step": 26015 }, { "epoch": 0.759569063676973, "grad_norm": 0.538914548381773, "learning_rate": 1.3357664233576642e-05, "loss": 0.6006, "step": 26016 }, { "epoch": 0.7595982599048203, "grad_norm": 0.48283665984362, "learning_rate": 1.3356042173560424e-05, "loss": 0.5411, "step": 26017 }, { "epoch": 0.7596274561326677, "grad_norm": 0.5303580075212883, "learning_rate": 1.3354420113544203e-05, "loss": 0.6185, "step": 26018 }, { "epoch": 0.759656652360515, "grad_norm": 0.531519577591478, "learning_rate": 1.3352798053527981e-05, "loss": 0.601, "step": 26019 }, { "epoch": 0.7596858485883624, "grad_norm": 0.5105852539909489, "learning_rate": 1.3351175993511761e-05, "loss": 0.5604, "step": 26020 }, { "epoch": 0.7597150448162098, "grad_norm": 0.5304705034010672, "learning_rate": 1.334955393349554e-05, "loss": 0.6181, "step": 26021 }, { "epoch": 0.7597442410440571, "grad_norm": 0.5090777162593192, "learning_rate": 1.334793187347932e-05, "loss": 0.5615, "step": 26022 }, { "epoch": 0.7597734372719045, "grad_norm": 0.5614186849257485, "learning_rate": 1.3346309813463098e-05, "loss": 0.6254, "step": 26023 }, { "epoch": 0.7598026334997519, "grad_norm": 0.49521866051706076, "learning_rate": 1.3344687753446878e-05, "loss": 0.5487, "step": 26024 }, { "epoch": 0.7598318297275992, "grad_norm": 0.5604384730311337, "learning_rate": 1.3343065693430656e-05, "loss": 0.6766, "step": 26025 }, { "epoch": 0.7598610259554466, "grad_norm": 0.51344037872639, "learning_rate": 1.3341443633414438e-05, "loss": 0.5673, "step": 26026 }, { "epoch": 0.7598902221832939, "grad_norm": 0.5510351064833726, "learning_rate": 1.3339821573398218e-05, "loss": 0.6215, "step": 26027 }, { "epoch": 0.7599194184111413, "grad_norm": 0.4822107027465395, "learning_rate": 1.3338199513381996e-05, "loss": 0.5161, "step": 26028 }, { "epoch": 0.7599486146389887, "grad_norm": 0.5088534632013046, "learning_rate": 1.3336577453365775e-05, "loss": 0.5562, "step": 26029 }, { "epoch": 0.759977810866836, "grad_norm": 0.5320094019732877, "learning_rate": 1.3334955393349555e-05, "loss": 0.5928, "step": 26030 }, { "epoch": 0.7600070070946834, "grad_norm": 0.5259505973174741, "learning_rate": 1.3333333333333333e-05, "loss": 0.6028, "step": 26031 }, { "epoch": 0.7600362033225307, "grad_norm": 0.4920733069612813, "learning_rate": 1.3331711273317113e-05, "loss": 0.541, "step": 26032 }, { "epoch": 0.7600653995503781, "grad_norm": 0.538770769998691, "learning_rate": 1.3330089213300892e-05, "loss": 0.5963, "step": 26033 }, { "epoch": 0.7600945957782255, "grad_norm": 0.5707938055589106, "learning_rate": 1.3328467153284672e-05, "loss": 0.6481, "step": 26034 }, { "epoch": 0.7601237920060728, "grad_norm": 0.5528076336430459, "learning_rate": 1.332684509326845e-05, "loss": 0.6262, "step": 26035 }, { "epoch": 0.7601529882339202, "grad_norm": 0.5375024888897386, "learning_rate": 1.3325223033252232e-05, "loss": 0.6368, "step": 26036 }, { "epoch": 0.7601821844617676, "grad_norm": 0.4895104523007653, "learning_rate": 1.3323600973236012e-05, "loss": 0.502, "step": 26037 }, { "epoch": 0.7602113806896149, "grad_norm": 0.4990743429008064, "learning_rate": 1.332197891321979e-05, "loss": 0.571, "step": 26038 }, { "epoch": 0.7602405769174623, "grad_norm": 0.52770492468075, "learning_rate": 1.3320356853203569e-05, "loss": 0.5957, "step": 26039 }, { "epoch": 0.7602697731453096, "grad_norm": 0.4579199479827121, "learning_rate": 1.3318734793187349e-05, "loss": 0.482, "step": 26040 }, { "epoch": 0.760298969373157, "grad_norm": 0.5350934883702131, "learning_rate": 1.3317112733171127e-05, "loss": 0.6014, "step": 26041 }, { "epoch": 0.7603281656010044, "grad_norm": 0.5216554711311164, "learning_rate": 1.3315490673154907e-05, "loss": 0.5504, "step": 26042 }, { "epoch": 0.7603573618288517, "grad_norm": 0.5317121817058609, "learning_rate": 1.3313868613138686e-05, "loss": 0.5757, "step": 26043 }, { "epoch": 0.7603865580566991, "grad_norm": 0.5222710649252972, "learning_rate": 1.3312246553122466e-05, "loss": 0.5589, "step": 26044 }, { "epoch": 0.7604157542845464, "grad_norm": 0.5531776952464582, "learning_rate": 1.3310624493106247e-05, "loss": 0.6606, "step": 26045 }, { "epoch": 0.7604449505123938, "grad_norm": 0.510500958994522, "learning_rate": 1.3309002433090026e-05, "loss": 0.5519, "step": 26046 }, { "epoch": 0.7604741467402412, "grad_norm": 0.5143173226537852, "learning_rate": 1.3307380373073804e-05, "loss": 0.5668, "step": 26047 }, { "epoch": 0.7605033429680885, "grad_norm": 0.490564099420607, "learning_rate": 1.3305758313057584e-05, "loss": 0.5393, "step": 26048 }, { "epoch": 0.7605325391959359, "grad_norm": 0.5326014010823703, "learning_rate": 1.3304136253041363e-05, "loss": 0.622, "step": 26049 }, { "epoch": 0.7605617354237832, "grad_norm": 0.5113118161486602, "learning_rate": 1.3302514193025143e-05, "loss": 0.5575, "step": 26050 }, { "epoch": 0.7605909316516306, "grad_norm": 0.5381240652621109, "learning_rate": 1.3300892133008921e-05, "loss": 0.5873, "step": 26051 }, { "epoch": 0.760620127879478, "grad_norm": 0.5001191909346536, "learning_rate": 1.3299270072992701e-05, "loss": 0.5443, "step": 26052 }, { "epoch": 0.7606493241073253, "grad_norm": 0.5490938860542051, "learning_rate": 1.329764801297648e-05, "loss": 0.6237, "step": 26053 }, { "epoch": 0.7606785203351727, "grad_norm": 0.49867158139550516, "learning_rate": 1.3296025952960261e-05, "loss": 0.5473, "step": 26054 }, { "epoch": 0.76070771656302, "grad_norm": 0.5137708752546436, "learning_rate": 1.3294403892944041e-05, "loss": 0.5676, "step": 26055 }, { "epoch": 0.7607369127908674, "grad_norm": 0.5263604920028404, "learning_rate": 1.329278183292782e-05, "loss": 0.621, "step": 26056 }, { "epoch": 0.7607661090187148, "grad_norm": 0.531868013625503, "learning_rate": 1.3291159772911598e-05, "loss": 0.611, "step": 26057 }, { "epoch": 0.7607953052465621, "grad_norm": 0.518062399097399, "learning_rate": 1.3289537712895378e-05, "loss": 0.5421, "step": 26058 }, { "epoch": 0.7608245014744095, "grad_norm": 0.48425427690276596, "learning_rate": 1.3287915652879157e-05, "loss": 0.5343, "step": 26059 }, { "epoch": 0.7608536977022569, "grad_norm": 0.501526727323138, "learning_rate": 1.3286293592862937e-05, "loss": 0.5831, "step": 26060 }, { "epoch": 0.7608828939301042, "grad_norm": 0.49389176808799823, "learning_rate": 1.3284671532846715e-05, "loss": 0.5164, "step": 26061 }, { "epoch": 0.7609120901579516, "grad_norm": 0.5173505732879019, "learning_rate": 1.3283049472830495e-05, "loss": 0.576, "step": 26062 }, { "epoch": 0.7609412863857989, "grad_norm": 0.5508065462530508, "learning_rate": 1.3281427412814273e-05, "loss": 0.674, "step": 26063 }, { "epoch": 0.7609704826136463, "grad_norm": 0.5556012237171928, "learning_rate": 1.3279805352798055e-05, "loss": 0.6348, "step": 26064 }, { "epoch": 0.7609996788414937, "grad_norm": 0.5611685549880845, "learning_rate": 1.3278183292781835e-05, "loss": 0.589, "step": 26065 }, { "epoch": 0.761028875069341, "grad_norm": 0.5600026162436251, "learning_rate": 1.3276561232765614e-05, "loss": 0.6052, "step": 26066 }, { "epoch": 0.7610580712971884, "grad_norm": 0.4980366576750089, "learning_rate": 1.3274939172749392e-05, "loss": 0.5338, "step": 26067 }, { "epoch": 0.7610872675250357, "grad_norm": 0.5062892030551094, "learning_rate": 1.3273317112733172e-05, "loss": 0.5835, "step": 26068 }, { "epoch": 0.7611164637528831, "grad_norm": 0.5137036452234232, "learning_rate": 1.327169505271695e-05, "loss": 0.5983, "step": 26069 }, { "epoch": 0.7611456599807305, "grad_norm": 0.5414459974125596, "learning_rate": 1.327007299270073e-05, "loss": 0.6119, "step": 26070 }, { "epoch": 0.7611748562085778, "grad_norm": 0.5892123142739061, "learning_rate": 1.3268450932684509e-05, "loss": 0.5819, "step": 26071 }, { "epoch": 0.7612040524364252, "grad_norm": 0.5078895322025658, "learning_rate": 1.3266828872668289e-05, "loss": 0.5443, "step": 26072 }, { "epoch": 0.7612332486642726, "grad_norm": 0.49505371697550443, "learning_rate": 1.326520681265207e-05, "loss": 0.5346, "step": 26073 }, { "epoch": 0.7612624448921199, "grad_norm": 0.5309074804235842, "learning_rate": 1.3263584752635849e-05, "loss": 0.6605, "step": 26074 }, { "epoch": 0.7612916411199673, "grad_norm": 0.5204703067082173, "learning_rate": 1.3261962692619627e-05, "loss": 0.5768, "step": 26075 }, { "epoch": 0.7613208373478146, "grad_norm": 0.5373281337151, "learning_rate": 1.3260340632603408e-05, "loss": 0.6283, "step": 26076 }, { "epoch": 0.761350033575662, "grad_norm": 0.5540081628731073, "learning_rate": 1.3258718572587186e-05, "loss": 0.6277, "step": 26077 }, { "epoch": 0.7613792298035094, "grad_norm": 0.47934575428990855, "learning_rate": 1.3257096512570966e-05, "loss": 0.4989, "step": 26078 }, { "epoch": 0.7614084260313567, "grad_norm": 0.5504497589776518, "learning_rate": 1.3255474452554744e-05, "loss": 0.6097, "step": 26079 }, { "epoch": 0.7614376222592041, "grad_norm": 0.5421247329323209, "learning_rate": 1.3253852392538524e-05, "loss": 0.5923, "step": 26080 }, { "epoch": 0.7614668184870514, "grad_norm": 0.5119123892554348, "learning_rate": 1.3252230332522303e-05, "loss": 0.5523, "step": 26081 }, { "epoch": 0.7614960147148988, "grad_norm": 0.4968589924721524, "learning_rate": 1.3250608272506083e-05, "loss": 0.5419, "step": 26082 }, { "epoch": 0.7615252109427462, "grad_norm": 0.5479481049764436, "learning_rate": 1.3248986212489865e-05, "loss": 0.6238, "step": 26083 }, { "epoch": 0.7615544071705935, "grad_norm": 0.5170893110629988, "learning_rate": 1.3247364152473643e-05, "loss": 0.5401, "step": 26084 }, { "epoch": 0.7615836033984409, "grad_norm": 0.5502988257127741, "learning_rate": 1.3245742092457421e-05, "loss": 0.6448, "step": 26085 }, { "epoch": 0.7616127996262883, "grad_norm": 0.5453215407763922, "learning_rate": 1.3244120032441201e-05, "loss": 0.6054, "step": 26086 }, { "epoch": 0.7616419958541356, "grad_norm": 0.517292949649086, "learning_rate": 1.324249797242498e-05, "loss": 0.5772, "step": 26087 }, { "epoch": 0.761671192081983, "grad_norm": 0.5382147971525868, "learning_rate": 1.324087591240876e-05, "loss": 0.6265, "step": 26088 }, { "epoch": 0.7617003883098303, "grad_norm": 0.5102301432380488, "learning_rate": 1.3239253852392538e-05, "loss": 0.5619, "step": 26089 }, { "epoch": 0.7617295845376777, "grad_norm": 0.5496494751360833, "learning_rate": 1.3237631792376318e-05, "loss": 0.6291, "step": 26090 }, { "epoch": 0.7617587807655251, "grad_norm": 0.5220257150480063, "learning_rate": 1.3236009732360097e-05, "loss": 0.5909, "step": 26091 }, { "epoch": 0.7617879769933724, "grad_norm": 0.5071971227810609, "learning_rate": 1.3234387672343878e-05, "loss": 0.536, "step": 26092 }, { "epoch": 0.7618171732212198, "grad_norm": 0.5185640426262971, "learning_rate": 1.3232765612327659e-05, "loss": 0.5383, "step": 26093 }, { "epoch": 0.7618463694490671, "grad_norm": 0.5125439850444153, "learning_rate": 1.3231143552311437e-05, "loss": 0.5914, "step": 26094 }, { "epoch": 0.7618755656769145, "grad_norm": 0.4887021429246377, "learning_rate": 1.3229521492295215e-05, "loss": 0.5521, "step": 26095 }, { "epoch": 0.7619047619047619, "grad_norm": 0.5051809411771067, "learning_rate": 1.3227899432278995e-05, "loss": 0.481, "step": 26096 }, { "epoch": 0.7619339581326092, "grad_norm": 0.5252655468043694, "learning_rate": 1.3226277372262774e-05, "loss": 0.6134, "step": 26097 }, { "epoch": 0.7619631543604566, "grad_norm": 0.5721637516585717, "learning_rate": 1.3224655312246554e-05, "loss": 0.6343, "step": 26098 }, { "epoch": 0.761992350588304, "grad_norm": 0.5150223558036573, "learning_rate": 1.3223033252230332e-05, "loss": 0.5446, "step": 26099 }, { "epoch": 0.7620215468161513, "grad_norm": 0.591181807958455, "learning_rate": 1.3221411192214112e-05, "loss": 0.6826, "step": 26100 }, { "epoch": 0.7620507430439987, "grad_norm": 0.46993620259599367, "learning_rate": 1.321978913219789e-05, "loss": 0.4918, "step": 26101 }, { "epoch": 0.762079939271846, "grad_norm": 0.5084023343776873, "learning_rate": 1.3218167072181672e-05, "loss": 0.5626, "step": 26102 }, { "epoch": 0.7621091354996934, "grad_norm": 0.4950669992827323, "learning_rate": 1.321654501216545e-05, "loss": 0.5261, "step": 26103 }, { "epoch": 0.7621383317275408, "grad_norm": 0.48717230784620613, "learning_rate": 1.321492295214923e-05, "loss": 0.5335, "step": 26104 }, { "epoch": 0.7621675279553881, "grad_norm": 0.48285538577297976, "learning_rate": 1.321330089213301e-05, "loss": 0.5013, "step": 26105 }, { "epoch": 0.7621967241832355, "grad_norm": 0.5260133709969139, "learning_rate": 1.321167883211679e-05, "loss": 0.5979, "step": 26106 }, { "epoch": 0.7622259204110828, "grad_norm": 0.5349329263627207, "learning_rate": 1.3210056772100568e-05, "loss": 0.605, "step": 26107 }, { "epoch": 0.7622551166389302, "grad_norm": 0.5516537301484099, "learning_rate": 1.3208434712084348e-05, "loss": 0.6254, "step": 26108 }, { "epoch": 0.7622843128667776, "grad_norm": 0.5083786462604599, "learning_rate": 1.3206812652068126e-05, "loss": 0.5521, "step": 26109 }, { "epoch": 0.7623135090946249, "grad_norm": 0.50973855723688, "learning_rate": 1.3205190592051906e-05, "loss": 0.5951, "step": 26110 }, { "epoch": 0.7623427053224724, "grad_norm": 0.5172734049915245, "learning_rate": 1.3203568532035688e-05, "loss": 0.5836, "step": 26111 }, { "epoch": 0.7623719015503198, "grad_norm": 0.5427585155282674, "learning_rate": 1.3201946472019466e-05, "loss": 0.6202, "step": 26112 }, { "epoch": 0.7624010977781671, "grad_norm": 0.4750783967040919, "learning_rate": 1.3200324412003245e-05, "loss": 0.5166, "step": 26113 }, { "epoch": 0.7624302940060145, "grad_norm": 0.5435096313665364, "learning_rate": 1.3198702351987025e-05, "loss": 0.6185, "step": 26114 }, { "epoch": 0.7624594902338618, "grad_norm": 0.467681928023457, "learning_rate": 1.3197080291970803e-05, "loss": 0.5015, "step": 26115 }, { "epoch": 0.7624886864617092, "grad_norm": 0.5332538903876802, "learning_rate": 1.3195458231954583e-05, "loss": 0.5778, "step": 26116 }, { "epoch": 0.7625178826895566, "grad_norm": 0.5323600260869179, "learning_rate": 1.3193836171938362e-05, "loss": 0.5572, "step": 26117 }, { "epoch": 0.7625470789174039, "grad_norm": 0.5289509376250257, "learning_rate": 1.3192214111922142e-05, "loss": 0.6039, "step": 26118 }, { "epoch": 0.7625762751452513, "grad_norm": 0.5131132081680964, "learning_rate": 1.319059205190592e-05, "loss": 0.5398, "step": 26119 }, { "epoch": 0.7626054713730986, "grad_norm": 0.48364429357515654, "learning_rate": 1.3188969991889698e-05, "loss": 0.4916, "step": 26120 }, { "epoch": 0.762634667600946, "grad_norm": 0.5414118746710446, "learning_rate": 1.3187347931873482e-05, "loss": 0.6292, "step": 26121 }, { "epoch": 0.7626638638287934, "grad_norm": 0.5216995692636319, "learning_rate": 1.318572587185726e-05, "loss": 0.5744, "step": 26122 }, { "epoch": 0.7626930600566407, "grad_norm": 0.4980468892079646, "learning_rate": 1.3184103811841039e-05, "loss": 0.57, "step": 26123 }, { "epoch": 0.7627222562844881, "grad_norm": 0.5351631223946032, "learning_rate": 1.3182481751824819e-05, "loss": 0.6284, "step": 26124 }, { "epoch": 0.7627514525123354, "grad_norm": 0.5348373023632151, "learning_rate": 1.3180859691808597e-05, "loss": 0.5892, "step": 26125 }, { "epoch": 0.7627806487401828, "grad_norm": 0.5148707314392373, "learning_rate": 1.3179237631792377e-05, "loss": 0.5713, "step": 26126 }, { "epoch": 0.7628098449680302, "grad_norm": 0.5271636000369059, "learning_rate": 1.3177615571776155e-05, "loss": 0.5715, "step": 26127 }, { "epoch": 0.7628390411958775, "grad_norm": 0.5229754688104131, "learning_rate": 1.3175993511759936e-05, "loss": 0.6, "step": 26128 }, { "epoch": 0.7628682374237249, "grad_norm": 0.4941177237484301, "learning_rate": 1.3174371451743714e-05, "loss": 0.5356, "step": 26129 }, { "epoch": 0.7628974336515723, "grad_norm": 0.5217933893636744, "learning_rate": 1.3172749391727496e-05, "loss": 0.5864, "step": 26130 }, { "epoch": 0.7629266298794196, "grad_norm": 0.49701135355011294, "learning_rate": 1.3171127331711276e-05, "loss": 0.5436, "step": 26131 }, { "epoch": 0.762955826107267, "grad_norm": 0.5428025038007479, "learning_rate": 1.3169505271695054e-05, "loss": 0.6273, "step": 26132 }, { "epoch": 0.7629850223351143, "grad_norm": 0.5115215010778561, "learning_rate": 1.3167883211678832e-05, "loss": 0.6041, "step": 26133 }, { "epoch": 0.7630142185629617, "grad_norm": 0.5287335038446894, "learning_rate": 1.3166261151662613e-05, "loss": 0.5616, "step": 26134 }, { "epoch": 0.7630434147908091, "grad_norm": 0.5097956361996143, "learning_rate": 1.3164639091646391e-05, "loss": 0.5938, "step": 26135 }, { "epoch": 0.7630726110186564, "grad_norm": 0.5255778288597512, "learning_rate": 1.3163017031630171e-05, "loss": 0.5573, "step": 26136 }, { "epoch": 0.7631018072465038, "grad_norm": 0.5156134671925051, "learning_rate": 1.316139497161395e-05, "loss": 0.5857, "step": 26137 }, { "epoch": 0.7631310034743511, "grad_norm": 0.5019146672074472, "learning_rate": 1.315977291159773e-05, "loss": 0.5574, "step": 26138 }, { "epoch": 0.7631601997021985, "grad_norm": 0.5042314685145936, "learning_rate": 1.3158150851581511e-05, "loss": 0.5268, "step": 26139 }, { "epoch": 0.7631893959300459, "grad_norm": 0.5561350913948012, "learning_rate": 1.315652879156529e-05, "loss": 0.6962, "step": 26140 }, { "epoch": 0.7632185921578932, "grad_norm": 0.5213078200176618, "learning_rate": 1.3154906731549068e-05, "loss": 0.5776, "step": 26141 }, { "epoch": 0.7632477883857406, "grad_norm": 0.48689521419197107, "learning_rate": 1.3153284671532848e-05, "loss": 0.5109, "step": 26142 }, { "epoch": 0.763276984613588, "grad_norm": 0.4679522849232103, "learning_rate": 1.3151662611516626e-05, "loss": 0.5071, "step": 26143 }, { "epoch": 0.7633061808414353, "grad_norm": 0.5211536365811025, "learning_rate": 1.3150040551500406e-05, "loss": 0.5771, "step": 26144 }, { "epoch": 0.7633353770692827, "grad_norm": 0.5370317004874001, "learning_rate": 1.3148418491484185e-05, "loss": 0.6171, "step": 26145 }, { "epoch": 0.76336457329713, "grad_norm": 0.5024454711576356, "learning_rate": 1.3146796431467965e-05, "loss": 0.5691, "step": 26146 }, { "epoch": 0.7633937695249774, "grad_norm": 0.5017221270410124, "learning_rate": 1.3145174371451743e-05, "loss": 0.5433, "step": 26147 }, { "epoch": 0.7634229657528248, "grad_norm": 0.531808252860733, "learning_rate": 1.3143552311435522e-05, "loss": 0.6399, "step": 26148 }, { "epoch": 0.7634521619806721, "grad_norm": 0.5201639097381158, "learning_rate": 1.3141930251419305e-05, "loss": 0.5592, "step": 26149 }, { "epoch": 0.7634813582085195, "grad_norm": 0.5401841832468892, "learning_rate": 1.3140308191403083e-05, "loss": 0.642, "step": 26150 }, { "epoch": 0.7635105544363668, "grad_norm": 0.5180620887561526, "learning_rate": 1.3138686131386862e-05, "loss": 0.5602, "step": 26151 }, { "epoch": 0.7635397506642142, "grad_norm": 0.49765754557973707, "learning_rate": 1.3137064071370642e-05, "loss": 0.5009, "step": 26152 }, { "epoch": 0.7635689468920616, "grad_norm": 0.5254020861565853, "learning_rate": 1.313544201135442e-05, "loss": 0.6217, "step": 26153 }, { "epoch": 0.7635981431199089, "grad_norm": 0.5439056479268488, "learning_rate": 1.31338199513382e-05, "loss": 0.6025, "step": 26154 }, { "epoch": 0.7636273393477563, "grad_norm": 0.5178499986770855, "learning_rate": 1.3132197891321979e-05, "loss": 0.541, "step": 26155 }, { "epoch": 0.7636565355756036, "grad_norm": 0.5326010794143266, "learning_rate": 1.3130575831305759e-05, "loss": 0.5841, "step": 26156 }, { "epoch": 0.763685731803451, "grad_norm": 0.496103127640581, "learning_rate": 1.3128953771289537e-05, "loss": 0.5325, "step": 26157 }, { "epoch": 0.7637149280312984, "grad_norm": 0.526710321121201, "learning_rate": 1.3127331711273319e-05, "loss": 0.5668, "step": 26158 }, { "epoch": 0.7637441242591457, "grad_norm": 0.5128633642682789, "learning_rate": 1.3125709651257099e-05, "loss": 0.5343, "step": 26159 }, { "epoch": 0.7637733204869931, "grad_norm": 0.5277465402991878, "learning_rate": 1.3124087591240877e-05, "loss": 0.5741, "step": 26160 }, { "epoch": 0.7638025167148405, "grad_norm": 0.49260797299107173, "learning_rate": 1.3122465531224656e-05, "loss": 0.539, "step": 26161 }, { "epoch": 0.7638317129426878, "grad_norm": 0.5373036911010319, "learning_rate": 1.3120843471208436e-05, "loss": 0.5512, "step": 26162 }, { "epoch": 0.7638609091705352, "grad_norm": 0.5336960204089622, "learning_rate": 1.3119221411192214e-05, "loss": 0.5822, "step": 26163 }, { "epoch": 0.7638901053983825, "grad_norm": 0.5158810372359997, "learning_rate": 1.3117599351175994e-05, "loss": 0.5897, "step": 26164 }, { "epoch": 0.7639193016262299, "grad_norm": 0.5144659420571229, "learning_rate": 1.3115977291159773e-05, "loss": 0.5244, "step": 26165 }, { "epoch": 0.7639484978540773, "grad_norm": 0.5398092879044336, "learning_rate": 1.3114355231143553e-05, "loss": 0.6234, "step": 26166 }, { "epoch": 0.7639776940819246, "grad_norm": 0.537418727774196, "learning_rate": 1.3112733171127331e-05, "loss": 0.6277, "step": 26167 }, { "epoch": 0.764006890309772, "grad_norm": 0.5164742980988132, "learning_rate": 1.3111111111111113e-05, "loss": 0.5533, "step": 26168 }, { "epoch": 0.7640360865376193, "grad_norm": 0.5177315632070566, "learning_rate": 1.3109489051094891e-05, "loss": 0.5793, "step": 26169 }, { "epoch": 0.7640652827654667, "grad_norm": 0.5633859364124693, "learning_rate": 1.3107866991078671e-05, "loss": 0.6521, "step": 26170 }, { "epoch": 0.7640944789933141, "grad_norm": 0.5412215794815132, "learning_rate": 1.310624493106245e-05, "loss": 0.6055, "step": 26171 }, { "epoch": 0.7641236752211614, "grad_norm": 0.5084650353817133, "learning_rate": 1.310462287104623e-05, "loss": 0.5525, "step": 26172 }, { "epoch": 0.7641528714490088, "grad_norm": 0.5500037900084211, "learning_rate": 1.3103000811030008e-05, "loss": 0.6705, "step": 26173 }, { "epoch": 0.7641820676768561, "grad_norm": 0.5135734019383474, "learning_rate": 1.3101378751013788e-05, "loss": 0.5648, "step": 26174 }, { "epoch": 0.7642112639047035, "grad_norm": 0.5430185112514998, "learning_rate": 1.3099756690997567e-05, "loss": 0.6447, "step": 26175 }, { "epoch": 0.7642404601325509, "grad_norm": 0.5007484144010894, "learning_rate": 1.3098134630981347e-05, "loss": 0.5189, "step": 26176 }, { "epoch": 0.7642696563603982, "grad_norm": 0.47514564858396957, "learning_rate": 1.3096512570965128e-05, "loss": 0.5035, "step": 26177 }, { "epoch": 0.7642988525882456, "grad_norm": 0.5264888996558378, "learning_rate": 1.3094890510948907e-05, "loss": 0.6126, "step": 26178 }, { "epoch": 0.764328048816093, "grad_norm": 0.5113365800155691, "learning_rate": 1.3093268450932685e-05, "loss": 0.5379, "step": 26179 }, { "epoch": 0.7643572450439403, "grad_norm": 0.517993175387211, "learning_rate": 1.3091646390916465e-05, "loss": 0.5623, "step": 26180 }, { "epoch": 0.7643864412717877, "grad_norm": 0.5098333198060611, "learning_rate": 1.3090024330900244e-05, "loss": 0.5737, "step": 26181 }, { "epoch": 0.764415637499635, "grad_norm": 0.5539900548231895, "learning_rate": 1.3088402270884024e-05, "loss": 0.6639, "step": 26182 }, { "epoch": 0.7644448337274824, "grad_norm": 0.5183425812423353, "learning_rate": 1.3086780210867802e-05, "loss": 0.5856, "step": 26183 }, { "epoch": 0.7644740299553298, "grad_norm": 0.5022564797641604, "learning_rate": 1.3085158150851582e-05, "loss": 0.5242, "step": 26184 }, { "epoch": 0.7645032261831771, "grad_norm": 0.5383311575580453, "learning_rate": 1.308353609083536e-05, "loss": 0.616, "step": 26185 }, { "epoch": 0.7645324224110245, "grad_norm": 0.5815558830081435, "learning_rate": 1.3081914030819139e-05, "loss": 0.7014, "step": 26186 }, { "epoch": 0.7645616186388718, "grad_norm": 0.5178543238383524, "learning_rate": 1.3080291970802922e-05, "loss": 0.6133, "step": 26187 }, { "epoch": 0.7645908148667192, "grad_norm": 0.587269282054199, "learning_rate": 1.30786699107867e-05, "loss": 0.7068, "step": 26188 }, { "epoch": 0.7646200110945666, "grad_norm": 0.508326439360481, "learning_rate": 1.3077047850770479e-05, "loss": 0.5499, "step": 26189 }, { "epoch": 0.7646492073224139, "grad_norm": 0.5444934140090225, "learning_rate": 1.3075425790754259e-05, "loss": 0.6163, "step": 26190 }, { "epoch": 0.7646784035502613, "grad_norm": 0.5411094552047685, "learning_rate": 1.3073803730738037e-05, "loss": 0.645, "step": 26191 }, { "epoch": 0.7647075997781086, "grad_norm": 0.5281811024959837, "learning_rate": 1.3072181670721818e-05, "loss": 0.5923, "step": 26192 }, { "epoch": 0.764736796005956, "grad_norm": 0.5099310070157269, "learning_rate": 1.3070559610705596e-05, "loss": 0.5717, "step": 26193 }, { "epoch": 0.7647659922338034, "grad_norm": 0.5395277689382485, "learning_rate": 1.3068937550689376e-05, "loss": 0.6441, "step": 26194 }, { "epoch": 0.7647951884616507, "grad_norm": 0.5370062084158894, "learning_rate": 1.3067315490673154e-05, "loss": 0.6266, "step": 26195 }, { "epoch": 0.7648243846894981, "grad_norm": 0.5124186496769876, "learning_rate": 1.3065693430656936e-05, "loss": 0.5424, "step": 26196 }, { "epoch": 0.7648535809173455, "grad_norm": 0.5766067736662666, "learning_rate": 1.3064071370640714e-05, "loss": 0.6584, "step": 26197 }, { "epoch": 0.7648827771451928, "grad_norm": 0.5482262332852276, "learning_rate": 1.3062449310624495e-05, "loss": 0.5797, "step": 26198 }, { "epoch": 0.7649119733730402, "grad_norm": 0.5290440676857913, "learning_rate": 1.3060827250608273e-05, "loss": 0.611, "step": 26199 }, { "epoch": 0.7649411696008875, "grad_norm": 0.5190234040868258, "learning_rate": 1.3059205190592053e-05, "loss": 0.5724, "step": 26200 }, { "epoch": 0.7649703658287349, "grad_norm": 0.513447887521929, "learning_rate": 1.3057583130575831e-05, "loss": 0.5317, "step": 26201 }, { "epoch": 0.7649995620565823, "grad_norm": 0.5576597131254188, "learning_rate": 1.3055961070559611e-05, "loss": 0.5876, "step": 26202 }, { "epoch": 0.7650287582844296, "grad_norm": 0.560712396662689, "learning_rate": 1.305433901054339e-05, "loss": 0.6378, "step": 26203 }, { "epoch": 0.765057954512277, "grad_norm": 0.5417637957169142, "learning_rate": 1.305271695052717e-05, "loss": 0.6066, "step": 26204 }, { "epoch": 0.7650871507401243, "grad_norm": 0.5682392269436335, "learning_rate": 1.3051094890510952e-05, "loss": 0.6742, "step": 26205 }, { "epoch": 0.7651163469679717, "grad_norm": 0.5270896673929323, "learning_rate": 1.304947283049473e-05, "loss": 0.5806, "step": 26206 }, { "epoch": 0.7651455431958191, "grad_norm": 0.491778755383811, "learning_rate": 1.3047850770478508e-05, "loss": 0.5282, "step": 26207 }, { "epoch": 0.7651747394236664, "grad_norm": 0.5731003634993402, "learning_rate": 1.3046228710462288e-05, "loss": 0.6891, "step": 26208 }, { "epoch": 0.7652039356515138, "grad_norm": 0.5713800184183698, "learning_rate": 1.3044606650446067e-05, "loss": 0.6133, "step": 26209 }, { "epoch": 0.7652331318793612, "grad_norm": 0.5586160904790214, "learning_rate": 1.3042984590429847e-05, "loss": 0.6546, "step": 26210 }, { "epoch": 0.7652623281072085, "grad_norm": 0.4974159174515373, "learning_rate": 1.3041362530413625e-05, "loss": 0.544, "step": 26211 }, { "epoch": 0.7652915243350559, "grad_norm": 0.5330136664208465, "learning_rate": 1.3039740470397405e-05, "loss": 0.6203, "step": 26212 }, { "epoch": 0.7653207205629032, "grad_norm": 0.5845587416571091, "learning_rate": 1.3038118410381184e-05, "loss": 0.7073, "step": 26213 }, { "epoch": 0.7653499167907506, "grad_norm": 0.4778389152345451, "learning_rate": 1.3036496350364962e-05, "loss": 0.4835, "step": 26214 }, { "epoch": 0.765379113018598, "grad_norm": 0.528390542166911, "learning_rate": 1.3034874290348746e-05, "loss": 0.5878, "step": 26215 }, { "epoch": 0.7654083092464453, "grad_norm": 0.506879602444249, "learning_rate": 1.3033252230332524e-05, "loss": 0.4998, "step": 26216 }, { "epoch": 0.7654375054742927, "grad_norm": 0.5460634433146568, "learning_rate": 1.3031630170316302e-05, "loss": 0.6552, "step": 26217 }, { "epoch": 0.76546670170214, "grad_norm": 0.5365022580870893, "learning_rate": 1.3030008110300082e-05, "loss": 0.6002, "step": 26218 }, { "epoch": 0.7654958979299874, "grad_norm": 0.5011871151442168, "learning_rate": 1.302838605028386e-05, "loss": 0.5727, "step": 26219 }, { "epoch": 0.7655250941578348, "grad_norm": 0.5079372524405562, "learning_rate": 1.302676399026764e-05, "loss": 0.5763, "step": 26220 }, { "epoch": 0.7655542903856821, "grad_norm": 0.46828898060219976, "learning_rate": 1.302514193025142e-05, "loss": 0.4874, "step": 26221 }, { "epoch": 0.7655834866135295, "grad_norm": 0.5392638158023815, "learning_rate": 1.30235198702352e-05, "loss": 0.6419, "step": 26222 }, { "epoch": 0.7656126828413768, "grad_norm": 0.527202179699356, "learning_rate": 1.3021897810218978e-05, "loss": 0.6261, "step": 26223 }, { "epoch": 0.7656418790692242, "grad_norm": 0.5551875605368951, "learning_rate": 1.302027575020276e-05, "loss": 0.627, "step": 26224 }, { "epoch": 0.7656710752970716, "grad_norm": 0.4796930044279101, "learning_rate": 1.3018653690186538e-05, "loss": 0.495, "step": 26225 }, { "epoch": 0.7657002715249189, "grad_norm": 0.5244602852770206, "learning_rate": 1.3017031630170318e-05, "loss": 0.5936, "step": 26226 }, { "epoch": 0.7657294677527663, "grad_norm": 0.5515556776350345, "learning_rate": 1.3015409570154096e-05, "loss": 0.6495, "step": 26227 }, { "epoch": 0.7657586639806137, "grad_norm": 0.5548574445867984, "learning_rate": 1.3013787510137876e-05, "loss": 0.6981, "step": 26228 }, { "epoch": 0.765787860208461, "grad_norm": 0.49488514214052354, "learning_rate": 1.3012165450121655e-05, "loss": 0.5412, "step": 26229 }, { "epoch": 0.7658170564363084, "grad_norm": 0.5319808450946417, "learning_rate": 1.3010543390105435e-05, "loss": 0.6437, "step": 26230 }, { "epoch": 0.7658462526641558, "grad_norm": 0.5443180589902091, "learning_rate": 1.3008921330089213e-05, "loss": 0.6372, "step": 26231 }, { "epoch": 0.7658754488920032, "grad_norm": 0.5171826395056609, "learning_rate": 1.3007299270072993e-05, "loss": 0.5621, "step": 26232 }, { "epoch": 0.7659046451198506, "grad_norm": 0.5488854479583972, "learning_rate": 1.3005677210056772e-05, "loss": 0.6513, "step": 26233 }, { "epoch": 0.7659338413476979, "grad_norm": 0.5122684818399326, "learning_rate": 1.3004055150040553e-05, "loss": 0.5899, "step": 26234 }, { "epoch": 0.7659630375755453, "grad_norm": 0.5409755200494458, "learning_rate": 1.3002433090024332e-05, "loss": 0.6363, "step": 26235 }, { "epoch": 0.7659922338033927, "grad_norm": 0.5034159233248652, "learning_rate": 1.3000811030008112e-05, "loss": 0.5599, "step": 26236 }, { "epoch": 0.76602143003124, "grad_norm": 0.5581168283437473, "learning_rate": 1.299918896999189e-05, "loss": 0.6235, "step": 26237 }, { "epoch": 0.7660506262590874, "grad_norm": 0.5018257299071668, "learning_rate": 1.299756690997567e-05, "loss": 0.5751, "step": 26238 }, { "epoch": 0.7660798224869347, "grad_norm": 0.5357483556372561, "learning_rate": 1.2995944849959449e-05, "loss": 0.6206, "step": 26239 }, { "epoch": 0.7661090187147821, "grad_norm": 0.4879773055735714, "learning_rate": 1.2994322789943229e-05, "loss": 0.5125, "step": 26240 }, { "epoch": 0.7661382149426295, "grad_norm": 0.5160213240885957, "learning_rate": 1.2992700729927007e-05, "loss": 0.5899, "step": 26241 }, { "epoch": 0.7661674111704768, "grad_norm": 0.5200191001338588, "learning_rate": 1.2991078669910785e-05, "loss": 0.5777, "step": 26242 }, { "epoch": 0.7661966073983242, "grad_norm": 0.5096981399148022, "learning_rate": 1.2989456609894569e-05, "loss": 0.6069, "step": 26243 }, { "epoch": 0.7662258036261715, "grad_norm": 0.4947302099532044, "learning_rate": 1.2987834549878347e-05, "loss": 0.5252, "step": 26244 }, { "epoch": 0.7662549998540189, "grad_norm": 0.4859984179015247, "learning_rate": 1.2986212489862126e-05, "loss": 0.5221, "step": 26245 }, { "epoch": 0.7662841960818663, "grad_norm": 0.4911540402077459, "learning_rate": 1.2984590429845906e-05, "loss": 0.5336, "step": 26246 }, { "epoch": 0.7663133923097136, "grad_norm": 0.5357885854139811, "learning_rate": 1.2982968369829684e-05, "loss": 0.5932, "step": 26247 }, { "epoch": 0.766342588537561, "grad_norm": 0.5156220216509931, "learning_rate": 1.2981346309813464e-05, "loss": 0.6107, "step": 26248 }, { "epoch": 0.7663717847654083, "grad_norm": 0.5374394398113165, "learning_rate": 1.2979724249797242e-05, "loss": 0.6309, "step": 26249 }, { "epoch": 0.7664009809932557, "grad_norm": 0.5017412802543029, "learning_rate": 1.2978102189781022e-05, "loss": 0.4852, "step": 26250 }, { "epoch": 0.7664301772211031, "grad_norm": 0.5249565292811915, "learning_rate": 1.2976480129764801e-05, "loss": 0.6125, "step": 26251 }, { "epoch": 0.7664593734489504, "grad_norm": 0.5405608691380511, "learning_rate": 1.297485806974858e-05, "loss": 0.6099, "step": 26252 }, { "epoch": 0.7664885696767978, "grad_norm": 0.5185389705095388, "learning_rate": 1.2973236009732363e-05, "loss": 0.5738, "step": 26253 }, { "epoch": 0.7665177659046452, "grad_norm": 0.5569564485969294, "learning_rate": 1.2971613949716141e-05, "loss": 0.6296, "step": 26254 }, { "epoch": 0.7665469621324925, "grad_norm": 0.49759728597373226, "learning_rate": 1.296999188969992e-05, "loss": 0.5676, "step": 26255 }, { "epoch": 0.7665761583603399, "grad_norm": 0.5149565604077406, "learning_rate": 1.29683698296837e-05, "loss": 0.5615, "step": 26256 }, { "epoch": 0.7666053545881872, "grad_norm": 0.5209233420316494, "learning_rate": 1.2966747769667478e-05, "loss": 0.6034, "step": 26257 }, { "epoch": 0.7666345508160346, "grad_norm": 0.5267475347559107, "learning_rate": 1.2965125709651258e-05, "loss": 0.6079, "step": 26258 }, { "epoch": 0.766663747043882, "grad_norm": 0.529116793683219, "learning_rate": 1.2963503649635036e-05, "loss": 0.6138, "step": 26259 }, { "epoch": 0.7666929432717293, "grad_norm": 0.5302887766613267, "learning_rate": 1.2961881589618816e-05, "loss": 0.613, "step": 26260 }, { "epoch": 0.7667221394995767, "grad_norm": 0.5446202565706778, "learning_rate": 1.2960259529602595e-05, "loss": 0.6175, "step": 26261 }, { "epoch": 0.766751335727424, "grad_norm": 0.5290266721212794, "learning_rate": 1.2958637469586377e-05, "loss": 0.5827, "step": 26262 }, { "epoch": 0.7667805319552714, "grad_norm": 0.5486193334043881, "learning_rate": 1.2957015409570155e-05, "loss": 0.6221, "step": 26263 }, { "epoch": 0.7668097281831188, "grad_norm": 0.5451358527356938, "learning_rate": 1.2955393349553935e-05, "loss": 0.6438, "step": 26264 }, { "epoch": 0.7668389244109661, "grad_norm": 0.5329765696549315, "learning_rate": 1.2953771289537713e-05, "loss": 0.6084, "step": 26265 }, { "epoch": 0.7668681206388135, "grad_norm": 0.5423494434856424, "learning_rate": 1.2952149229521493e-05, "loss": 0.602, "step": 26266 }, { "epoch": 0.7668973168666608, "grad_norm": 0.5144376417162915, "learning_rate": 1.2950527169505272e-05, "loss": 0.6157, "step": 26267 }, { "epoch": 0.7669265130945082, "grad_norm": 0.49421841203565064, "learning_rate": 1.2948905109489052e-05, "loss": 0.5379, "step": 26268 }, { "epoch": 0.7669557093223556, "grad_norm": 0.4968763083830003, "learning_rate": 1.294728304947283e-05, "loss": 0.5694, "step": 26269 }, { "epoch": 0.7669849055502029, "grad_norm": 0.5162829739588828, "learning_rate": 1.2945660989456609e-05, "loss": 0.5902, "step": 26270 }, { "epoch": 0.7670141017780503, "grad_norm": 0.48194477393236845, "learning_rate": 1.2944038929440389e-05, "loss": 0.5266, "step": 26271 }, { "epoch": 0.7670432980058977, "grad_norm": 0.48898702034157293, "learning_rate": 1.294241686942417e-05, "loss": 0.5264, "step": 26272 }, { "epoch": 0.767072494233745, "grad_norm": 0.5235888602202942, "learning_rate": 1.2940794809407949e-05, "loss": 0.5914, "step": 26273 }, { "epoch": 0.7671016904615924, "grad_norm": 0.4825098724268049, "learning_rate": 1.2939172749391729e-05, "loss": 0.4988, "step": 26274 }, { "epoch": 0.7671308866894397, "grad_norm": 0.5542632667650061, "learning_rate": 1.2937550689375507e-05, "loss": 0.6158, "step": 26275 }, { "epoch": 0.7671600829172871, "grad_norm": 0.5191640724150055, "learning_rate": 1.2935928629359287e-05, "loss": 0.5585, "step": 26276 }, { "epoch": 0.7671892791451345, "grad_norm": 0.5786346471665231, "learning_rate": 1.2934306569343066e-05, "loss": 0.7379, "step": 26277 }, { "epoch": 0.7672184753729818, "grad_norm": 0.5049457826459871, "learning_rate": 1.2932684509326846e-05, "loss": 0.5798, "step": 26278 }, { "epoch": 0.7672476716008292, "grad_norm": 0.4923460512465151, "learning_rate": 1.2931062449310624e-05, "loss": 0.5515, "step": 26279 }, { "epoch": 0.7672768678286765, "grad_norm": 0.48858159158057984, "learning_rate": 1.2929440389294403e-05, "loss": 0.5355, "step": 26280 }, { "epoch": 0.7673060640565239, "grad_norm": 0.5080667784040753, "learning_rate": 1.2927818329278186e-05, "loss": 0.5766, "step": 26281 }, { "epoch": 0.7673352602843713, "grad_norm": 0.5242750674392773, "learning_rate": 1.2926196269261964e-05, "loss": 0.5972, "step": 26282 }, { "epoch": 0.7673644565122186, "grad_norm": 0.4937512008370672, "learning_rate": 1.2924574209245743e-05, "loss": 0.5413, "step": 26283 }, { "epoch": 0.767393652740066, "grad_norm": 0.5011289391987895, "learning_rate": 1.2922952149229523e-05, "loss": 0.5327, "step": 26284 }, { "epoch": 0.7674228489679134, "grad_norm": 0.5244545278600344, "learning_rate": 1.2921330089213301e-05, "loss": 0.5752, "step": 26285 }, { "epoch": 0.7674520451957607, "grad_norm": 0.5276954603688992, "learning_rate": 1.2919708029197081e-05, "loss": 0.5669, "step": 26286 }, { "epoch": 0.7674812414236081, "grad_norm": 0.5540631752672245, "learning_rate": 1.291808596918086e-05, "loss": 0.6154, "step": 26287 }, { "epoch": 0.7675104376514554, "grad_norm": 0.5737795562445847, "learning_rate": 1.291646390916464e-05, "loss": 0.6492, "step": 26288 }, { "epoch": 0.7675396338793028, "grad_norm": 0.5169127190809516, "learning_rate": 1.2914841849148418e-05, "loss": 0.5773, "step": 26289 }, { "epoch": 0.7675688301071502, "grad_norm": 0.49936928356060795, "learning_rate": 1.29132197891322e-05, "loss": 0.5387, "step": 26290 }, { "epoch": 0.7675980263349975, "grad_norm": 0.501227883146706, "learning_rate": 1.2911597729115978e-05, "loss": 0.5386, "step": 26291 }, { "epoch": 0.7676272225628449, "grad_norm": 0.5686624713063125, "learning_rate": 1.2909975669099758e-05, "loss": 0.6785, "step": 26292 }, { "epoch": 0.7676564187906922, "grad_norm": 0.5229301602552626, "learning_rate": 1.2908353609083537e-05, "loss": 0.5847, "step": 26293 }, { "epoch": 0.7676856150185396, "grad_norm": 0.5532021758086357, "learning_rate": 1.2906731549067317e-05, "loss": 0.6625, "step": 26294 }, { "epoch": 0.767714811246387, "grad_norm": 0.5329601606777445, "learning_rate": 1.2905109489051095e-05, "loss": 0.6354, "step": 26295 }, { "epoch": 0.7677440074742343, "grad_norm": 0.5408047236507197, "learning_rate": 1.2903487429034875e-05, "loss": 0.6264, "step": 26296 }, { "epoch": 0.7677732037020817, "grad_norm": 0.49135524881714143, "learning_rate": 1.2901865369018654e-05, "loss": 0.54, "step": 26297 }, { "epoch": 0.767802399929929, "grad_norm": 0.5273508567629412, "learning_rate": 1.2900243309002432e-05, "loss": 0.5909, "step": 26298 }, { "epoch": 0.7678315961577764, "grad_norm": 0.5480453782984006, "learning_rate": 1.2898621248986212e-05, "loss": 0.6566, "step": 26299 }, { "epoch": 0.7678607923856238, "grad_norm": 0.5079041779324788, "learning_rate": 1.2896999188969994e-05, "loss": 0.5754, "step": 26300 }, { "epoch": 0.7678899886134711, "grad_norm": 0.6217118245063328, "learning_rate": 1.2895377128953772e-05, "loss": 0.6852, "step": 26301 }, { "epoch": 0.7679191848413185, "grad_norm": 0.5435151003039238, "learning_rate": 1.2893755068937552e-05, "loss": 0.5701, "step": 26302 }, { "epoch": 0.7679483810691659, "grad_norm": 0.5567208281428286, "learning_rate": 1.289213300892133e-05, "loss": 0.6589, "step": 26303 }, { "epoch": 0.7679775772970132, "grad_norm": 0.5689841827172268, "learning_rate": 1.289051094890511e-05, "loss": 0.6835, "step": 26304 }, { "epoch": 0.7680067735248606, "grad_norm": 0.5007046833392005, "learning_rate": 1.2888888888888889e-05, "loss": 0.5665, "step": 26305 }, { "epoch": 0.7680359697527079, "grad_norm": 0.541226130027547, "learning_rate": 1.2887266828872669e-05, "loss": 0.6646, "step": 26306 }, { "epoch": 0.7680651659805553, "grad_norm": 0.4923121233789896, "learning_rate": 1.2885644768856447e-05, "loss": 0.5724, "step": 26307 }, { "epoch": 0.7680943622084027, "grad_norm": 0.5867444339498735, "learning_rate": 1.2884022708840226e-05, "loss": 0.7237, "step": 26308 }, { "epoch": 0.76812355843625, "grad_norm": 0.5156276435588525, "learning_rate": 1.288240064882401e-05, "loss": 0.5578, "step": 26309 }, { "epoch": 0.7681527546640974, "grad_norm": 0.517740364206779, "learning_rate": 1.2880778588807788e-05, "loss": 0.5914, "step": 26310 }, { "epoch": 0.7681819508919447, "grad_norm": 0.5049232611982919, "learning_rate": 1.2879156528791566e-05, "loss": 0.5296, "step": 26311 }, { "epoch": 0.7682111471197921, "grad_norm": 0.5118064462731211, "learning_rate": 1.2877534468775346e-05, "loss": 0.564, "step": 26312 }, { "epoch": 0.7682403433476395, "grad_norm": 0.5543498047044405, "learning_rate": 1.2875912408759124e-05, "loss": 0.6699, "step": 26313 }, { "epoch": 0.7682695395754868, "grad_norm": 0.5318944119736833, "learning_rate": 1.2874290348742904e-05, "loss": 0.5549, "step": 26314 }, { "epoch": 0.7682987358033342, "grad_norm": 0.51498773507136, "learning_rate": 1.2872668288726683e-05, "loss": 0.5529, "step": 26315 }, { "epoch": 0.7683279320311815, "grad_norm": 0.4841287678352486, "learning_rate": 1.2871046228710463e-05, "loss": 0.5215, "step": 26316 }, { "epoch": 0.7683571282590289, "grad_norm": 0.5186195139325872, "learning_rate": 1.2869424168694241e-05, "loss": 0.5917, "step": 26317 }, { "epoch": 0.7683863244868763, "grad_norm": 0.5468443896183256, "learning_rate": 1.286780210867802e-05, "loss": 0.6249, "step": 26318 }, { "epoch": 0.7684155207147236, "grad_norm": 0.4874345202217483, "learning_rate": 1.2866180048661801e-05, "loss": 0.5005, "step": 26319 }, { "epoch": 0.768444716942571, "grad_norm": 0.5103340784000882, "learning_rate": 1.2864557988645582e-05, "loss": 0.5895, "step": 26320 }, { "epoch": 0.7684739131704184, "grad_norm": 0.47630926365538057, "learning_rate": 1.286293592862936e-05, "loss": 0.5062, "step": 26321 }, { "epoch": 0.7685031093982657, "grad_norm": 0.5041449177398831, "learning_rate": 1.286131386861314e-05, "loss": 0.5393, "step": 26322 }, { "epoch": 0.7685323056261131, "grad_norm": 0.5463894000481656, "learning_rate": 1.2859691808596918e-05, "loss": 0.6021, "step": 26323 }, { "epoch": 0.7685615018539604, "grad_norm": 0.5300075491615643, "learning_rate": 1.2858069748580698e-05, "loss": 0.5678, "step": 26324 }, { "epoch": 0.7685906980818078, "grad_norm": 0.5227198357834695, "learning_rate": 1.2856447688564477e-05, "loss": 0.5944, "step": 26325 }, { "epoch": 0.7686198943096552, "grad_norm": 0.5272882937179109, "learning_rate": 1.2854825628548257e-05, "loss": 0.6186, "step": 26326 }, { "epoch": 0.7686490905375025, "grad_norm": 0.5158746102789851, "learning_rate": 1.2853203568532035e-05, "loss": 0.5584, "step": 26327 }, { "epoch": 0.7686782867653499, "grad_norm": 0.47989637483069447, "learning_rate": 1.2851581508515817e-05, "loss": 0.5054, "step": 26328 }, { "epoch": 0.7687074829931972, "grad_norm": 0.5188520101828007, "learning_rate": 1.2849959448499595e-05, "loss": 0.6012, "step": 26329 }, { "epoch": 0.7687366792210446, "grad_norm": 0.5583726510859897, "learning_rate": 1.2848337388483375e-05, "loss": 0.6078, "step": 26330 }, { "epoch": 0.768765875448892, "grad_norm": 0.5100008452588459, "learning_rate": 1.2846715328467154e-05, "loss": 0.5611, "step": 26331 }, { "epoch": 0.7687950716767393, "grad_norm": 0.5488556308567909, "learning_rate": 1.2845093268450934e-05, "loss": 0.626, "step": 26332 }, { "epoch": 0.7688242679045867, "grad_norm": 0.5252276767629177, "learning_rate": 1.2843471208434712e-05, "loss": 0.5865, "step": 26333 }, { "epoch": 0.768853464132434, "grad_norm": 0.5056518400704174, "learning_rate": 1.2841849148418492e-05, "loss": 0.5421, "step": 26334 }, { "epoch": 0.7688826603602814, "grad_norm": 0.5201568382908714, "learning_rate": 1.284022708840227e-05, "loss": 0.5619, "step": 26335 }, { "epoch": 0.7689118565881288, "grad_norm": 0.5031742309628927, "learning_rate": 1.2838605028386049e-05, "loss": 0.5614, "step": 26336 }, { "epoch": 0.7689410528159761, "grad_norm": 0.504908898241799, "learning_rate": 1.2836982968369829e-05, "loss": 0.5609, "step": 26337 }, { "epoch": 0.7689702490438235, "grad_norm": 0.5239761391518063, "learning_rate": 1.2835360908353611e-05, "loss": 0.5859, "step": 26338 }, { "epoch": 0.7689994452716709, "grad_norm": 0.4976667870322926, "learning_rate": 1.283373884833739e-05, "loss": 0.5366, "step": 26339 }, { "epoch": 0.7690286414995182, "grad_norm": 0.5583454312229662, "learning_rate": 1.283211678832117e-05, "loss": 0.6906, "step": 26340 }, { "epoch": 0.7690578377273656, "grad_norm": 0.5152458398219856, "learning_rate": 1.2830494728304948e-05, "loss": 0.5683, "step": 26341 }, { "epoch": 0.7690870339552129, "grad_norm": 0.5318186286267119, "learning_rate": 1.2828872668288728e-05, "loss": 0.6329, "step": 26342 }, { "epoch": 0.7691162301830603, "grad_norm": 0.5047001699118584, "learning_rate": 1.2827250608272506e-05, "loss": 0.5598, "step": 26343 }, { "epoch": 0.7691454264109077, "grad_norm": 0.5044943420109759, "learning_rate": 1.2825628548256286e-05, "loss": 0.5603, "step": 26344 }, { "epoch": 0.769174622638755, "grad_norm": 0.5414625018706501, "learning_rate": 1.2824006488240065e-05, "loss": 0.5981, "step": 26345 }, { "epoch": 0.7692038188666024, "grad_norm": 0.545476387761699, "learning_rate": 1.2822384428223843e-05, "loss": 0.6042, "step": 26346 }, { "epoch": 0.7692330150944497, "grad_norm": 0.5293870976295801, "learning_rate": 1.2820762368207625e-05, "loss": 0.6067, "step": 26347 }, { "epoch": 0.7692622113222971, "grad_norm": 0.5084608194700659, "learning_rate": 1.2819140308191405e-05, "loss": 0.5354, "step": 26348 }, { "epoch": 0.7692914075501445, "grad_norm": 0.5392057682250317, "learning_rate": 1.2817518248175183e-05, "loss": 0.6338, "step": 26349 }, { "epoch": 0.7693206037779918, "grad_norm": 0.5426319947275987, "learning_rate": 1.2815896188158963e-05, "loss": 0.6342, "step": 26350 }, { "epoch": 0.7693498000058392, "grad_norm": 0.518694970993585, "learning_rate": 1.2814274128142742e-05, "loss": 0.6224, "step": 26351 }, { "epoch": 0.7693789962336867, "grad_norm": 0.5275521288214529, "learning_rate": 1.2812652068126522e-05, "loss": 0.5866, "step": 26352 }, { "epoch": 0.769408192461534, "grad_norm": 0.5315685858539746, "learning_rate": 1.28110300081103e-05, "loss": 0.6333, "step": 26353 }, { "epoch": 0.7694373886893814, "grad_norm": 0.5021625261774971, "learning_rate": 1.280940794809408e-05, "loss": 0.5788, "step": 26354 }, { "epoch": 0.7694665849172287, "grad_norm": 0.6058254886299386, "learning_rate": 1.2807785888077858e-05, "loss": 0.6934, "step": 26355 }, { "epoch": 0.7694957811450761, "grad_norm": 0.5213069471650097, "learning_rate": 1.280616382806164e-05, "loss": 0.6041, "step": 26356 }, { "epoch": 0.7695249773729235, "grad_norm": 0.5236174830194623, "learning_rate": 1.2804541768045419e-05, "loss": 0.5951, "step": 26357 }, { "epoch": 0.7695541736007708, "grad_norm": 0.571967786331096, "learning_rate": 1.2802919708029199e-05, "loss": 0.6904, "step": 26358 }, { "epoch": 0.7695833698286182, "grad_norm": 0.5468342955572397, "learning_rate": 1.2801297648012977e-05, "loss": 0.6654, "step": 26359 }, { "epoch": 0.7696125660564656, "grad_norm": 0.556287545342831, "learning_rate": 1.2799675587996757e-05, "loss": 0.6722, "step": 26360 }, { "epoch": 0.7696417622843129, "grad_norm": 0.5191653406340924, "learning_rate": 1.2798053527980536e-05, "loss": 0.5987, "step": 26361 }, { "epoch": 0.7696709585121603, "grad_norm": 0.5270018579798271, "learning_rate": 1.2796431467964316e-05, "loss": 0.6081, "step": 26362 }, { "epoch": 0.7697001547400076, "grad_norm": 0.5258013819893559, "learning_rate": 1.2794809407948094e-05, "loss": 0.6236, "step": 26363 }, { "epoch": 0.769729350967855, "grad_norm": 0.5042442171655958, "learning_rate": 1.2793187347931872e-05, "loss": 0.517, "step": 26364 }, { "epoch": 0.7697585471957024, "grad_norm": 0.5370345134619897, "learning_rate": 1.2791565287915652e-05, "loss": 0.6259, "step": 26365 }, { "epoch": 0.7697877434235497, "grad_norm": 0.5552170035651157, "learning_rate": 1.2789943227899434e-05, "loss": 0.6605, "step": 26366 }, { "epoch": 0.7698169396513971, "grad_norm": 0.5505944314568201, "learning_rate": 1.2788321167883213e-05, "loss": 0.6227, "step": 26367 }, { "epoch": 0.7698461358792444, "grad_norm": 0.5091000359222545, "learning_rate": 1.2786699107866993e-05, "loss": 0.5813, "step": 26368 }, { "epoch": 0.7698753321070918, "grad_norm": 0.5007687584463182, "learning_rate": 1.2785077047850771e-05, "loss": 0.5521, "step": 26369 }, { "epoch": 0.7699045283349392, "grad_norm": 0.5293000984701024, "learning_rate": 1.2783454987834551e-05, "loss": 0.5885, "step": 26370 }, { "epoch": 0.7699337245627865, "grad_norm": 0.5304394213941926, "learning_rate": 1.278183292781833e-05, "loss": 0.5693, "step": 26371 }, { "epoch": 0.7699629207906339, "grad_norm": 0.5188582647128697, "learning_rate": 1.278021086780211e-05, "loss": 0.5724, "step": 26372 }, { "epoch": 0.7699921170184812, "grad_norm": 0.5477600243060052, "learning_rate": 1.2778588807785888e-05, "loss": 0.6594, "step": 26373 }, { "epoch": 0.7700213132463286, "grad_norm": 0.48592143442326974, "learning_rate": 1.2776966747769666e-05, "loss": 0.5205, "step": 26374 }, { "epoch": 0.770050509474176, "grad_norm": 0.4836705798002861, "learning_rate": 1.277534468775345e-05, "loss": 0.5229, "step": 26375 }, { "epoch": 0.7700797057020233, "grad_norm": 0.5609950040488303, "learning_rate": 1.2773722627737228e-05, "loss": 0.6915, "step": 26376 }, { "epoch": 0.7701089019298707, "grad_norm": 0.5218243550164318, "learning_rate": 1.2772100567721006e-05, "loss": 0.6565, "step": 26377 }, { "epoch": 0.770138098157718, "grad_norm": 0.5258552853399965, "learning_rate": 1.2770478507704786e-05, "loss": 0.6195, "step": 26378 }, { "epoch": 0.7701672943855654, "grad_norm": 0.7376555319710038, "learning_rate": 1.2768856447688565e-05, "loss": 0.6017, "step": 26379 }, { "epoch": 0.7701964906134128, "grad_norm": 0.493102124526461, "learning_rate": 1.2767234387672345e-05, "loss": 0.5116, "step": 26380 }, { "epoch": 0.7702256868412601, "grad_norm": 0.5411583431259067, "learning_rate": 1.2765612327656123e-05, "loss": 0.6344, "step": 26381 }, { "epoch": 0.7702548830691075, "grad_norm": 0.5379588271644586, "learning_rate": 1.2763990267639903e-05, "loss": 0.6377, "step": 26382 }, { "epoch": 0.7702840792969549, "grad_norm": 0.5318910670370612, "learning_rate": 1.2762368207623682e-05, "loss": 0.6066, "step": 26383 }, { "epoch": 0.7703132755248022, "grad_norm": 0.5189985312562657, "learning_rate": 1.276074614760746e-05, "loss": 0.5846, "step": 26384 }, { "epoch": 0.7703424717526496, "grad_norm": 0.5329732282829649, "learning_rate": 1.2759124087591242e-05, "loss": 0.6138, "step": 26385 }, { "epoch": 0.7703716679804969, "grad_norm": 0.6587447296720721, "learning_rate": 1.2757502027575022e-05, "loss": 0.6865, "step": 26386 }, { "epoch": 0.7704008642083443, "grad_norm": 0.5284425736438977, "learning_rate": 1.27558799675588e-05, "loss": 0.6048, "step": 26387 }, { "epoch": 0.7704300604361917, "grad_norm": 0.4619346571755145, "learning_rate": 1.275425790754258e-05, "loss": 0.4641, "step": 26388 }, { "epoch": 0.770459256664039, "grad_norm": 0.5111269256580777, "learning_rate": 1.2752635847526359e-05, "loss": 0.5408, "step": 26389 }, { "epoch": 0.7704884528918864, "grad_norm": 0.5007356193593779, "learning_rate": 1.2751013787510139e-05, "loss": 0.5587, "step": 26390 }, { "epoch": 0.7705176491197337, "grad_norm": 0.5340831448264801, "learning_rate": 1.2749391727493917e-05, "loss": 0.5985, "step": 26391 }, { "epoch": 0.7705468453475811, "grad_norm": 0.5430537607007901, "learning_rate": 1.2747769667477696e-05, "loss": 0.6074, "step": 26392 }, { "epoch": 0.7705760415754285, "grad_norm": 0.49952172254991756, "learning_rate": 1.2746147607461476e-05, "loss": 0.5301, "step": 26393 }, { "epoch": 0.7706052378032758, "grad_norm": 0.5215811104768182, "learning_rate": 1.2744525547445257e-05, "loss": 0.5867, "step": 26394 }, { "epoch": 0.7706344340311232, "grad_norm": 0.5507931740891526, "learning_rate": 1.2742903487429036e-05, "loss": 0.6408, "step": 26395 }, { "epoch": 0.7706636302589706, "grad_norm": 0.5327362553368686, "learning_rate": 1.2741281427412816e-05, "loss": 0.6326, "step": 26396 }, { "epoch": 0.7706928264868179, "grad_norm": 0.4832366691390597, "learning_rate": 1.2739659367396594e-05, "loss": 0.5026, "step": 26397 }, { "epoch": 0.7707220227146653, "grad_norm": 0.5101519127077896, "learning_rate": 1.2738037307380374e-05, "loss": 0.5355, "step": 26398 }, { "epoch": 0.7707512189425126, "grad_norm": 0.5287411488130839, "learning_rate": 1.2736415247364153e-05, "loss": 0.621, "step": 26399 }, { "epoch": 0.77078041517036, "grad_norm": 0.5726288595581843, "learning_rate": 1.2734793187347933e-05, "loss": 0.7196, "step": 26400 }, { "epoch": 0.7708096113982074, "grad_norm": 0.5166998220969898, "learning_rate": 1.2733171127331711e-05, "loss": 0.5902, "step": 26401 }, { "epoch": 0.7708388076260547, "grad_norm": 0.511102726005718, "learning_rate": 1.273154906731549e-05, "loss": 0.5884, "step": 26402 }, { "epoch": 0.7708680038539021, "grad_norm": 0.5142424536784987, "learning_rate": 1.272992700729927e-05, "loss": 0.538, "step": 26403 }, { "epoch": 0.7708972000817494, "grad_norm": 0.5231467478716693, "learning_rate": 1.2728304947283051e-05, "loss": 0.6107, "step": 26404 }, { "epoch": 0.7709263963095968, "grad_norm": 0.5308381258752829, "learning_rate": 1.272668288726683e-05, "loss": 0.6061, "step": 26405 }, { "epoch": 0.7709555925374442, "grad_norm": 0.5499757632311949, "learning_rate": 1.272506082725061e-05, "loss": 0.6618, "step": 26406 }, { "epoch": 0.7709847887652915, "grad_norm": 0.5359350294394808, "learning_rate": 1.2723438767234388e-05, "loss": 0.6016, "step": 26407 }, { "epoch": 0.7710139849931389, "grad_norm": 0.524692331530006, "learning_rate": 1.2721816707218168e-05, "loss": 0.6031, "step": 26408 }, { "epoch": 0.7710431812209863, "grad_norm": 0.5632127418607314, "learning_rate": 1.2720194647201947e-05, "loss": 0.6419, "step": 26409 }, { "epoch": 0.7710723774488336, "grad_norm": 0.5052515703974721, "learning_rate": 1.2718572587185727e-05, "loss": 0.5903, "step": 26410 }, { "epoch": 0.771101573676681, "grad_norm": 0.5590655095867217, "learning_rate": 1.2716950527169505e-05, "loss": 0.6873, "step": 26411 }, { "epoch": 0.7711307699045283, "grad_norm": 0.5851122610716677, "learning_rate": 1.2715328467153283e-05, "loss": 0.7102, "step": 26412 }, { "epoch": 0.7711599661323757, "grad_norm": 0.5460645691012418, "learning_rate": 1.2713706407137065e-05, "loss": 0.5931, "step": 26413 }, { "epoch": 0.7711891623602231, "grad_norm": 0.5309073696075036, "learning_rate": 1.2712084347120845e-05, "loss": 0.6167, "step": 26414 }, { "epoch": 0.7712183585880704, "grad_norm": 0.5427449333611928, "learning_rate": 1.2710462287104624e-05, "loss": 0.6146, "step": 26415 }, { "epoch": 0.7712475548159178, "grad_norm": 0.5478517541331394, "learning_rate": 1.2708840227088404e-05, "loss": 0.5797, "step": 26416 }, { "epoch": 0.7712767510437651, "grad_norm": 0.5015443223772116, "learning_rate": 1.2707218167072182e-05, "loss": 0.566, "step": 26417 }, { "epoch": 0.7713059472716125, "grad_norm": 0.5029315600963166, "learning_rate": 1.2705596107055962e-05, "loss": 0.5662, "step": 26418 }, { "epoch": 0.7713351434994599, "grad_norm": 0.5773677958136765, "learning_rate": 1.270397404703974e-05, "loss": 0.6135, "step": 26419 }, { "epoch": 0.7713643397273072, "grad_norm": 0.517328531044408, "learning_rate": 1.2702351987023519e-05, "loss": 0.5917, "step": 26420 }, { "epoch": 0.7713935359551546, "grad_norm": 0.4951726045010563, "learning_rate": 1.2700729927007299e-05, "loss": 0.5297, "step": 26421 }, { "epoch": 0.771422732183002, "grad_norm": 0.5572102607327051, "learning_rate": 1.2699107866991077e-05, "loss": 0.7065, "step": 26422 }, { "epoch": 0.7714519284108493, "grad_norm": 0.5239390746625501, "learning_rate": 1.2697485806974859e-05, "loss": 0.5755, "step": 26423 }, { "epoch": 0.7714811246386967, "grad_norm": 0.5369281743611528, "learning_rate": 1.2695863746958639e-05, "loss": 0.5929, "step": 26424 }, { "epoch": 0.771510320866544, "grad_norm": 0.5255566573992988, "learning_rate": 1.2694241686942418e-05, "loss": 0.5711, "step": 26425 }, { "epoch": 0.7715395170943914, "grad_norm": 0.5096670428208783, "learning_rate": 1.2692619626926198e-05, "loss": 0.5787, "step": 26426 }, { "epoch": 0.7715687133222388, "grad_norm": 0.5270595355207089, "learning_rate": 1.2690997566909976e-05, "loss": 0.5933, "step": 26427 }, { "epoch": 0.7715979095500861, "grad_norm": 0.5662069077208143, "learning_rate": 1.2689375506893756e-05, "loss": 0.6508, "step": 26428 }, { "epoch": 0.7716271057779335, "grad_norm": 0.5594560604318121, "learning_rate": 1.2687753446877534e-05, "loss": 0.6333, "step": 26429 }, { "epoch": 0.7716563020057808, "grad_norm": 0.506848251584099, "learning_rate": 1.2686131386861313e-05, "loss": 0.5714, "step": 26430 }, { "epoch": 0.7716854982336282, "grad_norm": 0.5328612420369288, "learning_rate": 1.2684509326845093e-05, "loss": 0.6005, "step": 26431 }, { "epoch": 0.7717146944614756, "grad_norm": 0.5243052959624874, "learning_rate": 1.2682887266828875e-05, "loss": 0.6057, "step": 26432 }, { "epoch": 0.7717438906893229, "grad_norm": 0.5354571672216274, "learning_rate": 1.2681265206812653e-05, "loss": 0.5879, "step": 26433 }, { "epoch": 0.7717730869171703, "grad_norm": 0.5295852967773056, "learning_rate": 1.2679643146796433e-05, "loss": 0.5792, "step": 26434 }, { "epoch": 0.7718022831450176, "grad_norm": 0.5071373929144758, "learning_rate": 1.2678021086780211e-05, "loss": 0.5584, "step": 26435 }, { "epoch": 0.771831479372865, "grad_norm": 0.5109879140593101, "learning_rate": 1.2676399026763991e-05, "loss": 0.5693, "step": 26436 }, { "epoch": 0.7718606756007124, "grad_norm": 0.5675500378280717, "learning_rate": 1.267477696674777e-05, "loss": 0.643, "step": 26437 }, { "epoch": 0.7718898718285597, "grad_norm": 0.549877619959842, "learning_rate": 1.267315490673155e-05, "loss": 0.6269, "step": 26438 }, { "epoch": 0.7719190680564071, "grad_norm": 0.5561388728490521, "learning_rate": 1.2671532846715328e-05, "loss": 0.6367, "step": 26439 }, { "epoch": 0.7719482642842544, "grad_norm": 0.5076474739759845, "learning_rate": 1.2669910786699107e-05, "loss": 0.5459, "step": 26440 }, { "epoch": 0.7719774605121018, "grad_norm": 0.5280048446105771, "learning_rate": 1.2668288726682888e-05, "loss": 0.5849, "step": 26441 }, { "epoch": 0.7720066567399492, "grad_norm": 0.48996976353244964, "learning_rate": 1.2666666666666668e-05, "loss": 0.5339, "step": 26442 }, { "epoch": 0.7720358529677965, "grad_norm": 0.5296380248236603, "learning_rate": 1.2665044606650447e-05, "loss": 0.586, "step": 26443 }, { "epoch": 0.7720650491956439, "grad_norm": 0.4948997668628045, "learning_rate": 1.2663422546634227e-05, "loss": 0.5613, "step": 26444 }, { "epoch": 0.7720942454234913, "grad_norm": 0.529640505759437, "learning_rate": 1.2661800486618005e-05, "loss": 0.5865, "step": 26445 }, { "epoch": 0.7721234416513386, "grad_norm": 0.5606397618001887, "learning_rate": 1.2660178426601785e-05, "loss": 0.6439, "step": 26446 }, { "epoch": 0.772152637879186, "grad_norm": 0.5173319293249513, "learning_rate": 1.2658556366585564e-05, "loss": 0.5645, "step": 26447 }, { "epoch": 0.7721818341070333, "grad_norm": 0.5176367755063667, "learning_rate": 1.2656934306569344e-05, "loss": 0.5677, "step": 26448 }, { "epoch": 0.7722110303348807, "grad_norm": 0.5143151115187423, "learning_rate": 1.2655312246553122e-05, "loss": 0.597, "step": 26449 }, { "epoch": 0.7722402265627281, "grad_norm": 0.5013020248641546, "learning_rate": 1.26536901865369e-05, "loss": 0.5729, "step": 26450 }, { "epoch": 0.7722694227905754, "grad_norm": 0.5259445569726339, "learning_rate": 1.2652068126520682e-05, "loss": 0.6079, "step": 26451 }, { "epoch": 0.7722986190184228, "grad_norm": 0.5688166019588652, "learning_rate": 1.2650446066504462e-05, "loss": 0.6192, "step": 26452 }, { "epoch": 0.7723278152462701, "grad_norm": 0.5187338203771772, "learning_rate": 1.264882400648824e-05, "loss": 0.5611, "step": 26453 }, { "epoch": 0.7723570114741175, "grad_norm": 0.5349921822085324, "learning_rate": 1.264720194647202e-05, "loss": 0.5901, "step": 26454 }, { "epoch": 0.7723862077019649, "grad_norm": 0.535191917624464, "learning_rate": 1.26455798864558e-05, "loss": 0.6047, "step": 26455 }, { "epoch": 0.7724154039298122, "grad_norm": 0.5346738562267218, "learning_rate": 1.264395782643958e-05, "loss": 0.6145, "step": 26456 }, { "epoch": 0.7724446001576596, "grad_norm": 0.5223177657324333, "learning_rate": 1.2642335766423358e-05, "loss": 0.5896, "step": 26457 }, { "epoch": 0.772473796385507, "grad_norm": 0.5560440671770707, "learning_rate": 1.2640713706407136e-05, "loss": 0.6437, "step": 26458 }, { "epoch": 0.7725029926133543, "grad_norm": 0.5393237454701627, "learning_rate": 1.2639091646390916e-05, "loss": 0.5896, "step": 26459 }, { "epoch": 0.7725321888412017, "grad_norm": 0.5430629610686087, "learning_rate": 1.2637469586374698e-05, "loss": 0.6384, "step": 26460 }, { "epoch": 0.772561385069049, "grad_norm": 0.5704366250118622, "learning_rate": 1.2635847526358476e-05, "loss": 0.6496, "step": 26461 }, { "epoch": 0.7725905812968964, "grad_norm": 0.5108299250058428, "learning_rate": 1.2634225466342256e-05, "loss": 0.5232, "step": 26462 }, { "epoch": 0.7726197775247438, "grad_norm": 0.5309989836163339, "learning_rate": 1.2632603406326035e-05, "loss": 0.5862, "step": 26463 }, { "epoch": 0.7726489737525911, "grad_norm": 0.5261570797478371, "learning_rate": 1.2630981346309815e-05, "loss": 0.5467, "step": 26464 }, { "epoch": 0.7726781699804385, "grad_norm": 0.5213985853705518, "learning_rate": 1.2629359286293593e-05, "loss": 0.5744, "step": 26465 }, { "epoch": 0.7727073662082858, "grad_norm": 0.5222282721161664, "learning_rate": 1.2627737226277373e-05, "loss": 0.5957, "step": 26466 }, { "epoch": 0.7727365624361332, "grad_norm": 0.48024018828234627, "learning_rate": 1.2626115166261152e-05, "loss": 0.4884, "step": 26467 }, { "epoch": 0.7727657586639806, "grad_norm": 0.5270117188511533, "learning_rate": 1.262449310624493e-05, "loss": 0.5764, "step": 26468 }, { "epoch": 0.7727949548918279, "grad_norm": 0.5037104147279577, "learning_rate": 1.262287104622871e-05, "loss": 0.5552, "step": 26469 }, { "epoch": 0.7728241511196753, "grad_norm": 0.47965090822373596, "learning_rate": 1.2621248986212492e-05, "loss": 0.5119, "step": 26470 }, { "epoch": 0.7728533473475226, "grad_norm": 0.5150408903411019, "learning_rate": 1.261962692619627e-05, "loss": 0.5641, "step": 26471 }, { "epoch": 0.77288254357537, "grad_norm": 0.5422535719290069, "learning_rate": 1.261800486618005e-05, "loss": 0.631, "step": 26472 }, { "epoch": 0.7729117398032175, "grad_norm": 0.5349835654312051, "learning_rate": 1.2616382806163829e-05, "loss": 0.6465, "step": 26473 }, { "epoch": 0.7729409360310648, "grad_norm": 0.5301730384347371, "learning_rate": 1.2614760746147609e-05, "loss": 0.5989, "step": 26474 }, { "epoch": 0.7729701322589122, "grad_norm": 0.5231396807504888, "learning_rate": 1.2613138686131387e-05, "loss": 0.5895, "step": 26475 }, { "epoch": 0.7729993284867596, "grad_norm": 0.5299335949341247, "learning_rate": 1.2611516626115167e-05, "loss": 0.6318, "step": 26476 }, { "epoch": 0.7730285247146069, "grad_norm": 0.5664240419786918, "learning_rate": 1.2609894566098945e-05, "loss": 0.6753, "step": 26477 }, { "epoch": 0.7730577209424543, "grad_norm": 0.49705353835124105, "learning_rate": 1.2608272506082724e-05, "loss": 0.5461, "step": 26478 }, { "epoch": 0.7730869171703016, "grad_norm": 0.5260431093907364, "learning_rate": 1.2606650446066506e-05, "loss": 0.641, "step": 26479 }, { "epoch": 0.773116113398149, "grad_norm": 0.5122670094547285, "learning_rate": 1.2605028386050286e-05, "loss": 0.5573, "step": 26480 }, { "epoch": 0.7731453096259964, "grad_norm": 0.5301131456154767, "learning_rate": 1.2603406326034064e-05, "loss": 0.6347, "step": 26481 }, { "epoch": 0.7731745058538437, "grad_norm": 0.49501315291294123, "learning_rate": 1.2601784266017844e-05, "loss": 0.5172, "step": 26482 }, { "epoch": 0.7732037020816911, "grad_norm": 0.5232904365438443, "learning_rate": 1.2600162206001622e-05, "loss": 0.6201, "step": 26483 }, { "epoch": 0.7732328983095385, "grad_norm": 0.5213393726898771, "learning_rate": 1.2598540145985403e-05, "loss": 0.5975, "step": 26484 }, { "epoch": 0.7732620945373858, "grad_norm": 0.5826043271602139, "learning_rate": 1.2596918085969181e-05, "loss": 0.6476, "step": 26485 }, { "epoch": 0.7732912907652332, "grad_norm": 0.49165054659785973, "learning_rate": 1.259529602595296e-05, "loss": 0.5546, "step": 26486 }, { "epoch": 0.7733204869930805, "grad_norm": 0.5268740635387105, "learning_rate": 1.259367396593674e-05, "loss": 0.5949, "step": 26487 }, { "epoch": 0.7733496832209279, "grad_norm": 0.558160942367789, "learning_rate": 1.2592051905920518e-05, "loss": 0.6026, "step": 26488 }, { "epoch": 0.7733788794487753, "grad_norm": 0.5627877587093948, "learning_rate": 1.25904298459043e-05, "loss": 0.6584, "step": 26489 }, { "epoch": 0.7734080756766226, "grad_norm": 0.5585648164022344, "learning_rate": 1.258880778588808e-05, "loss": 0.5908, "step": 26490 }, { "epoch": 0.77343727190447, "grad_norm": 0.5221842393637443, "learning_rate": 1.2587185725871858e-05, "loss": 0.6103, "step": 26491 }, { "epoch": 0.7734664681323173, "grad_norm": 0.5144913264787363, "learning_rate": 1.2585563665855638e-05, "loss": 0.5764, "step": 26492 }, { "epoch": 0.7734956643601647, "grad_norm": 0.5112601324356378, "learning_rate": 1.2583941605839416e-05, "loss": 0.6102, "step": 26493 }, { "epoch": 0.7735248605880121, "grad_norm": 0.49423451364333765, "learning_rate": 1.2582319545823196e-05, "loss": 0.5557, "step": 26494 }, { "epoch": 0.7735540568158594, "grad_norm": 0.5499361005805927, "learning_rate": 1.2580697485806975e-05, "loss": 0.6483, "step": 26495 }, { "epoch": 0.7735832530437068, "grad_norm": 0.5334578667949623, "learning_rate": 1.2579075425790753e-05, "loss": 0.601, "step": 26496 }, { "epoch": 0.7736124492715541, "grad_norm": 0.5280535271815603, "learning_rate": 1.2577453365774533e-05, "loss": 0.6328, "step": 26497 }, { "epoch": 0.7736416454994015, "grad_norm": 0.5146476589756827, "learning_rate": 1.2575831305758315e-05, "loss": 0.5798, "step": 26498 }, { "epoch": 0.7736708417272489, "grad_norm": 0.5148378775640164, "learning_rate": 1.2574209245742093e-05, "loss": 0.5711, "step": 26499 }, { "epoch": 0.7737000379550962, "grad_norm": 0.5139825143245015, "learning_rate": 1.2572587185725873e-05, "loss": 0.6315, "step": 26500 }, { "epoch": 0.7737292341829436, "grad_norm": 0.4925056126217381, "learning_rate": 1.2570965125709652e-05, "loss": 0.5352, "step": 26501 }, { "epoch": 0.773758430410791, "grad_norm": 0.5121925653750972, "learning_rate": 1.2569343065693432e-05, "loss": 0.5525, "step": 26502 }, { "epoch": 0.7737876266386383, "grad_norm": 0.501807248739045, "learning_rate": 1.256772100567721e-05, "loss": 0.5424, "step": 26503 }, { "epoch": 0.7738168228664857, "grad_norm": 0.5092821183606179, "learning_rate": 1.256609894566099e-05, "loss": 0.5828, "step": 26504 }, { "epoch": 0.773846019094333, "grad_norm": 0.5206241009740452, "learning_rate": 1.2564476885644769e-05, "loss": 0.58, "step": 26505 }, { "epoch": 0.7738752153221804, "grad_norm": 0.5577507548711336, "learning_rate": 1.2562854825628547e-05, "loss": 0.5973, "step": 26506 }, { "epoch": 0.7739044115500278, "grad_norm": 0.5468608204590688, "learning_rate": 1.2561232765612329e-05, "loss": 0.6464, "step": 26507 }, { "epoch": 0.7739336077778751, "grad_norm": 0.5053248211447854, "learning_rate": 1.2559610705596109e-05, "loss": 0.5868, "step": 26508 }, { "epoch": 0.7739628040057225, "grad_norm": 0.5065021256161295, "learning_rate": 1.2557988645579887e-05, "loss": 0.5235, "step": 26509 }, { "epoch": 0.7739920002335698, "grad_norm": 0.4967616962085548, "learning_rate": 1.2556366585563667e-05, "loss": 0.5626, "step": 26510 }, { "epoch": 0.7740211964614172, "grad_norm": 0.5195876557976755, "learning_rate": 1.2554744525547446e-05, "loss": 0.5749, "step": 26511 }, { "epoch": 0.7740503926892646, "grad_norm": 0.5488595686075062, "learning_rate": 1.2553122465531226e-05, "loss": 0.6626, "step": 26512 }, { "epoch": 0.7740795889171119, "grad_norm": 0.5096503215744926, "learning_rate": 1.2551500405515004e-05, "loss": 0.5776, "step": 26513 }, { "epoch": 0.7741087851449593, "grad_norm": 0.5722655854954125, "learning_rate": 1.2549878345498783e-05, "loss": 0.6235, "step": 26514 }, { "epoch": 0.7741379813728066, "grad_norm": 0.5255266031873195, "learning_rate": 1.2548256285482563e-05, "loss": 0.5836, "step": 26515 }, { "epoch": 0.774167177600654, "grad_norm": 0.5516626194667559, "learning_rate": 1.2546634225466341e-05, "loss": 0.6223, "step": 26516 }, { "epoch": 0.7741963738285014, "grad_norm": 0.5443375300200552, "learning_rate": 1.2545012165450123e-05, "loss": 0.5988, "step": 26517 }, { "epoch": 0.7742255700563487, "grad_norm": 0.5069964620576111, "learning_rate": 1.2543390105433903e-05, "loss": 0.5627, "step": 26518 }, { "epoch": 0.7742547662841961, "grad_norm": 0.5041123942588804, "learning_rate": 1.2541768045417681e-05, "loss": 0.5393, "step": 26519 }, { "epoch": 0.7742839625120435, "grad_norm": 0.5118046969109693, "learning_rate": 1.2540145985401461e-05, "loss": 0.5628, "step": 26520 }, { "epoch": 0.7743131587398908, "grad_norm": 0.47132960865711193, "learning_rate": 1.253852392538524e-05, "loss": 0.4907, "step": 26521 }, { "epoch": 0.7743423549677382, "grad_norm": 0.4964693494287974, "learning_rate": 1.253690186536902e-05, "loss": 0.5246, "step": 26522 }, { "epoch": 0.7743715511955855, "grad_norm": 0.5276513521587728, "learning_rate": 1.2535279805352798e-05, "loss": 0.5516, "step": 26523 }, { "epoch": 0.7744007474234329, "grad_norm": 0.528858877191213, "learning_rate": 1.2533657745336576e-05, "loss": 0.5808, "step": 26524 }, { "epoch": 0.7744299436512803, "grad_norm": 0.5046052309755421, "learning_rate": 1.2532035685320357e-05, "loss": 0.5331, "step": 26525 }, { "epoch": 0.7744591398791276, "grad_norm": 0.5147565470051434, "learning_rate": 1.2530413625304138e-05, "loss": 0.575, "step": 26526 }, { "epoch": 0.774488336106975, "grad_norm": 0.49512734278525716, "learning_rate": 1.2528791565287917e-05, "loss": 0.5342, "step": 26527 }, { "epoch": 0.7745175323348223, "grad_norm": 0.5060707138342639, "learning_rate": 1.2527169505271697e-05, "loss": 0.5001, "step": 26528 }, { "epoch": 0.7745467285626697, "grad_norm": 0.5351735933951034, "learning_rate": 1.2525547445255475e-05, "loss": 0.6183, "step": 26529 }, { "epoch": 0.7745759247905171, "grad_norm": 0.5411606647584062, "learning_rate": 1.2523925385239255e-05, "loss": 0.6173, "step": 26530 }, { "epoch": 0.7746051210183644, "grad_norm": 0.5472085760306189, "learning_rate": 1.2522303325223034e-05, "loss": 0.5222, "step": 26531 }, { "epoch": 0.7746343172462118, "grad_norm": 0.5566866710683581, "learning_rate": 1.2520681265206814e-05, "loss": 0.6482, "step": 26532 }, { "epoch": 0.7746635134740592, "grad_norm": 0.5167539152717715, "learning_rate": 1.2519059205190592e-05, "loss": 0.5197, "step": 26533 }, { "epoch": 0.7746927097019065, "grad_norm": 0.5146892183258037, "learning_rate": 1.251743714517437e-05, "loss": 0.5561, "step": 26534 }, { "epoch": 0.7747219059297539, "grad_norm": 0.5283121064742301, "learning_rate": 1.251581508515815e-05, "loss": 0.5665, "step": 26535 }, { "epoch": 0.7747511021576012, "grad_norm": 0.5164621576587141, "learning_rate": 1.2514193025141932e-05, "loss": 0.5366, "step": 26536 }, { "epoch": 0.7747802983854486, "grad_norm": 0.5028908839009582, "learning_rate": 1.251257096512571e-05, "loss": 0.5416, "step": 26537 }, { "epoch": 0.774809494613296, "grad_norm": 0.5822615568987032, "learning_rate": 1.251094890510949e-05, "loss": 0.6901, "step": 26538 }, { "epoch": 0.7748386908411433, "grad_norm": 0.5021954783269055, "learning_rate": 1.2509326845093269e-05, "loss": 0.5374, "step": 26539 }, { "epoch": 0.7748678870689907, "grad_norm": 0.5319793033728126, "learning_rate": 1.2507704785077049e-05, "loss": 0.6177, "step": 26540 }, { "epoch": 0.774897083296838, "grad_norm": 0.5620310766936276, "learning_rate": 1.2506082725060827e-05, "loss": 0.6328, "step": 26541 }, { "epoch": 0.7749262795246854, "grad_norm": 0.49164676065762664, "learning_rate": 1.2504460665044606e-05, "loss": 0.5347, "step": 26542 }, { "epoch": 0.7749554757525328, "grad_norm": 0.5067901944707675, "learning_rate": 1.2502838605028386e-05, "loss": 0.5688, "step": 26543 }, { "epoch": 0.7749846719803801, "grad_norm": 0.5800214079080673, "learning_rate": 1.2501216545012164e-05, "loss": 0.6645, "step": 26544 }, { "epoch": 0.7750138682082275, "grad_norm": 0.5152976545851412, "learning_rate": 1.2499594484995946e-05, "loss": 0.5858, "step": 26545 }, { "epoch": 0.7750430644360748, "grad_norm": 0.5023404034911071, "learning_rate": 1.2497972424979724e-05, "loss": 0.5309, "step": 26546 }, { "epoch": 0.7750722606639222, "grad_norm": 0.4994089238652433, "learning_rate": 1.2496350364963503e-05, "loss": 0.5085, "step": 26547 }, { "epoch": 0.7751014568917696, "grad_norm": 0.5433469337055928, "learning_rate": 1.2494728304947285e-05, "loss": 0.6345, "step": 26548 }, { "epoch": 0.7751306531196169, "grad_norm": 0.5603248149755045, "learning_rate": 1.2493106244931063e-05, "loss": 0.6963, "step": 26549 }, { "epoch": 0.7751598493474643, "grad_norm": 0.4927658573702582, "learning_rate": 1.2491484184914843e-05, "loss": 0.5555, "step": 26550 }, { "epoch": 0.7751890455753117, "grad_norm": 0.543121695264759, "learning_rate": 1.2489862124898621e-05, "loss": 0.6294, "step": 26551 }, { "epoch": 0.775218241803159, "grad_norm": 0.5169526281265272, "learning_rate": 1.2488240064882401e-05, "loss": 0.5636, "step": 26552 }, { "epoch": 0.7752474380310064, "grad_norm": 0.4894613451972047, "learning_rate": 1.2486618004866182e-05, "loss": 0.4901, "step": 26553 }, { "epoch": 0.7752766342588537, "grad_norm": 0.5262490379993462, "learning_rate": 1.248499594484996e-05, "loss": 0.6138, "step": 26554 }, { "epoch": 0.7753058304867011, "grad_norm": 0.5532058827954724, "learning_rate": 1.248337388483374e-05, "loss": 0.6061, "step": 26555 }, { "epoch": 0.7753350267145485, "grad_norm": 0.5377558579965769, "learning_rate": 1.2481751824817518e-05, "loss": 0.6199, "step": 26556 }, { "epoch": 0.7753642229423958, "grad_norm": 0.5100449429617546, "learning_rate": 1.2480129764801298e-05, "loss": 0.5461, "step": 26557 }, { "epoch": 0.7753934191702432, "grad_norm": 0.521915263619602, "learning_rate": 1.2478507704785078e-05, "loss": 0.5669, "step": 26558 }, { "epoch": 0.7754226153980905, "grad_norm": 0.5362770134751841, "learning_rate": 1.2476885644768857e-05, "loss": 0.5956, "step": 26559 }, { "epoch": 0.7754518116259379, "grad_norm": 0.5639174170482758, "learning_rate": 1.2475263584752637e-05, "loss": 0.6783, "step": 26560 }, { "epoch": 0.7754810078537853, "grad_norm": 0.5421699777788447, "learning_rate": 1.2473641524736415e-05, "loss": 0.6236, "step": 26561 }, { "epoch": 0.7755102040816326, "grad_norm": 0.4992203540900668, "learning_rate": 1.2472019464720195e-05, "loss": 0.5522, "step": 26562 }, { "epoch": 0.77553940030948, "grad_norm": 0.5066656465966005, "learning_rate": 1.2470397404703975e-05, "loss": 0.5844, "step": 26563 }, { "epoch": 0.7755685965373273, "grad_norm": 0.5005947620335411, "learning_rate": 1.2468775344687754e-05, "loss": 0.5406, "step": 26564 }, { "epoch": 0.7755977927651747, "grad_norm": 0.5139031587176061, "learning_rate": 1.2467153284671534e-05, "loss": 0.5644, "step": 26565 }, { "epoch": 0.7756269889930221, "grad_norm": 0.5094303450351286, "learning_rate": 1.2465531224655314e-05, "loss": 0.5564, "step": 26566 }, { "epoch": 0.7756561852208694, "grad_norm": 0.5115405197993359, "learning_rate": 1.2463909164639092e-05, "loss": 0.5496, "step": 26567 }, { "epoch": 0.7756853814487168, "grad_norm": 0.5448757776716874, "learning_rate": 1.2462287104622872e-05, "loss": 0.6534, "step": 26568 }, { "epoch": 0.7757145776765642, "grad_norm": 0.4974272802668247, "learning_rate": 1.246066504460665e-05, "loss": 0.5374, "step": 26569 }, { "epoch": 0.7757437739044115, "grad_norm": 0.5222873398037897, "learning_rate": 1.245904298459043e-05, "loss": 0.5707, "step": 26570 }, { "epoch": 0.7757729701322589, "grad_norm": 0.5620368891342736, "learning_rate": 1.2457420924574211e-05, "loss": 0.6242, "step": 26571 }, { "epoch": 0.7758021663601062, "grad_norm": 0.5243871501275796, "learning_rate": 1.245579886455799e-05, "loss": 0.5676, "step": 26572 }, { "epoch": 0.7758313625879536, "grad_norm": 0.45906199534933695, "learning_rate": 1.245417680454177e-05, "loss": 0.4809, "step": 26573 }, { "epoch": 0.775860558815801, "grad_norm": 0.489209186417521, "learning_rate": 1.2452554744525548e-05, "loss": 0.5105, "step": 26574 }, { "epoch": 0.7758897550436483, "grad_norm": 0.529356624386572, "learning_rate": 1.2450932684509326e-05, "loss": 0.5635, "step": 26575 }, { "epoch": 0.7759189512714957, "grad_norm": 0.5368330030432594, "learning_rate": 1.2449310624493108e-05, "loss": 0.5708, "step": 26576 }, { "epoch": 0.775948147499343, "grad_norm": 0.534992205540062, "learning_rate": 1.2447688564476886e-05, "loss": 0.556, "step": 26577 }, { "epoch": 0.7759773437271904, "grad_norm": 0.5645325494117865, "learning_rate": 1.2446066504460666e-05, "loss": 0.6224, "step": 26578 }, { "epoch": 0.7760065399550378, "grad_norm": 0.5416754475489917, "learning_rate": 1.2444444444444445e-05, "loss": 0.6116, "step": 26579 }, { "epoch": 0.7760357361828851, "grad_norm": 0.5493158291655863, "learning_rate": 1.2442822384428223e-05, "loss": 0.6563, "step": 26580 }, { "epoch": 0.7760649324107325, "grad_norm": 0.505010260312372, "learning_rate": 1.2441200324412005e-05, "loss": 0.5662, "step": 26581 }, { "epoch": 0.7760941286385798, "grad_norm": 0.5425036332604187, "learning_rate": 1.2439578264395783e-05, "loss": 0.5865, "step": 26582 }, { "epoch": 0.7761233248664272, "grad_norm": 0.5184121095652567, "learning_rate": 1.2437956204379563e-05, "loss": 0.5798, "step": 26583 }, { "epoch": 0.7761525210942746, "grad_norm": 0.5525917194220207, "learning_rate": 1.2436334144363342e-05, "loss": 0.6532, "step": 26584 }, { "epoch": 0.7761817173221219, "grad_norm": 0.6443734712152622, "learning_rate": 1.2434712084347122e-05, "loss": 0.6363, "step": 26585 }, { "epoch": 0.7762109135499693, "grad_norm": 0.5018108002296299, "learning_rate": 1.2433090024330902e-05, "loss": 0.4942, "step": 26586 }, { "epoch": 0.7762401097778167, "grad_norm": 0.527697893233679, "learning_rate": 1.243146796431468e-05, "loss": 0.5692, "step": 26587 }, { "epoch": 0.776269306005664, "grad_norm": 0.5412269101393452, "learning_rate": 1.242984590429846e-05, "loss": 0.6373, "step": 26588 }, { "epoch": 0.7762985022335114, "grad_norm": 0.5335484095699051, "learning_rate": 1.2428223844282239e-05, "loss": 0.6135, "step": 26589 }, { "epoch": 0.7763276984613587, "grad_norm": 0.5040659115366878, "learning_rate": 1.2426601784266019e-05, "loss": 0.5548, "step": 26590 }, { "epoch": 0.7763568946892061, "grad_norm": 0.5241760144814823, "learning_rate": 1.2424979724249799e-05, "loss": 0.6013, "step": 26591 }, { "epoch": 0.7763860909170535, "grad_norm": 0.5122735838351081, "learning_rate": 1.2423357664233577e-05, "loss": 0.5244, "step": 26592 }, { "epoch": 0.7764152871449009, "grad_norm": 0.5013514410087709, "learning_rate": 1.2421735604217357e-05, "loss": 0.5845, "step": 26593 }, { "epoch": 0.7764444833727483, "grad_norm": 0.5518677542839132, "learning_rate": 1.2420113544201135e-05, "loss": 0.6491, "step": 26594 }, { "epoch": 0.7764736796005957, "grad_norm": 0.5482096458324793, "learning_rate": 1.2418491484184916e-05, "loss": 0.6315, "step": 26595 }, { "epoch": 0.776502875828443, "grad_norm": 0.5344223898364002, "learning_rate": 1.2416869424168696e-05, "loss": 0.5716, "step": 26596 }, { "epoch": 0.7765320720562904, "grad_norm": 0.5333312853408267, "learning_rate": 1.2415247364152474e-05, "loss": 0.6275, "step": 26597 }, { "epoch": 0.7765612682841377, "grad_norm": 0.5538281625417648, "learning_rate": 1.2413625304136254e-05, "loss": 0.6364, "step": 26598 }, { "epoch": 0.7765904645119851, "grad_norm": 0.5701978324291231, "learning_rate": 1.2412003244120032e-05, "loss": 0.6361, "step": 26599 }, { "epoch": 0.7766196607398325, "grad_norm": 0.5286734962564292, "learning_rate": 1.2410381184103813e-05, "loss": 0.5737, "step": 26600 }, { "epoch": 0.7766488569676798, "grad_norm": 0.5260787745704367, "learning_rate": 1.2408759124087593e-05, "loss": 0.5889, "step": 26601 }, { "epoch": 0.7766780531955272, "grad_norm": 0.5369934024940258, "learning_rate": 1.2407137064071371e-05, "loss": 0.6039, "step": 26602 }, { "epoch": 0.7767072494233745, "grad_norm": 0.5634584213760909, "learning_rate": 1.240551500405515e-05, "loss": 0.6273, "step": 26603 }, { "epoch": 0.7767364456512219, "grad_norm": 0.5465778902210799, "learning_rate": 1.2403892944038931e-05, "loss": 0.6285, "step": 26604 }, { "epoch": 0.7767656418790693, "grad_norm": 0.5310849986496455, "learning_rate": 1.240227088402271e-05, "loss": 0.597, "step": 26605 }, { "epoch": 0.7767948381069166, "grad_norm": 0.538381539386432, "learning_rate": 1.240064882400649e-05, "loss": 0.6101, "step": 26606 }, { "epoch": 0.776824034334764, "grad_norm": 0.522649468165385, "learning_rate": 1.2399026763990268e-05, "loss": 0.5524, "step": 26607 }, { "epoch": 0.7768532305626114, "grad_norm": 0.5128224960185297, "learning_rate": 1.2397404703974046e-05, "loss": 0.5685, "step": 26608 }, { "epoch": 0.7768824267904587, "grad_norm": 0.5040105653393154, "learning_rate": 1.2395782643957828e-05, "loss": 0.5503, "step": 26609 }, { "epoch": 0.7769116230183061, "grad_norm": 0.56078426970826, "learning_rate": 1.2394160583941606e-05, "loss": 0.6437, "step": 26610 }, { "epoch": 0.7769408192461534, "grad_norm": 0.48092656038651643, "learning_rate": 1.2392538523925386e-05, "loss": 0.5277, "step": 26611 }, { "epoch": 0.7769700154740008, "grad_norm": 0.5338120082713671, "learning_rate": 1.2390916463909165e-05, "loss": 0.6357, "step": 26612 }, { "epoch": 0.7769992117018482, "grad_norm": 0.5219039031793249, "learning_rate": 1.2389294403892943e-05, "loss": 0.5775, "step": 26613 }, { "epoch": 0.7770284079296955, "grad_norm": 0.6372076700348146, "learning_rate": 1.2387672343876725e-05, "loss": 0.6269, "step": 26614 }, { "epoch": 0.7770576041575429, "grad_norm": 0.5317674812923897, "learning_rate": 1.2386050283860503e-05, "loss": 0.5836, "step": 26615 }, { "epoch": 0.7770868003853902, "grad_norm": 0.530606393764915, "learning_rate": 1.2384428223844283e-05, "loss": 0.6057, "step": 26616 }, { "epoch": 0.7771159966132376, "grad_norm": 0.502716317483855, "learning_rate": 1.2382806163828062e-05, "loss": 0.5523, "step": 26617 }, { "epoch": 0.777145192841085, "grad_norm": 0.5327726576875814, "learning_rate": 1.2381184103811842e-05, "loss": 0.5841, "step": 26618 }, { "epoch": 0.7771743890689323, "grad_norm": 0.578068747807718, "learning_rate": 1.2379562043795622e-05, "loss": 0.6529, "step": 26619 }, { "epoch": 0.7772035852967797, "grad_norm": 0.4848779863646579, "learning_rate": 1.23779399837794e-05, "loss": 0.4919, "step": 26620 }, { "epoch": 0.777232781524627, "grad_norm": 0.49414680860698773, "learning_rate": 1.237631792376318e-05, "loss": 0.5661, "step": 26621 }, { "epoch": 0.7772619777524744, "grad_norm": 0.5677526215332934, "learning_rate": 1.2374695863746959e-05, "loss": 0.6214, "step": 26622 }, { "epoch": 0.7772911739803218, "grad_norm": 0.5660564877679694, "learning_rate": 1.2373073803730739e-05, "loss": 0.6692, "step": 26623 }, { "epoch": 0.7773203702081691, "grad_norm": 0.5467315696790135, "learning_rate": 1.2371451743714519e-05, "loss": 0.6395, "step": 26624 }, { "epoch": 0.7773495664360165, "grad_norm": 0.5566863316878792, "learning_rate": 1.2369829683698297e-05, "loss": 0.68, "step": 26625 }, { "epoch": 0.7773787626638639, "grad_norm": 0.49742595896712105, "learning_rate": 1.2368207623682077e-05, "loss": 0.5571, "step": 26626 }, { "epoch": 0.7774079588917112, "grad_norm": 0.5145197949852743, "learning_rate": 1.2366585563665856e-05, "loss": 0.5478, "step": 26627 }, { "epoch": 0.7774371551195586, "grad_norm": 0.5114232532507921, "learning_rate": 1.2364963503649636e-05, "loss": 0.5519, "step": 26628 }, { "epoch": 0.7774663513474059, "grad_norm": 0.5082482542117929, "learning_rate": 1.2363341443633416e-05, "loss": 0.5358, "step": 26629 }, { "epoch": 0.7774955475752533, "grad_norm": 0.49654359044726415, "learning_rate": 1.2361719383617194e-05, "loss": 0.5425, "step": 26630 }, { "epoch": 0.7775247438031007, "grad_norm": 0.5410762398570444, "learning_rate": 1.2360097323600974e-05, "loss": 0.6336, "step": 26631 }, { "epoch": 0.777553940030948, "grad_norm": 0.5426245562138465, "learning_rate": 1.2358475263584753e-05, "loss": 0.6661, "step": 26632 }, { "epoch": 0.7775831362587954, "grad_norm": 0.5091343886300914, "learning_rate": 1.2356853203568533e-05, "loss": 0.587, "step": 26633 }, { "epoch": 0.7776123324866427, "grad_norm": 0.5672948561047442, "learning_rate": 1.2355231143552313e-05, "loss": 0.6337, "step": 26634 }, { "epoch": 0.7776415287144901, "grad_norm": 0.520668714735144, "learning_rate": 1.2353609083536091e-05, "loss": 0.5904, "step": 26635 }, { "epoch": 0.7776707249423375, "grad_norm": 0.5098702667777743, "learning_rate": 1.235198702351987e-05, "loss": 0.5842, "step": 26636 }, { "epoch": 0.7776999211701848, "grad_norm": 0.5373093309504611, "learning_rate": 1.2350364963503651e-05, "loss": 0.5869, "step": 26637 }, { "epoch": 0.7777291173980322, "grad_norm": 0.4967671611833856, "learning_rate": 1.234874290348743e-05, "loss": 0.5423, "step": 26638 }, { "epoch": 0.7777583136258795, "grad_norm": 0.5560147442363903, "learning_rate": 1.234712084347121e-05, "loss": 0.5874, "step": 26639 }, { "epoch": 0.7777875098537269, "grad_norm": 0.5093489729585302, "learning_rate": 1.2345498783454988e-05, "loss": 0.5747, "step": 26640 }, { "epoch": 0.7778167060815743, "grad_norm": 0.5221961118975065, "learning_rate": 1.2343876723438766e-05, "loss": 0.5883, "step": 26641 }, { "epoch": 0.7778459023094216, "grad_norm": 0.5315695199436864, "learning_rate": 1.2342254663422548e-05, "loss": 0.6285, "step": 26642 }, { "epoch": 0.777875098537269, "grad_norm": 0.5133884156112674, "learning_rate": 1.2340632603406327e-05, "loss": 0.5831, "step": 26643 }, { "epoch": 0.7779042947651164, "grad_norm": 0.5194342967945077, "learning_rate": 1.2339010543390107e-05, "loss": 0.5833, "step": 26644 }, { "epoch": 0.7779334909929637, "grad_norm": 0.524492368918228, "learning_rate": 1.2337388483373885e-05, "loss": 0.58, "step": 26645 }, { "epoch": 0.7779626872208111, "grad_norm": 0.5467816868246674, "learning_rate": 1.2335766423357663e-05, "loss": 0.6503, "step": 26646 }, { "epoch": 0.7779918834486584, "grad_norm": 0.5358148081526003, "learning_rate": 1.2334144363341445e-05, "loss": 0.6298, "step": 26647 }, { "epoch": 0.7780210796765058, "grad_norm": 0.5132324244078995, "learning_rate": 1.2332522303325224e-05, "loss": 0.5658, "step": 26648 }, { "epoch": 0.7780502759043532, "grad_norm": 0.5566908493849991, "learning_rate": 1.2330900243309004e-05, "loss": 0.6492, "step": 26649 }, { "epoch": 0.7780794721322005, "grad_norm": 0.5114587980149405, "learning_rate": 1.2329278183292782e-05, "loss": 0.5454, "step": 26650 }, { "epoch": 0.7781086683600479, "grad_norm": 0.5417185740137195, "learning_rate": 1.2327656123276562e-05, "loss": 0.6598, "step": 26651 }, { "epoch": 0.7781378645878952, "grad_norm": 0.5173252339363371, "learning_rate": 1.2326034063260342e-05, "loss": 0.5848, "step": 26652 }, { "epoch": 0.7781670608157426, "grad_norm": 0.5528946633745472, "learning_rate": 1.232441200324412e-05, "loss": 0.6297, "step": 26653 }, { "epoch": 0.77819625704359, "grad_norm": 0.4994668732237252, "learning_rate": 1.23227899432279e-05, "loss": 0.5533, "step": 26654 }, { "epoch": 0.7782254532714373, "grad_norm": 0.5486037095070293, "learning_rate": 1.2321167883211679e-05, "loss": 0.6257, "step": 26655 }, { "epoch": 0.7782546494992847, "grad_norm": 0.5284516493776089, "learning_rate": 1.2319545823195459e-05, "loss": 0.5863, "step": 26656 }, { "epoch": 0.778283845727132, "grad_norm": 0.5571125634445665, "learning_rate": 1.2317923763179239e-05, "loss": 0.6105, "step": 26657 }, { "epoch": 0.7783130419549794, "grad_norm": 0.508852264400747, "learning_rate": 1.2316301703163017e-05, "loss": 0.5724, "step": 26658 }, { "epoch": 0.7783422381828268, "grad_norm": 0.5561799172954681, "learning_rate": 1.2314679643146798e-05, "loss": 0.6236, "step": 26659 }, { "epoch": 0.7783714344106741, "grad_norm": 0.4892753125991969, "learning_rate": 1.2313057583130576e-05, "loss": 0.5444, "step": 26660 }, { "epoch": 0.7784006306385215, "grad_norm": 0.505983442390945, "learning_rate": 1.2311435523114356e-05, "loss": 0.5511, "step": 26661 }, { "epoch": 0.7784298268663689, "grad_norm": 0.5016935125265521, "learning_rate": 1.2309813463098136e-05, "loss": 0.5383, "step": 26662 }, { "epoch": 0.7784590230942162, "grad_norm": 0.5567544941133922, "learning_rate": 1.2308191403081914e-05, "loss": 0.6535, "step": 26663 }, { "epoch": 0.7784882193220636, "grad_norm": 0.497946625794, "learning_rate": 1.2306569343065693e-05, "loss": 0.5554, "step": 26664 }, { "epoch": 0.7785174155499109, "grad_norm": 0.5257065861496311, "learning_rate": 1.2304947283049473e-05, "loss": 0.5874, "step": 26665 }, { "epoch": 0.7785466117777583, "grad_norm": 0.5137652811193933, "learning_rate": 1.2303325223033253e-05, "loss": 0.5669, "step": 26666 }, { "epoch": 0.7785758080056057, "grad_norm": 0.5335121403380487, "learning_rate": 1.2301703163017033e-05, "loss": 0.6266, "step": 26667 }, { "epoch": 0.778605004233453, "grad_norm": 0.5025845179403651, "learning_rate": 1.2300081103000811e-05, "loss": 0.5251, "step": 26668 }, { "epoch": 0.7786342004613004, "grad_norm": 0.5260556700317783, "learning_rate": 1.229845904298459e-05, "loss": 0.6025, "step": 26669 }, { "epoch": 0.7786633966891477, "grad_norm": 0.5379489255792997, "learning_rate": 1.2296836982968372e-05, "loss": 0.6578, "step": 26670 }, { "epoch": 0.7786925929169951, "grad_norm": 0.5435618455519317, "learning_rate": 1.229521492295215e-05, "loss": 0.652, "step": 26671 }, { "epoch": 0.7787217891448425, "grad_norm": 0.5065009249137737, "learning_rate": 1.229359286293593e-05, "loss": 0.5353, "step": 26672 }, { "epoch": 0.7787509853726898, "grad_norm": 0.4968016310361678, "learning_rate": 1.2291970802919708e-05, "loss": 0.539, "step": 26673 }, { "epoch": 0.7787801816005372, "grad_norm": 0.5224451690975503, "learning_rate": 1.2290348742903487e-05, "loss": 0.6073, "step": 26674 }, { "epoch": 0.7788093778283846, "grad_norm": 0.5207429442210534, "learning_rate": 1.2288726682887268e-05, "loss": 0.5811, "step": 26675 }, { "epoch": 0.7788385740562319, "grad_norm": 0.5746954964823626, "learning_rate": 1.2287104622871047e-05, "loss": 0.6445, "step": 26676 }, { "epoch": 0.7788677702840793, "grad_norm": 0.5442907743638993, "learning_rate": 1.2285482562854827e-05, "loss": 0.6366, "step": 26677 }, { "epoch": 0.7788969665119266, "grad_norm": 0.5521098972196613, "learning_rate": 1.2283860502838605e-05, "loss": 0.6211, "step": 26678 }, { "epoch": 0.778926162739774, "grad_norm": 0.49682936642913633, "learning_rate": 1.2282238442822384e-05, "loss": 0.5275, "step": 26679 }, { "epoch": 0.7789553589676214, "grad_norm": 0.4881895630218786, "learning_rate": 1.2280616382806165e-05, "loss": 0.5607, "step": 26680 }, { "epoch": 0.7789845551954687, "grad_norm": 0.5154113445780313, "learning_rate": 1.2278994322789944e-05, "loss": 0.5611, "step": 26681 }, { "epoch": 0.7790137514233161, "grad_norm": 0.5207270046678965, "learning_rate": 1.2277372262773724e-05, "loss": 0.5493, "step": 26682 }, { "epoch": 0.7790429476511634, "grad_norm": 0.5138121186876253, "learning_rate": 1.2275750202757502e-05, "loss": 0.5621, "step": 26683 }, { "epoch": 0.7790721438790108, "grad_norm": 0.5311124481262288, "learning_rate": 1.2274128142741282e-05, "loss": 0.6102, "step": 26684 }, { "epoch": 0.7791013401068582, "grad_norm": 0.5860778290925605, "learning_rate": 1.2272506082725062e-05, "loss": 0.6733, "step": 26685 }, { "epoch": 0.7791305363347055, "grad_norm": 0.5595512448374594, "learning_rate": 1.227088402270884e-05, "loss": 0.6607, "step": 26686 }, { "epoch": 0.7791597325625529, "grad_norm": 0.5279234315723806, "learning_rate": 1.226926196269262e-05, "loss": 0.59, "step": 26687 }, { "epoch": 0.7791889287904002, "grad_norm": 0.5357970132854071, "learning_rate": 1.22676399026764e-05, "loss": 0.6231, "step": 26688 }, { "epoch": 0.7792181250182476, "grad_norm": 0.4799154622348946, "learning_rate": 1.226601784266018e-05, "loss": 0.5001, "step": 26689 }, { "epoch": 0.779247321246095, "grad_norm": 0.5529820047540717, "learning_rate": 1.226439578264396e-05, "loss": 0.685, "step": 26690 }, { "epoch": 0.7792765174739423, "grad_norm": 0.45718637576688764, "learning_rate": 1.2262773722627738e-05, "loss": 0.4653, "step": 26691 }, { "epoch": 0.7793057137017897, "grad_norm": 0.5163048500260429, "learning_rate": 1.2261151662611518e-05, "loss": 0.5834, "step": 26692 }, { "epoch": 0.779334909929637, "grad_norm": 0.5378120321766247, "learning_rate": 1.2259529602595296e-05, "loss": 0.5992, "step": 26693 }, { "epoch": 0.7793641061574844, "grad_norm": 0.5029849398342116, "learning_rate": 1.2257907542579076e-05, "loss": 0.5446, "step": 26694 }, { "epoch": 0.7793933023853318, "grad_norm": 0.5531336534218518, "learning_rate": 1.2256285482562856e-05, "loss": 0.6602, "step": 26695 }, { "epoch": 0.7794224986131791, "grad_norm": 0.5133767991023136, "learning_rate": 1.2254663422546635e-05, "loss": 0.5619, "step": 26696 }, { "epoch": 0.7794516948410265, "grad_norm": 0.5599188992952043, "learning_rate": 1.2253041362530413e-05, "loss": 0.6865, "step": 26697 }, { "epoch": 0.7794808910688739, "grad_norm": 0.5317612575834733, "learning_rate": 1.2251419302514193e-05, "loss": 0.6089, "step": 26698 }, { "epoch": 0.7795100872967212, "grad_norm": 0.5258451529401317, "learning_rate": 1.2249797242497973e-05, "loss": 0.6259, "step": 26699 }, { "epoch": 0.7795392835245686, "grad_norm": 0.5484659223714772, "learning_rate": 1.2248175182481753e-05, "loss": 0.5983, "step": 26700 }, { "epoch": 0.7795684797524159, "grad_norm": 0.5493498397939636, "learning_rate": 1.2246553122465532e-05, "loss": 0.6225, "step": 26701 }, { "epoch": 0.7795976759802633, "grad_norm": 0.5573462563835382, "learning_rate": 1.224493106244931e-05, "loss": 0.6645, "step": 26702 }, { "epoch": 0.7796268722081107, "grad_norm": 0.528471101418062, "learning_rate": 1.2243309002433092e-05, "loss": 0.5797, "step": 26703 }, { "epoch": 0.779656068435958, "grad_norm": 0.5212554528943552, "learning_rate": 1.224168694241687e-05, "loss": 0.5759, "step": 26704 }, { "epoch": 0.7796852646638054, "grad_norm": 0.4976494031141364, "learning_rate": 1.224006488240065e-05, "loss": 0.5477, "step": 26705 }, { "epoch": 0.7797144608916527, "grad_norm": 0.5199544929015841, "learning_rate": 1.2238442822384429e-05, "loss": 0.5588, "step": 26706 }, { "epoch": 0.7797436571195001, "grad_norm": 0.48671344728080473, "learning_rate": 1.2236820762368207e-05, "loss": 0.5341, "step": 26707 }, { "epoch": 0.7797728533473475, "grad_norm": 0.5060134420137417, "learning_rate": 1.2235198702351989e-05, "loss": 0.5704, "step": 26708 }, { "epoch": 0.7798020495751948, "grad_norm": 0.48378122259148043, "learning_rate": 1.2233576642335767e-05, "loss": 0.5377, "step": 26709 }, { "epoch": 0.7798312458030422, "grad_norm": 0.5615139013002931, "learning_rate": 1.2231954582319547e-05, "loss": 0.6647, "step": 26710 }, { "epoch": 0.7798604420308896, "grad_norm": 0.5091437381675431, "learning_rate": 1.2230332522303326e-05, "loss": 0.5532, "step": 26711 }, { "epoch": 0.7798896382587369, "grad_norm": 0.4951258830030047, "learning_rate": 1.2228710462287104e-05, "loss": 0.5459, "step": 26712 }, { "epoch": 0.7799188344865843, "grad_norm": 0.5240023804326456, "learning_rate": 1.2227088402270886e-05, "loss": 0.5849, "step": 26713 }, { "epoch": 0.7799480307144318, "grad_norm": 0.5295880251626681, "learning_rate": 1.2225466342254664e-05, "loss": 0.6402, "step": 26714 }, { "epoch": 0.7799772269422791, "grad_norm": 0.508523222003442, "learning_rate": 1.2223844282238444e-05, "loss": 0.5583, "step": 26715 }, { "epoch": 0.7800064231701265, "grad_norm": 0.5627019850400762, "learning_rate": 1.2222222222222222e-05, "loss": 0.7059, "step": 26716 }, { "epoch": 0.7800356193979738, "grad_norm": 0.5572806494740077, "learning_rate": 1.2220600162206003e-05, "loss": 0.6217, "step": 26717 }, { "epoch": 0.7800648156258212, "grad_norm": 0.5664771649317946, "learning_rate": 1.2218978102189783e-05, "loss": 0.6912, "step": 26718 }, { "epoch": 0.7800940118536686, "grad_norm": 0.5299147069184715, "learning_rate": 1.2217356042173561e-05, "loss": 0.6226, "step": 26719 }, { "epoch": 0.7801232080815159, "grad_norm": 0.5093730606550088, "learning_rate": 1.2215733982157341e-05, "loss": 0.5807, "step": 26720 }, { "epoch": 0.7801524043093633, "grad_norm": 0.5349661061323342, "learning_rate": 1.221411192214112e-05, "loss": 0.5821, "step": 26721 }, { "epoch": 0.7801816005372106, "grad_norm": 0.4869364917204987, "learning_rate": 1.22124898621249e-05, "loss": 0.5057, "step": 26722 }, { "epoch": 0.780210796765058, "grad_norm": 0.47169970103028236, "learning_rate": 1.221086780210868e-05, "loss": 0.4713, "step": 26723 }, { "epoch": 0.7802399929929054, "grad_norm": 0.5184271048574933, "learning_rate": 1.2209245742092458e-05, "loss": 0.5652, "step": 26724 }, { "epoch": 0.7802691892207527, "grad_norm": 0.5164798325852701, "learning_rate": 1.2207623682076236e-05, "loss": 0.5849, "step": 26725 }, { "epoch": 0.7802983854486001, "grad_norm": 0.47811369548963506, "learning_rate": 1.2206001622060016e-05, "loss": 0.5397, "step": 26726 }, { "epoch": 0.7803275816764474, "grad_norm": 0.5575018641563328, "learning_rate": 1.2204379562043796e-05, "loss": 0.6329, "step": 26727 }, { "epoch": 0.7803567779042948, "grad_norm": 0.5248406011692383, "learning_rate": 1.2202757502027577e-05, "loss": 0.5893, "step": 26728 }, { "epoch": 0.7803859741321422, "grad_norm": 0.49772372242453616, "learning_rate": 1.2201135442011355e-05, "loss": 0.5396, "step": 26729 }, { "epoch": 0.7804151703599895, "grad_norm": 0.5153007842812772, "learning_rate": 1.2199513381995133e-05, "loss": 0.5197, "step": 26730 }, { "epoch": 0.7804443665878369, "grad_norm": 0.4989704420629186, "learning_rate": 1.2197891321978913e-05, "loss": 0.5489, "step": 26731 }, { "epoch": 0.7804735628156843, "grad_norm": 0.5362291412476899, "learning_rate": 1.2196269261962693e-05, "loss": 0.6152, "step": 26732 }, { "epoch": 0.7805027590435316, "grad_norm": 0.5307207816254437, "learning_rate": 1.2194647201946473e-05, "loss": 0.5988, "step": 26733 }, { "epoch": 0.780531955271379, "grad_norm": 0.5061545851207464, "learning_rate": 1.2193025141930252e-05, "loss": 0.5803, "step": 26734 }, { "epoch": 0.7805611514992263, "grad_norm": 0.520565869570895, "learning_rate": 1.219140308191403e-05, "loss": 0.5676, "step": 26735 }, { "epoch": 0.7805903477270737, "grad_norm": 0.5348385902032291, "learning_rate": 1.2189781021897812e-05, "loss": 0.6408, "step": 26736 }, { "epoch": 0.7806195439549211, "grad_norm": 0.5304844063442127, "learning_rate": 1.218815896188159e-05, "loss": 0.5998, "step": 26737 }, { "epoch": 0.7806487401827684, "grad_norm": 0.5360967716667464, "learning_rate": 1.218653690186537e-05, "loss": 0.6324, "step": 26738 }, { "epoch": 0.7806779364106158, "grad_norm": 0.5177957496291485, "learning_rate": 1.2184914841849149e-05, "loss": 0.5526, "step": 26739 }, { "epoch": 0.7807071326384631, "grad_norm": 0.535772853342325, "learning_rate": 1.2183292781832927e-05, "loss": 0.6135, "step": 26740 }, { "epoch": 0.7807363288663105, "grad_norm": 0.527167651519648, "learning_rate": 1.2181670721816709e-05, "loss": 0.5757, "step": 26741 }, { "epoch": 0.7807655250941579, "grad_norm": 0.5201491012852687, "learning_rate": 1.2180048661800487e-05, "loss": 0.5811, "step": 26742 }, { "epoch": 0.7807947213220052, "grad_norm": 0.48554795382731203, "learning_rate": 1.2178426601784267e-05, "loss": 0.5373, "step": 26743 }, { "epoch": 0.7808239175498526, "grad_norm": 0.5756530015955914, "learning_rate": 1.2176804541768046e-05, "loss": 0.6542, "step": 26744 }, { "epoch": 0.7808531137777, "grad_norm": 0.5301862397417514, "learning_rate": 1.2175182481751824e-05, "loss": 0.6026, "step": 26745 }, { "epoch": 0.7808823100055473, "grad_norm": 0.5361239052910621, "learning_rate": 1.2173560421735606e-05, "loss": 0.5831, "step": 26746 }, { "epoch": 0.7809115062333947, "grad_norm": 0.5017404704206824, "learning_rate": 1.2171938361719384e-05, "loss": 0.5278, "step": 26747 }, { "epoch": 0.780940702461242, "grad_norm": 0.5468483524118497, "learning_rate": 1.2170316301703164e-05, "loss": 0.6285, "step": 26748 }, { "epoch": 0.7809698986890894, "grad_norm": 0.524102366917627, "learning_rate": 1.2168694241686943e-05, "loss": 0.585, "step": 26749 }, { "epoch": 0.7809990949169368, "grad_norm": 0.47929172250957636, "learning_rate": 1.2167072181670721e-05, "loss": 0.5007, "step": 26750 }, { "epoch": 0.7810282911447841, "grad_norm": 0.5920063992693392, "learning_rate": 1.2165450121654503e-05, "loss": 0.7408, "step": 26751 }, { "epoch": 0.7810574873726315, "grad_norm": 0.5182586997335845, "learning_rate": 1.2163828061638281e-05, "loss": 0.5569, "step": 26752 }, { "epoch": 0.7810866836004788, "grad_norm": 0.4913981887654073, "learning_rate": 1.216220600162206e-05, "loss": 0.5346, "step": 26753 }, { "epoch": 0.7811158798283262, "grad_norm": 0.4926003135434211, "learning_rate": 1.216058394160584e-05, "loss": 0.5224, "step": 26754 }, { "epoch": 0.7811450760561736, "grad_norm": 0.5353107859037135, "learning_rate": 1.215896188158962e-05, "loss": 0.622, "step": 26755 }, { "epoch": 0.7811742722840209, "grad_norm": 0.5173993480115523, "learning_rate": 1.21573398215734e-05, "loss": 0.606, "step": 26756 }, { "epoch": 0.7812034685118683, "grad_norm": 0.5375598343667554, "learning_rate": 1.2155717761557178e-05, "loss": 0.6089, "step": 26757 }, { "epoch": 0.7812326647397156, "grad_norm": 0.5155339061691548, "learning_rate": 1.2154095701540957e-05, "loss": 0.5907, "step": 26758 }, { "epoch": 0.781261860967563, "grad_norm": 0.45389889830809743, "learning_rate": 1.2152473641524737e-05, "loss": 0.4138, "step": 26759 }, { "epoch": 0.7812910571954104, "grad_norm": 0.5051027477131947, "learning_rate": 1.2150851581508517e-05, "loss": 0.5644, "step": 26760 }, { "epoch": 0.7813202534232577, "grad_norm": 0.4741767337165462, "learning_rate": 1.2149229521492297e-05, "loss": 0.5065, "step": 26761 }, { "epoch": 0.7813494496511051, "grad_norm": 0.5474703453056186, "learning_rate": 1.2147607461476075e-05, "loss": 0.6348, "step": 26762 }, { "epoch": 0.7813786458789524, "grad_norm": 0.5031315511473224, "learning_rate": 1.2145985401459853e-05, "loss": 0.5167, "step": 26763 }, { "epoch": 0.7814078421067998, "grad_norm": 0.541065410835154, "learning_rate": 1.2144363341443634e-05, "loss": 0.6078, "step": 26764 }, { "epoch": 0.7814370383346472, "grad_norm": 0.5047627515177344, "learning_rate": 1.2142741281427414e-05, "loss": 0.5469, "step": 26765 }, { "epoch": 0.7814662345624945, "grad_norm": 0.47702417364011473, "learning_rate": 1.2141119221411194e-05, "loss": 0.4993, "step": 26766 }, { "epoch": 0.7814954307903419, "grad_norm": 0.527901090413867, "learning_rate": 1.2139497161394972e-05, "loss": 0.6022, "step": 26767 }, { "epoch": 0.7815246270181893, "grad_norm": 0.4879270471385065, "learning_rate": 1.213787510137875e-05, "loss": 0.5215, "step": 26768 }, { "epoch": 0.7815538232460366, "grad_norm": 0.482023680496775, "learning_rate": 1.2136253041362532e-05, "loss": 0.5131, "step": 26769 }, { "epoch": 0.781583019473884, "grad_norm": 0.5155762251871453, "learning_rate": 1.213463098134631e-05, "loss": 0.556, "step": 26770 }, { "epoch": 0.7816122157017313, "grad_norm": 0.4916133886162993, "learning_rate": 1.213300892133009e-05, "loss": 0.5546, "step": 26771 }, { "epoch": 0.7816414119295787, "grad_norm": 0.4861002594456092, "learning_rate": 1.2131386861313869e-05, "loss": 0.5216, "step": 26772 }, { "epoch": 0.7816706081574261, "grad_norm": 0.5377458442958359, "learning_rate": 1.2129764801297647e-05, "loss": 0.6309, "step": 26773 }, { "epoch": 0.7816998043852734, "grad_norm": 0.5199071404257295, "learning_rate": 1.2128142741281429e-05, "loss": 0.5938, "step": 26774 }, { "epoch": 0.7817290006131208, "grad_norm": 0.5327938977321747, "learning_rate": 1.2126520681265208e-05, "loss": 0.6036, "step": 26775 }, { "epoch": 0.7817581968409681, "grad_norm": 0.48824589365949467, "learning_rate": 1.2124898621248988e-05, "loss": 0.5386, "step": 26776 }, { "epoch": 0.7817873930688155, "grad_norm": 0.5215629187227465, "learning_rate": 1.2123276561232766e-05, "loss": 0.5561, "step": 26777 }, { "epoch": 0.7818165892966629, "grad_norm": 0.5198161131033929, "learning_rate": 1.2121654501216544e-05, "loss": 0.5796, "step": 26778 }, { "epoch": 0.7818457855245102, "grad_norm": 0.5540500106194796, "learning_rate": 1.2120032441200326e-05, "loss": 0.6297, "step": 26779 }, { "epoch": 0.7818749817523576, "grad_norm": 0.5363727840786461, "learning_rate": 1.2118410381184104e-05, "loss": 0.6223, "step": 26780 }, { "epoch": 0.781904177980205, "grad_norm": 0.5059534274388151, "learning_rate": 1.2116788321167885e-05, "loss": 0.5531, "step": 26781 }, { "epoch": 0.7819333742080523, "grad_norm": 0.5330976668667294, "learning_rate": 1.2115166261151663e-05, "loss": 0.6196, "step": 26782 }, { "epoch": 0.7819625704358997, "grad_norm": 0.4936032253893733, "learning_rate": 1.2113544201135441e-05, "loss": 0.5148, "step": 26783 }, { "epoch": 0.781991766663747, "grad_norm": 0.5296193156855734, "learning_rate": 1.2111922141119223e-05, "loss": 0.6013, "step": 26784 }, { "epoch": 0.7820209628915944, "grad_norm": 0.48487406107478986, "learning_rate": 1.2110300081103001e-05, "loss": 0.5313, "step": 26785 }, { "epoch": 0.7820501591194418, "grad_norm": 0.5406626952316762, "learning_rate": 1.210867802108678e-05, "loss": 0.5177, "step": 26786 }, { "epoch": 0.7820793553472891, "grad_norm": 0.5369815321758366, "learning_rate": 1.210705596107056e-05, "loss": 0.6456, "step": 26787 }, { "epoch": 0.7821085515751365, "grad_norm": 0.5348361716045702, "learning_rate": 1.210543390105434e-05, "loss": 0.6352, "step": 26788 }, { "epoch": 0.7821377478029838, "grad_norm": 0.5366502633932332, "learning_rate": 1.210381184103812e-05, "loss": 0.6481, "step": 26789 }, { "epoch": 0.7821669440308312, "grad_norm": 0.5276639513104114, "learning_rate": 1.2102189781021898e-05, "loss": 0.5632, "step": 26790 }, { "epoch": 0.7821961402586786, "grad_norm": 0.4845162731075211, "learning_rate": 1.2100567721005677e-05, "loss": 0.5012, "step": 26791 }, { "epoch": 0.7822253364865259, "grad_norm": 0.4968044125626402, "learning_rate": 1.2098945660989457e-05, "loss": 0.5885, "step": 26792 }, { "epoch": 0.7822545327143733, "grad_norm": 0.5162584608483837, "learning_rate": 1.2097323600973237e-05, "loss": 0.6038, "step": 26793 }, { "epoch": 0.7822837289422206, "grad_norm": 0.49026018584135234, "learning_rate": 1.2095701540957017e-05, "loss": 0.5485, "step": 26794 }, { "epoch": 0.782312925170068, "grad_norm": 0.5820624100318075, "learning_rate": 1.2094079480940795e-05, "loss": 0.6874, "step": 26795 }, { "epoch": 0.7823421213979154, "grad_norm": 0.5234773819484795, "learning_rate": 1.2092457420924574e-05, "loss": 0.5829, "step": 26796 }, { "epoch": 0.7823713176257627, "grad_norm": 0.5243837259708664, "learning_rate": 1.2090835360908354e-05, "loss": 0.5604, "step": 26797 }, { "epoch": 0.7824005138536101, "grad_norm": 0.5374622098871803, "learning_rate": 1.2089213300892134e-05, "loss": 0.5775, "step": 26798 }, { "epoch": 0.7824297100814575, "grad_norm": 0.48331295634810423, "learning_rate": 1.2087591240875914e-05, "loss": 0.5073, "step": 26799 }, { "epoch": 0.7824589063093048, "grad_norm": 0.5232657041879359, "learning_rate": 1.2085969180859692e-05, "loss": 0.56, "step": 26800 }, { "epoch": 0.7824881025371522, "grad_norm": 0.5549789917207517, "learning_rate": 1.208434712084347e-05, "loss": 0.6552, "step": 26801 }, { "epoch": 0.7825172987649995, "grad_norm": 0.5064686693400985, "learning_rate": 1.2082725060827252e-05, "loss": 0.5752, "step": 26802 }, { "epoch": 0.7825464949928469, "grad_norm": 0.5301325106378477, "learning_rate": 1.208110300081103e-05, "loss": 0.5581, "step": 26803 }, { "epoch": 0.7825756912206943, "grad_norm": 0.521508598705397, "learning_rate": 1.2079480940794811e-05, "loss": 0.561, "step": 26804 }, { "epoch": 0.7826048874485416, "grad_norm": 0.5141868971935295, "learning_rate": 1.207785888077859e-05, "loss": 0.5964, "step": 26805 }, { "epoch": 0.782634083676389, "grad_norm": 0.555230221080729, "learning_rate": 1.2076236820762368e-05, "loss": 0.6282, "step": 26806 }, { "epoch": 0.7826632799042363, "grad_norm": 0.48357625353013833, "learning_rate": 1.207461476074615e-05, "loss": 0.5038, "step": 26807 }, { "epoch": 0.7826924761320837, "grad_norm": 0.4914349166794172, "learning_rate": 1.2072992700729928e-05, "loss": 0.5171, "step": 26808 }, { "epoch": 0.7827216723599311, "grad_norm": 0.5085721592483595, "learning_rate": 1.2071370640713708e-05, "loss": 0.562, "step": 26809 }, { "epoch": 0.7827508685877784, "grad_norm": 0.5385278208904267, "learning_rate": 1.2069748580697486e-05, "loss": 0.6216, "step": 26810 }, { "epoch": 0.7827800648156258, "grad_norm": 0.5426695856063255, "learning_rate": 1.2068126520681265e-05, "loss": 0.6045, "step": 26811 }, { "epoch": 0.7828092610434731, "grad_norm": 0.5375522305329756, "learning_rate": 1.2066504460665046e-05, "loss": 0.6335, "step": 26812 }, { "epoch": 0.7828384572713205, "grad_norm": 0.5437768525197241, "learning_rate": 1.2064882400648825e-05, "loss": 0.616, "step": 26813 }, { "epoch": 0.7828676534991679, "grad_norm": 0.53881213310649, "learning_rate": 1.2063260340632603e-05, "loss": 0.6203, "step": 26814 }, { "epoch": 0.7828968497270152, "grad_norm": 0.5415236769401762, "learning_rate": 1.2061638280616383e-05, "loss": 0.6046, "step": 26815 }, { "epoch": 0.7829260459548626, "grad_norm": 0.584685545523622, "learning_rate": 1.2060016220600162e-05, "loss": 0.6837, "step": 26816 }, { "epoch": 0.78295524218271, "grad_norm": 0.5765278524363943, "learning_rate": 1.2058394160583943e-05, "loss": 0.6517, "step": 26817 }, { "epoch": 0.7829844384105573, "grad_norm": 0.5499128517955508, "learning_rate": 1.2056772100567722e-05, "loss": 0.5854, "step": 26818 }, { "epoch": 0.7830136346384047, "grad_norm": 0.5123348164166207, "learning_rate": 1.20551500405515e-05, "loss": 0.5586, "step": 26819 }, { "epoch": 0.783042830866252, "grad_norm": 0.5517255016306223, "learning_rate": 1.205352798053528e-05, "loss": 0.6534, "step": 26820 }, { "epoch": 0.7830720270940994, "grad_norm": 0.4976538248343822, "learning_rate": 1.205190592051906e-05, "loss": 0.5447, "step": 26821 }, { "epoch": 0.7831012233219468, "grad_norm": 0.4974741113017916, "learning_rate": 1.205028386050284e-05, "loss": 0.5484, "step": 26822 }, { "epoch": 0.7831304195497941, "grad_norm": 0.5147164405273456, "learning_rate": 1.2048661800486619e-05, "loss": 0.5628, "step": 26823 }, { "epoch": 0.7831596157776415, "grad_norm": 0.47808710242823405, "learning_rate": 1.2047039740470397e-05, "loss": 0.5364, "step": 26824 }, { "epoch": 0.7831888120054888, "grad_norm": 0.539495853582079, "learning_rate": 1.2045417680454177e-05, "loss": 0.6304, "step": 26825 }, { "epoch": 0.7832180082333362, "grad_norm": 0.5260446912079848, "learning_rate": 1.2043795620437957e-05, "loss": 0.5743, "step": 26826 }, { "epoch": 0.7832472044611836, "grad_norm": 0.4939284769540415, "learning_rate": 1.2042173560421737e-05, "loss": 0.5323, "step": 26827 }, { "epoch": 0.7832764006890309, "grad_norm": 0.5144419257511201, "learning_rate": 1.2040551500405516e-05, "loss": 0.5405, "step": 26828 }, { "epoch": 0.7833055969168783, "grad_norm": 0.5791791258510541, "learning_rate": 1.2038929440389294e-05, "loss": 0.7015, "step": 26829 }, { "epoch": 0.7833347931447256, "grad_norm": 0.520449393275681, "learning_rate": 1.2037307380373074e-05, "loss": 0.5632, "step": 26830 }, { "epoch": 0.783363989372573, "grad_norm": 0.588474872065898, "learning_rate": 1.2035685320356854e-05, "loss": 0.6187, "step": 26831 }, { "epoch": 0.7833931856004204, "grad_norm": 0.5521161755361146, "learning_rate": 1.2034063260340634e-05, "loss": 0.5653, "step": 26832 }, { "epoch": 0.7834223818282677, "grad_norm": 0.5518634714753458, "learning_rate": 1.2032441200324412e-05, "loss": 0.6721, "step": 26833 }, { "epoch": 0.7834515780561152, "grad_norm": 0.5114066747548786, "learning_rate": 1.2030819140308191e-05, "loss": 0.5601, "step": 26834 }, { "epoch": 0.7834807742839626, "grad_norm": 0.5559972709266842, "learning_rate": 1.2029197080291973e-05, "loss": 0.6317, "step": 26835 }, { "epoch": 0.7835099705118099, "grad_norm": 0.4982622770343277, "learning_rate": 1.2027575020275751e-05, "loss": 0.54, "step": 26836 }, { "epoch": 0.7835391667396573, "grad_norm": 0.551889420798276, "learning_rate": 1.2025952960259531e-05, "loss": 0.6346, "step": 26837 }, { "epoch": 0.7835683629675047, "grad_norm": 0.5357160387489579, "learning_rate": 1.202433090024331e-05, "loss": 0.5749, "step": 26838 }, { "epoch": 0.783597559195352, "grad_norm": 0.5261091648229724, "learning_rate": 1.2022708840227088e-05, "loss": 0.5713, "step": 26839 }, { "epoch": 0.7836267554231994, "grad_norm": 0.5459088977127188, "learning_rate": 1.202108678021087e-05, "loss": 0.6379, "step": 26840 }, { "epoch": 0.7836559516510467, "grad_norm": 0.5569791272176735, "learning_rate": 1.2019464720194648e-05, "loss": 0.6501, "step": 26841 }, { "epoch": 0.7836851478788941, "grad_norm": 0.5315736897217958, "learning_rate": 1.2017842660178428e-05, "loss": 0.6239, "step": 26842 }, { "epoch": 0.7837143441067415, "grad_norm": 0.5392237739030776, "learning_rate": 1.2016220600162206e-05, "loss": 0.6214, "step": 26843 }, { "epoch": 0.7837435403345888, "grad_norm": 0.4891519522318994, "learning_rate": 1.2014598540145985e-05, "loss": 0.5297, "step": 26844 }, { "epoch": 0.7837727365624362, "grad_norm": 0.5341118980267088, "learning_rate": 1.2012976480129767e-05, "loss": 0.6015, "step": 26845 }, { "epoch": 0.7838019327902835, "grad_norm": 0.5388260981581551, "learning_rate": 1.2011354420113545e-05, "loss": 0.6163, "step": 26846 }, { "epoch": 0.7838311290181309, "grad_norm": 0.5603179915673063, "learning_rate": 1.2009732360097323e-05, "loss": 0.6037, "step": 26847 }, { "epoch": 0.7838603252459783, "grad_norm": 0.5298486014856192, "learning_rate": 1.2008110300081103e-05, "loss": 0.5874, "step": 26848 }, { "epoch": 0.7838895214738256, "grad_norm": 0.5680658097765156, "learning_rate": 1.2006488240064882e-05, "loss": 0.6627, "step": 26849 }, { "epoch": 0.783918717701673, "grad_norm": 0.5355904097629742, "learning_rate": 1.2004866180048663e-05, "loss": 0.6173, "step": 26850 }, { "epoch": 0.7839479139295203, "grad_norm": 0.5003619595551254, "learning_rate": 1.2003244120032442e-05, "loss": 0.5372, "step": 26851 }, { "epoch": 0.7839771101573677, "grad_norm": 0.5107338980826334, "learning_rate": 1.200162206001622e-05, "loss": 0.5502, "step": 26852 }, { "epoch": 0.7840063063852151, "grad_norm": 0.5303023233914624, "learning_rate": 1.2e-05, "loss": 0.5585, "step": 26853 }, { "epoch": 0.7840355026130624, "grad_norm": 0.544838316219257, "learning_rate": 1.199837793998378e-05, "loss": 0.6107, "step": 26854 }, { "epoch": 0.7840646988409098, "grad_norm": 0.5181402503997758, "learning_rate": 1.199675587996756e-05, "loss": 0.5477, "step": 26855 }, { "epoch": 0.7840938950687572, "grad_norm": 0.4954135323484899, "learning_rate": 1.1995133819951339e-05, "loss": 0.5495, "step": 26856 }, { "epoch": 0.7841230912966045, "grad_norm": 0.510178251927871, "learning_rate": 1.1993511759935117e-05, "loss": 0.5566, "step": 26857 }, { "epoch": 0.7841522875244519, "grad_norm": 0.533112074512289, "learning_rate": 1.1991889699918897e-05, "loss": 0.597, "step": 26858 }, { "epoch": 0.7841814837522992, "grad_norm": 0.5399087230060625, "learning_rate": 1.1990267639902677e-05, "loss": 0.6352, "step": 26859 }, { "epoch": 0.7842106799801466, "grad_norm": 0.5334885268183722, "learning_rate": 1.1988645579886457e-05, "loss": 0.6103, "step": 26860 }, { "epoch": 0.784239876207994, "grad_norm": 0.5450630179144398, "learning_rate": 1.1987023519870236e-05, "loss": 0.6574, "step": 26861 }, { "epoch": 0.7842690724358413, "grad_norm": 0.5422851237088164, "learning_rate": 1.1985401459854014e-05, "loss": 0.629, "step": 26862 }, { "epoch": 0.7842982686636887, "grad_norm": 0.5346817332401131, "learning_rate": 1.1983779399837794e-05, "loss": 0.5417, "step": 26863 }, { "epoch": 0.784327464891536, "grad_norm": 0.5435215423045885, "learning_rate": 1.1982157339821574e-05, "loss": 0.6532, "step": 26864 }, { "epoch": 0.7843566611193834, "grad_norm": 0.6129287590024576, "learning_rate": 1.1980535279805354e-05, "loss": 0.687, "step": 26865 }, { "epoch": 0.7843858573472308, "grad_norm": 0.5211308747511992, "learning_rate": 1.1978913219789133e-05, "loss": 0.575, "step": 26866 }, { "epoch": 0.7844150535750781, "grad_norm": 0.47955747019013034, "learning_rate": 1.1977291159772911e-05, "loss": 0.5067, "step": 26867 }, { "epoch": 0.7844442498029255, "grad_norm": 0.5225454001202071, "learning_rate": 1.1975669099756691e-05, "loss": 0.5447, "step": 26868 }, { "epoch": 0.7844734460307728, "grad_norm": 0.510041586234668, "learning_rate": 1.1974047039740471e-05, "loss": 0.5498, "step": 26869 }, { "epoch": 0.7845026422586202, "grad_norm": 0.5378649086276178, "learning_rate": 1.1972424979724251e-05, "loss": 0.5885, "step": 26870 }, { "epoch": 0.7845318384864676, "grad_norm": 0.5616481077160258, "learning_rate": 1.197080291970803e-05, "loss": 0.6045, "step": 26871 }, { "epoch": 0.7845610347143149, "grad_norm": 0.49001942822326755, "learning_rate": 1.1969180859691808e-05, "loss": 0.5118, "step": 26872 }, { "epoch": 0.7845902309421623, "grad_norm": 0.5051712791290807, "learning_rate": 1.196755879967559e-05, "loss": 0.5108, "step": 26873 }, { "epoch": 0.7846194271700097, "grad_norm": 0.49876106692611955, "learning_rate": 1.1965936739659368e-05, "loss": 0.5849, "step": 26874 }, { "epoch": 0.784648623397857, "grad_norm": 0.4952368375938146, "learning_rate": 1.1964314679643147e-05, "loss": 0.5436, "step": 26875 }, { "epoch": 0.7846778196257044, "grad_norm": 0.5318533937228731, "learning_rate": 1.1962692619626927e-05, "loss": 0.5975, "step": 26876 }, { "epoch": 0.7847070158535517, "grad_norm": 0.5060363491428014, "learning_rate": 1.1961070559610705e-05, "loss": 0.5703, "step": 26877 }, { "epoch": 0.7847362120813991, "grad_norm": 0.5049856353842451, "learning_rate": 1.1959448499594487e-05, "loss": 0.5763, "step": 26878 }, { "epoch": 0.7847654083092465, "grad_norm": 0.5373281247952254, "learning_rate": 1.1957826439578265e-05, "loss": 0.6177, "step": 26879 }, { "epoch": 0.7847946045370938, "grad_norm": 0.508569967185101, "learning_rate": 1.1956204379562044e-05, "loss": 0.5524, "step": 26880 }, { "epoch": 0.7848238007649412, "grad_norm": 0.5534914792581915, "learning_rate": 1.1954582319545824e-05, "loss": 0.647, "step": 26881 }, { "epoch": 0.7848529969927885, "grad_norm": 0.4865281824606447, "learning_rate": 1.1952960259529602e-05, "loss": 0.5326, "step": 26882 }, { "epoch": 0.7848821932206359, "grad_norm": 0.5189968248903398, "learning_rate": 1.1951338199513384e-05, "loss": 0.5363, "step": 26883 }, { "epoch": 0.7849113894484833, "grad_norm": 0.5205411722932224, "learning_rate": 1.1949716139497162e-05, "loss": 0.5898, "step": 26884 }, { "epoch": 0.7849405856763306, "grad_norm": 0.5582124825539657, "learning_rate": 1.194809407948094e-05, "loss": 0.6183, "step": 26885 }, { "epoch": 0.784969781904178, "grad_norm": 0.5213768750725241, "learning_rate": 1.194647201946472e-05, "loss": 0.5969, "step": 26886 }, { "epoch": 0.7849989781320253, "grad_norm": 0.5126689239967971, "learning_rate": 1.19448499594485e-05, "loss": 0.5667, "step": 26887 }, { "epoch": 0.7850281743598727, "grad_norm": 0.49899578238234316, "learning_rate": 1.194322789943228e-05, "loss": 0.5659, "step": 26888 }, { "epoch": 0.7850573705877201, "grad_norm": 0.5135717138418829, "learning_rate": 1.1941605839416059e-05, "loss": 0.5814, "step": 26889 }, { "epoch": 0.7850865668155674, "grad_norm": 0.5177087522064193, "learning_rate": 1.1939983779399837e-05, "loss": 0.5875, "step": 26890 }, { "epoch": 0.7851157630434148, "grad_norm": 0.5493134740147602, "learning_rate": 1.1938361719383617e-05, "loss": 0.6333, "step": 26891 }, { "epoch": 0.7851449592712622, "grad_norm": 0.5074727683188822, "learning_rate": 1.1936739659367398e-05, "loss": 0.5539, "step": 26892 }, { "epoch": 0.7851741554991095, "grad_norm": 0.5106054179106871, "learning_rate": 1.1935117599351178e-05, "loss": 0.5502, "step": 26893 }, { "epoch": 0.7852033517269569, "grad_norm": 0.5575432829662968, "learning_rate": 1.1933495539334956e-05, "loss": 0.6722, "step": 26894 }, { "epoch": 0.7852325479548042, "grad_norm": 0.5453781179409488, "learning_rate": 1.1931873479318734e-05, "loss": 0.592, "step": 26895 }, { "epoch": 0.7852617441826516, "grad_norm": 0.5130533117056375, "learning_rate": 1.1930251419302514e-05, "loss": 0.5295, "step": 26896 }, { "epoch": 0.785290940410499, "grad_norm": 0.5105179636106025, "learning_rate": 1.1928629359286294e-05, "loss": 0.5358, "step": 26897 }, { "epoch": 0.7853201366383463, "grad_norm": 0.506204469917539, "learning_rate": 1.1927007299270075e-05, "loss": 0.5466, "step": 26898 }, { "epoch": 0.7853493328661937, "grad_norm": 0.5529904314161588, "learning_rate": 1.1925385239253853e-05, "loss": 0.6446, "step": 26899 }, { "epoch": 0.785378529094041, "grad_norm": 0.4936802060767515, "learning_rate": 1.1923763179237631e-05, "loss": 0.5683, "step": 26900 }, { "epoch": 0.7854077253218884, "grad_norm": 0.5729997766800743, "learning_rate": 1.1922141119221411e-05, "loss": 0.6709, "step": 26901 }, { "epoch": 0.7854369215497358, "grad_norm": 0.5086720773290107, "learning_rate": 1.1920519059205191e-05, "loss": 0.5384, "step": 26902 }, { "epoch": 0.7854661177775831, "grad_norm": 0.5210208072074748, "learning_rate": 1.1918896999188972e-05, "loss": 0.5692, "step": 26903 }, { "epoch": 0.7854953140054305, "grad_norm": 0.5545522708737923, "learning_rate": 1.191727493917275e-05, "loss": 0.6595, "step": 26904 }, { "epoch": 0.7855245102332779, "grad_norm": 0.5528024580576439, "learning_rate": 1.1915652879156528e-05, "loss": 0.6183, "step": 26905 }, { "epoch": 0.7855537064611252, "grad_norm": 0.4928808715845826, "learning_rate": 1.191403081914031e-05, "loss": 0.5767, "step": 26906 }, { "epoch": 0.7855829026889726, "grad_norm": 0.5010804394985644, "learning_rate": 1.1912408759124088e-05, "loss": 0.5535, "step": 26907 }, { "epoch": 0.7856120989168199, "grad_norm": 0.5503284589854051, "learning_rate": 1.1910786699107867e-05, "loss": 0.6499, "step": 26908 }, { "epoch": 0.7856412951446673, "grad_norm": 0.5515611958016519, "learning_rate": 1.1909164639091647e-05, "loss": 0.6143, "step": 26909 }, { "epoch": 0.7856704913725147, "grad_norm": 0.5326151071156203, "learning_rate": 1.1907542579075425e-05, "loss": 0.6002, "step": 26910 }, { "epoch": 0.785699687600362, "grad_norm": 0.517197054964217, "learning_rate": 1.1905920519059207e-05, "loss": 0.587, "step": 26911 }, { "epoch": 0.7857288838282094, "grad_norm": 0.5124277469531185, "learning_rate": 1.1904298459042985e-05, "loss": 0.59, "step": 26912 }, { "epoch": 0.7857580800560567, "grad_norm": 0.5174753628240425, "learning_rate": 1.1902676399026764e-05, "loss": 0.5554, "step": 26913 }, { "epoch": 0.7857872762839041, "grad_norm": 0.5284703245447328, "learning_rate": 1.1901054339010544e-05, "loss": 0.6143, "step": 26914 }, { "epoch": 0.7858164725117515, "grad_norm": 0.5529486803019089, "learning_rate": 1.1899432278994322e-05, "loss": 0.6172, "step": 26915 }, { "epoch": 0.7858456687395988, "grad_norm": 0.5591891247493874, "learning_rate": 1.1897810218978104e-05, "loss": 0.6157, "step": 26916 }, { "epoch": 0.7858748649674462, "grad_norm": 0.5095933510589519, "learning_rate": 1.1896188158961882e-05, "loss": 0.5723, "step": 26917 }, { "epoch": 0.7859040611952935, "grad_norm": 0.5240351215046801, "learning_rate": 1.189456609894566e-05, "loss": 0.5961, "step": 26918 }, { "epoch": 0.7859332574231409, "grad_norm": 0.5758114592775845, "learning_rate": 1.189294403892944e-05, "loss": 0.7271, "step": 26919 }, { "epoch": 0.7859624536509883, "grad_norm": 0.521178645292273, "learning_rate": 1.189132197891322e-05, "loss": 0.5867, "step": 26920 }, { "epoch": 0.7859916498788356, "grad_norm": 0.5094438054011767, "learning_rate": 1.1889699918897001e-05, "loss": 0.5393, "step": 26921 }, { "epoch": 0.786020846106683, "grad_norm": 0.5097374762768858, "learning_rate": 1.188807785888078e-05, "loss": 0.5114, "step": 26922 }, { "epoch": 0.7860500423345304, "grad_norm": 0.5850192752128534, "learning_rate": 1.1886455798864558e-05, "loss": 0.6848, "step": 26923 }, { "epoch": 0.7860792385623777, "grad_norm": 0.5225142729632002, "learning_rate": 1.1884833738848338e-05, "loss": 0.6147, "step": 26924 }, { "epoch": 0.7861084347902251, "grad_norm": 0.5440438641266049, "learning_rate": 1.1883211678832118e-05, "loss": 0.6115, "step": 26925 }, { "epoch": 0.7861376310180724, "grad_norm": 0.5157116440399335, "learning_rate": 1.1881589618815898e-05, "loss": 0.5801, "step": 26926 }, { "epoch": 0.7861668272459198, "grad_norm": 0.4996122695568855, "learning_rate": 1.1879967558799676e-05, "loss": 0.5415, "step": 26927 }, { "epoch": 0.7861960234737672, "grad_norm": 0.5032611234779549, "learning_rate": 1.1878345498783455e-05, "loss": 0.5589, "step": 26928 }, { "epoch": 0.7862252197016145, "grad_norm": 0.5023503214207666, "learning_rate": 1.1876723438767235e-05, "loss": 0.542, "step": 26929 }, { "epoch": 0.7862544159294619, "grad_norm": 0.5192455814464876, "learning_rate": 1.1875101378751015e-05, "loss": 0.5824, "step": 26930 }, { "epoch": 0.7862836121573092, "grad_norm": 0.49455325548661844, "learning_rate": 1.1873479318734795e-05, "loss": 0.5149, "step": 26931 }, { "epoch": 0.7863128083851566, "grad_norm": 0.49794206730902835, "learning_rate": 1.1871857258718573e-05, "loss": 0.5551, "step": 26932 }, { "epoch": 0.786342004613004, "grad_norm": 0.4972761974204094, "learning_rate": 1.1870235198702352e-05, "loss": 0.5455, "step": 26933 }, { "epoch": 0.7863712008408513, "grad_norm": 0.5426214668570927, "learning_rate": 1.1868613138686132e-05, "loss": 0.6098, "step": 26934 }, { "epoch": 0.7864003970686987, "grad_norm": 0.5314278853713171, "learning_rate": 1.1866991078669912e-05, "loss": 0.62, "step": 26935 }, { "epoch": 0.786429593296546, "grad_norm": 0.4620463405831771, "learning_rate": 1.186536901865369e-05, "loss": 0.4725, "step": 26936 }, { "epoch": 0.7864587895243934, "grad_norm": 0.5326989441529425, "learning_rate": 1.186374695863747e-05, "loss": 0.6055, "step": 26937 }, { "epoch": 0.7864879857522408, "grad_norm": 0.514413168338868, "learning_rate": 1.1862124898621248e-05, "loss": 0.5601, "step": 26938 }, { "epoch": 0.7865171819800881, "grad_norm": 0.534986596947759, "learning_rate": 1.186050283860503e-05, "loss": 0.5912, "step": 26939 }, { "epoch": 0.7865463782079355, "grad_norm": 0.5185356139361703, "learning_rate": 1.1858880778588809e-05, "loss": 0.5352, "step": 26940 }, { "epoch": 0.7865755744357829, "grad_norm": 0.49000262278259304, "learning_rate": 1.1857258718572587e-05, "loss": 0.5245, "step": 26941 }, { "epoch": 0.7866047706636302, "grad_norm": 0.5028255638087483, "learning_rate": 1.1855636658556367e-05, "loss": 0.5678, "step": 26942 }, { "epoch": 0.7866339668914776, "grad_norm": 0.5435453506238513, "learning_rate": 1.1854014598540145e-05, "loss": 0.6418, "step": 26943 }, { "epoch": 0.7866631631193249, "grad_norm": 0.5245590321762063, "learning_rate": 1.1852392538523927e-05, "loss": 0.5912, "step": 26944 }, { "epoch": 0.7866923593471723, "grad_norm": 0.502021050730173, "learning_rate": 1.1850770478507706e-05, "loss": 0.5398, "step": 26945 }, { "epoch": 0.7867215555750197, "grad_norm": 0.5676441033836184, "learning_rate": 1.1849148418491484e-05, "loss": 0.6724, "step": 26946 }, { "epoch": 0.786750751802867, "grad_norm": 0.5058894286423609, "learning_rate": 1.1847526358475264e-05, "loss": 0.5289, "step": 26947 }, { "epoch": 0.7867799480307144, "grad_norm": 0.5119395919868005, "learning_rate": 1.1845904298459042e-05, "loss": 0.519, "step": 26948 }, { "epoch": 0.7868091442585617, "grad_norm": 0.5580194898210673, "learning_rate": 1.1844282238442824e-05, "loss": 0.597, "step": 26949 }, { "epoch": 0.7868383404864091, "grad_norm": 0.506845772115562, "learning_rate": 1.1842660178426603e-05, "loss": 0.5767, "step": 26950 }, { "epoch": 0.7868675367142565, "grad_norm": 0.5343340706203948, "learning_rate": 1.1841038118410381e-05, "loss": 0.6084, "step": 26951 }, { "epoch": 0.7868967329421038, "grad_norm": 0.5446148344973468, "learning_rate": 1.1839416058394161e-05, "loss": 0.6324, "step": 26952 }, { "epoch": 0.7869259291699512, "grad_norm": 0.5281686495385978, "learning_rate": 1.1837793998377941e-05, "loss": 0.61, "step": 26953 }, { "epoch": 0.7869551253977985, "grad_norm": 0.515392996642118, "learning_rate": 1.1836171938361721e-05, "loss": 0.5985, "step": 26954 }, { "epoch": 0.786984321625646, "grad_norm": 0.49709829618397794, "learning_rate": 1.18345498783455e-05, "loss": 0.5216, "step": 26955 }, { "epoch": 0.7870135178534934, "grad_norm": 0.5294025914564834, "learning_rate": 1.1832927818329278e-05, "loss": 0.5466, "step": 26956 }, { "epoch": 0.7870427140813407, "grad_norm": 0.5571170203300941, "learning_rate": 1.1831305758313058e-05, "loss": 0.642, "step": 26957 }, { "epoch": 0.7870719103091881, "grad_norm": 0.5393155481955952, "learning_rate": 1.1829683698296838e-05, "loss": 0.6205, "step": 26958 }, { "epoch": 0.7871011065370355, "grad_norm": 0.48484023206291954, "learning_rate": 1.1828061638280618e-05, "loss": 0.5079, "step": 26959 }, { "epoch": 0.7871303027648828, "grad_norm": 0.5063171799972357, "learning_rate": 1.1826439578264396e-05, "loss": 0.5546, "step": 26960 }, { "epoch": 0.7871594989927302, "grad_norm": 0.5423762736884178, "learning_rate": 1.1824817518248175e-05, "loss": 0.626, "step": 26961 }, { "epoch": 0.7871886952205776, "grad_norm": 0.4942156757508021, "learning_rate": 1.1823195458231955e-05, "loss": 0.5093, "step": 26962 }, { "epoch": 0.7872178914484249, "grad_norm": 0.5161244955244744, "learning_rate": 1.1821573398215735e-05, "loss": 0.5497, "step": 26963 }, { "epoch": 0.7872470876762723, "grad_norm": 0.4778220069234747, "learning_rate": 1.1819951338199515e-05, "loss": 0.5039, "step": 26964 }, { "epoch": 0.7872762839041196, "grad_norm": 0.4982098253865026, "learning_rate": 1.1818329278183293e-05, "loss": 0.5481, "step": 26965 }, { "epoch": 0.787305480131967, "grad_norm": 0.5754641028662654, "learning_rate": 1.1816707218167072e-05, "loss": 0.6875, "step": 26966 }, { "epoch": 0.7873346763598144, "grad_norm": 0.5553837131174264, "learning_rate": 1.1815085158150852e-05, "loss": 0.6141, "step": 26967 }, { "epoch": 0.7873638725876617, "grad_norm": 0.5405148622428804, "learning_rate": 1.1813463098134632e-05, "loss": 0.6421, "step": 26968 }, { "epoch": 0.7873930688155091, "grad_norm": 0.5088617852595655, "learning_rate": 1.181184103811841e-05, "loss": 0.5344, "step": 26969 }, { "epoch": 0.7874222650433564, "grad_norm": 0.5800366637861837, "learning_rate": 1.181021897810219e-05, "loss": 0.6571, "step": 26970 }, { "epoch": 0.7874514612712038, "grad_norm": 0.5258832226568345, "learning_rate": 1.1808596918085969e-05, "loss": 0.5979, "step": 26971 }, { "epoch": 0.7874806574990512, "grad_norm": 0.515780225309035, "learning_rate": 1.180697485806975e-05, "loss": 0.5659, "step": 26972 }, { "epoch": 0.7875098537268985, "grad_norm": 0.49879300829750817, "learning_rate": 1.1805352798053529e-05, "loss": 0.5711, "step": 26973 }, { "epoch": 0.7875390499547459, "grad_norm": 0.5015906774601733, "learning_rate": 1.1803730738037307e-05, "loss": 0.5384, "step": 26974 }, { "epoch": 0.7875682461825932, "grad_norm": 0.5167561420230586, "learning_rate": 1.1802108678021087e-05, "loss": 0.5951, "step": 26975 }, { "epoch": 0.7875974424104406, "grad_norm": 0.546759291774294, "learning_rate": 1.1800486618004866e-05, "loss": 0.639, "step": 26976 }, { "epoch": 0.787626638638288, "grad_norm": 0.5722401152702441, "learning_rate": 1.1798864557988647e-05, "loss": 0.6528, "step": 26977 }, { "epoch": 0.7876558348661353, "grad_norm": 0.5263911238992652, "learning_rate": 1.1797242497972426e-05, "loss": 0.6244, "step": 26978 }, { "epoch": 0.7876850310939827, "grad_norm": 0.49032786407913914, "learning_rate": 1.1795620437956204e-05, "loss": 0.5244, "step": 26979 }, { "epoch": 0.78771422732183, "grad_norm": 0.5134396429958776, "learning_rate": 1.1793998377939984e-05, "loss": 0.5719, "step": 26980 }, { "epoch": 0.7877434235496774, "grad_norm": 0.8292327662830752, "learning_rate": 1.1792376317923763e-05, "loss": 0.658, "step": 26981 }, { "epoch": 0.7877726197775248, "grad_norm": 0.5119154635321493, "learning_rate": 1.1790754257907544e-05, "loss": 0.5616, "step": 26982 }, { "epoch": 0.7878018160053721, "grad_norm": 0.586339487074122, "learning_rate": 1.1789132197891323e-05, "loss": 0.7547, "step": 26983 }, { "epoch": 0.7878310122332195, "grad_norm": 0.5375151351922719, "learning_rate": 1.1787510137875101e-05, "loss": 0.5946, "step": 26984 }, { "epoch": 0.7878602084610669, "grad_norm": 0.5249264943299823, "learning_rate": 1.1785888077858881e-05, "loss": 0.5851, "step": 26985 }, { "epoch": 0.7878894046889142, "grad_norm": 0.5582372050578887, "learning_rate": 1.1784266017842661e-05, "loss": 0.662, "step": 26986 }, { "epoch": 0.7879186009167616, "grad_norm": 0.49727440110228927, "learning_rate": 1.1782643957826441e-05, "loss": 0.5442, "step": 26987 }, { "epoch": 0.7879477971446089, "grad_norm": 0.5804183409514072, "learning_rate": 1.178102189781022e-05, "loss": 0.7001, "step": 26988 }, { "epoch": 0.7879769933724563, "grad_norm": 0.4968383211696517, "learning_rate": 1.1779399837793998e-05, "loss": 0.5745, "step": 26989 }, { "epoch": 0.7880061896003037, "grad_norm": 0.5321610547666494, "learning_rate": 1.1777777777777778e-05, "loss": 0.6185, "step": 26990 }, { "epoch": 0.788035385828151, "grad_norm": 0.5197574082780096, "learning_rate": 1.1776155717761558e-05, "loss": 0.5729, "step": 26991 }, { "epoch": 0.7880645820559984, "grad_norm": 0.5417806265440771, "learning_rate": 1.1774533657745338e-05, "loss": 0.63, "step": 26992 }, { "epoch": 0.7880937782838457, "grad_norm": 0.5052438306715936, "learning_rate": 1.1772911597729117e-05, "loss": 0.5392, "step": 26993 }, { "epoch": 0.7881229745116931, "grad_norm": 0.49577502013979796, "learning_rate": 1.1771289537712895e-05, "loss": 0.5354, "step": 26994 }, { "epoch": 0.7881521707395405, "grad_norm": 0.49461823681797146, "learning_rate": 1.1769667477696675e-05, "loss": 0.5476, "step": 26995 }, { "epoch": 0.7881813669673878, "grad_norm": 0.5331913363062278, "learning_rate": 1.1768045417680455e-05, "loss": 0.6056, "step": 26996 }, { "epoch": 0.7882105631952352, "grad_norm": 0.49302890668672833, "learning_rate": 1.1766423357664234e-05, "loss": 0.498, "step": 26997 }, { "epoch": 0.7882397594230826, "grad_norm": 0.507385471643951, "learning_rate": 1.1764801297648014e-05, "loss": 0.5261, "step": 26998 }, { "epoch": 0.7882689556509299, "grad_norm": 0.5437836283200514, "learning_rate": 1.1763179237631792e-05, "loss": 0.6028, "step": 26999 }, { "epoch": 0.7882981518787773, "grad_norm": 0.49011859488206216, "learning_rate": 1.1761557177615572e-05, "loss": 0.5335, "step": 27000 }, { "epoch": 0.7883273481066246, "grad_norm": 0.5169386090366459, "learning_rate": 1.1759935117599352e-05, "loss": 0.605, "step": 27001 }, { "epoch": 0.788356544334472, "grad_norm": 0.5221252801124057, "learning_rate": 1.175831305758313e-05, "loss": 0.5698, "step": 27002 }, { "epoch": 0.7883857405623194, "grad_norm": 0.5830620975973098, "learning_rate": 1.175669099756691e-05, "loss": 0.6762, "step": 27003 }, { "epoch": 0.7884149367901667, "grad_norm": 0.5048679090357511, "learning_rate": 1.1755068937550689e-05, "loss": 0.563, "step": 27004 }, { "epoch": 0.7884441330180141, "grad_norm": 0.5003654927496349, "learning_rate": 1.175344687753447e-05, "loss": 0.5337, "step": 27005 }, { "epoch": 0.7884733292458614, "grad_norm": 0.563742628285757, "learning_rate": 1.1751824817518249e-05, "loss": 0.6924, "step": 27006 }, { "epoch": 0.7885025254737088, "grad_norm": 0.5152548328562273, "learning_rate": 1.1750202757502027e-05, "loss": 0.5896, "step": 27007 }, { "epoch": 0.7885317217015562, "grad_norm": 0.5112165081833251, "learning_rate": 1.1748580697485808e-05, "loss": 0.5436, "step": 27008 }, { "epoch": 0.7885609179294035, "grad_norm": 0.5431835321625758, "learning_rate": 1.1746958637469586e-05, "loss": 0.5947, "step": 27009 }, { "epoch": 0.7885901141572509, "grad_norm": 0.5155532235165581, "learning_rate": 1.1745336577453368e-05, "loss": 0.5386, "step": 27010 }, { "epoch": 0.7886193103850982, "grad_norm": 0.507239359100849, "learning_rate": 1.1743714517437146e-05, "loss": 0.5472, "step": 27011 }, { "epoch": 0.7886485066129456, "grad_norm": 0.561577349785679, "learning_rate": 1.1742092457420924e-05, "loss": 0.6562, "step": 27012 }, { "epoch": 0.788677702840793, "grad_norm": 0.5278190821905447, "learning_rate": 1.1740470397404704e-05, "loss": 0.59, "step": 27013 }, { "epoch": 0.7887068990686403, "grad_norm": 0.6162160952814532, "learning_rate": 1.1738848337388483e-05, "loss": 0.652, "step": 27014 }, { "epoch": 0.7887360952964877, "grad_norm": 0.5641106802242295, "learning_rate": 1.1737226277372265e-05, "loss": 0.6632, "step": 27015 }, { "epoch": 0.788765291524335, "grad_norm": 0.5023136084363702, "learning_rate": 1.1735604217356043e-05, "loss": 0.5562, "step": 27016 }, { "epoch": 0.7887944877521824, "grad_norm": 0.49707084576805843, "learning_rate": 1.1733982157339821e-05, "loss": 0.5368, "step": 27017 }, { "epoch": 0.7888236839800298, "grad_norm": 0.5026376669926025, "learning_rate": 1.1732360097323601e-05, "loss": 0.538, "step": 27018 }, { "epoch": 0.7888528802078771, "grad_norm": 0.5244948864444232, "learning_rate": 1.173073803730738e-05, "loss": 0.5789, "step": 27019 }, { "epoch": 0.7888820764357245, "grad_norm": 0.49779789060330354, "learning_rate": 1.1729115977291162e-05, "loss": 0.53, "step": 27020 }, { "epoch": 0.7889112726635719, "grad_norm": 0.501241281149757, "learning_rate": 1.172749391727494e-05, "loss": 0.5949, "step": 27021 }, { "epoch": 0.7889404688914192, "grad_norm": 0.544371707331746, "learning_rate": 1.1725871857258718e-05, "loss": 0.6558, "step": 27022 }, { "epoch": 0.7889696651192666, "grad_norm": 0.5303029596796401, "learning_rate": 1.1724249797242498e-05, "loss": 0.6521, "step": 27023 }, { "epoch": 0.7889988613471139, "grad_norm": 0.47609115023340143, "learning_rate": 1.1722627737226278e-05, "loss": 0.4785, "step": 27024 }, { "epoch": 0.7890280575749613, "grad_norm": 0.5248451012541907, "learning_rate": 1.1721005677210058e-05, "loss": 0.5927, "step": 27025 }, { "epoch": 0.7890572538028087, "grad_norm": 0.5340406601218737, "learning_rate": 1.1719383617193837e-05, "loss": 0.5914, "step": 27026 }, { "epoch": 0.789086450030656, "grad_norm": 0.5258622150976858, "learning_rate": 1.1717761557177615e-05, "loss": 0.6057, "step": 27027 }, { "epoch": 0.7891156462585034, "grad_norm": 0.5354963373641529, "learning_rate": 1.1716139497161395e-05, "loss": 0.6, "step": 27028 }, { "epoch": 0.7891448424863508, "grad_norm": 0.4815155953179917, "learning_rate": 1.1714517437145175e-05, "loss": 0.5275, "step": 27029 }, { "epoch": 0.7891740387141981, "grad_norm": 0.5576235537621423, "learning_rate": 1.1712895377128954e-05, "loss": 0.6446, "step": 27030 }, { "epoch": 0.7892032349420455, "grad_norm": 0.5748355396989838, "learning_rate": 1.1711273317112734e-05, "loss": 0.6215, "step": 27031 }, { "epoch": 0.7892324311698928, "grad_norm": 0.5591141410628772, "learning_rate": 1.1709651257096512e-05, "loss": 0.6365, "step": 27032 }, { "epoch": 0.7892616273977402, "grad_norm": 0.5985309203664211, "learning_rate": 1.1708029197080292e-05, "loss": 0.7024, "step": 27033 }, { "epoch": 0.7892908236255876, "grad_norm": 0.5297763919265471, "learning_rate": 1.1706407137064072e-05, "loss": 0.6113, "step": 27034 }, { "epoch": 0.7893200198534349, "grad_norm": 0.55623632052338, "learning_rate": 1.170478507704785e-05, "loss": 0.6492, "step": 27035 }, { "epoch": 0.7893492160812823, "grad_norm": 0.5145435283331999, "learning_rate": 1.170316301703163e-05, "loss": 0.5739, "step": 27036 }, { "epoch": 0.7893784123091296, "grad_norm": 0.5741492145077619, "learning_rate": 1.1701540957015409e-05, "loss": 0.6155, "step": 27037 }, { "epoch": 0.789407608536977, "grad_norm": 0.5336931805944725, "learning_rate": 1.1699918896999191e-05, "loss": 0.5851, "step": 27038 }, { "epoch": 0.7894368047648244, "grad_norm": 0.5523028443215819, "learning_rate": 1.169829683698297e-05, "loss": 0.6349, "step": 27039 }, { "epoch": 0.7894660009926717, "grad_norm": 0.5214212198198417, "learning_rate": 1.1696674776966748e-05, "loss": 0.5626, "step": 27040 }, { "epoch": 0.7894951972205191, "grad_norm": 0.5369396361329413, "learning_rate": 1.1695052716950528e-05, "loss": 0.6083, "step": 27041 }, { "epoch": 0.7895243934483664, "grad_norm": 0.5656310492709259, "learning_rate": 1.1693430656934306e-05, "loss": 0.6302, "step": 27042 }, { "epoch": 0.7895535896762138, "grad_norm": 0.5362134796163299, "learning_rate": 1.1691808596918088e-05, "loss": 0.6297, "step": 27043 }, { "epoch": 0.7895827859040612, "grad_norm": 0.5065758981586898, "learning_rate": 1.1690186536901866e-05, "loss": 0.505, "step": 27044 }, { "epoch": 0.7896119821319085, "grad_norm": 0.5305391889723755, "learning_rate": 1.1688564476885645e-05, "loss": 0.5728, "step": 27045 }, { "epoch": 0.7896411783597559, "grad_norm": 0.5108460383632226, "learning_rate": 1.1686942416869425e-05, "loss": 0.5451, "step": 27046 }, { "epoch": 0.7896703745876033, "grad_norm": 0.5573098725716, "learning_rate": 1.1685320356853203e-05, "loss": 0.6621, "step": 27047 }, { "epoch": 0.7896995708154506, "grad_norm": 0.5200209617015673, "learning_rate": 1.1683698296836985e-05, "loss": 0.5493, "step": 27048 }, { "epoch": 0.789728767043298, "grad_norm": 0.508078904047277, "learning_rate": 1.1682076236820763e-05, "loss": 0.5285, "step": 27049 }, { "epoch": 0.7897579632711453, "grad_norm": 0.5398414182924484, "learning_rate": 1.1680454176804542e-05, "loss": 0.6468, "step": 27050 }, { "epoch": 0.7897871594989927, "grad_norm": 0.48865580615249227, "learning_rate": 1.1678832116788322e-05, "loss": 0.5368, "step": 27051 }, { "epoch": 0.7898163557268401, "grad_norm": 0.5285546493316381, "learning_rate": 1.16772100567721e-05, "loss": 0.5742, "step": 27052 }, { "epoch": 0.7898455519546874, "grad_norm": 0.4885424828583923, "learning_rate": 1.1675587996755882e-05, "loss": 0.5022, "step": 27053 }, { "epoch": 0.7898747481825348, "grad_norm": 0.5088689850190631, "learning_rate": 1.167396593673966e-05, "loss": 0.5582, "step": 27054 }, { "epoch": 0.7899039444103821, "grad_norm": 0.5074005725188796, "learning_rate": 1.1672343876723439e-05, "loss": 0.5275, "step": 27055 }, { "epoch": 0.7899331406382295, "grad_norm": 0.5071644572763747, "learning_rate": 1.1670721816707219e-05, "loss": 0.5504, "step": 27056 }, { "epoch": 0.7899623368660769, "grad_norm": 0.5580301518542041, "learning_rate": 1.1669099756690999e-05, "loss": 0.5847, "step": 27057 }, { "epoch": 0.7899915330939242, "grad_norm": 0.4864604273162776, "learning_rate": 1.1667477696674777e-05, "loss": 0.5339, "step": 27058 }, { "epoch": 0.7900207293217716, "grad_norm": 0.5122644188995639, "learning_rate": 1.1665855636658557e-05, "loss": 0.5712, "step": 27059 }, { "epoch": 0.790049925549619, "grad_norm": 0.48999882119957255, "learning_rate": 1.1664233576642335e-05, "loss": 0.5458, "step": 27060 }, { "epoch": 0.7900791217774663, "grad_norm": 0.5340498216153685, "learning_rate": 1.1662611516626116e-05, "loss": 0.6011, "step": 27061 }, { "epoch": 0.7901083180053137, "grad_norm": 0.46156582195010837, "learning_rate": 1.1660989456609896e-05, "loss": 0.4704, "step": 27062 }, { "epoch": 0.790137514233161, "grad_norm": 0.5060471063029862, "learning_rate": 1.1659367396593674e-05, "loss": 0.5226, "step": 27063 }, { "epoch": 0.7901667104610084, "grad_norm": 0.5149839281256476, "learning_rate": 1.1657745336577454e-05, "loss": 0.5548, "step": 27064 }, { "epoch": 0.7901959066888558, "grad_norm": 0.5395230400831516, "learning_rate": 1.1656123276561232e-05, "loss": 0.631, "step": 27065 }, { "epoch": 0.7902251029167031, "grad_norm": 0.5608801182702176, "learning_rate": 1.1654501216545012e-05, "loss": 0.6264, "step": 27066 }, { "epoch": 0.7902542991445505, "grad_norm": 0.5796443432520131, "learning_rate": 1.1652879156528793e-05, "loss": 0.6825, "step": 27067 }, { "epoch": 0.7902834953723978, "grad_norm": 0.5223608343176228, "learning_rate": 1.1651257096512571e-05, "loss": 0.578, "step": 27068 }, { "epoch": 0.7903126916002452, "grad_norm": 0.5302760067051957, "learning_rate": 1.1649635036496351e-05, "loss": 0.5482, "step": 27069 }, { "epoch": 0.7903418878280926, "grad_norm": 0.5342263489671757, "learning_rate": 1.164801297648013e-05, "loss": 0.624, "step": 27070 }, { "epoch": 0.7903710840559399, "grad_norm": 0.4798425062618183, "learning_rate": 1.1646390916463911e-05, "loss": 0.4735, "step": 27071 }, { "epoch": 0.7904002802837873, "grad_norm": 0.5043875433972992, "learning_rate": 1.164476885644769e-05, "loss": 0.5286, "step": 27072 }, { "epoch": 0.7904294765116346, "grad_norm": 0.5268345329066689, "learning_rate": 1.1643146796431468e-05, "loss": 0.573, "step": 27073 }, { "epoch": 0.790458672739482, "grad_norm": 0.5273321645412218, "learning_rate": 1.1641524736415248e-05, "loss": 0.632, "step": 27074 }, { "epoch": 0.7904878689673294, "grad_norm": 0.5117657036046476, "learning_rate": 1.1639902676399026e-05, "loss": 0.5598, "step": 27075 }, { "epoch": 0.7905170651951768, "grad_norm": 0.5117064774981392, "learning_rate": 1.1638280616382808e-05, "loss": 0.553, "step": 27076 }, { "epoch": 0.7905462614230242, "grad_norm": 0.5228356548654696, "learning_rate": 1.1636658556366586e-05, "loss": 0.6007, "step": 27077 }, { "epoch": 0.7905754576508716, "grad_norm": 0.5700395149530493, "learning_rate": 1.1635036496350365e-05, "loss": 0.6609, "step": 27078 }, { "epoch": 0.7906046538787189, "grad_norm": 0.5108719881754055, "learning_rate": 1.1633414436334145e-05, "loss": 0.5592, "step": 27079 }, { "epoch": 0.7906338501065663, "grad_norm": 0.5262516534145053, "learning_rate": 1.1631792376317923e-05, "loss": 0.6254, "step": 27080 }, { "epoch": 0.7906630463344136, "grad_norm": 0.5536873311460111, "learning_rate": 1.1630170316301705e-05, "loss": 0.5906, "step": 27081 }, { "epoch": 0.790692242562261, "grad_norm": 0.4994527527520619, "learning_rate": 1.1628548256285483e-05, "loss": 0.534, "step": 27082 }, { "epoch": 0.7907214387901084, "grad_norm": 0.5409813173218254, "learning_rate": 1.1626926196269262e-05, "loss": 0.585, "step": 27083 }, { "epoch": 0.7907506350179557, "grad_norm": 0.5658191415143007, "learning_rate": 1.1625304136253042e-05, "loss": 0.6527, "step": 27084 }, { "epoch": 0.7907798312458031, "grad_norm": 0.6104170469590803, "learning_rate": 1.162368207623682e-05, "loss": 0.7066, "step": 27085 }, { "epoch": 0.7908090274736504, "grad_norm": 0.47882528085431714, "learning_rate": 1.1622060016220602e-05, "loss": 0.5039, "step": 27086 }, { "epoch": 0.7908382237014978, "grad_norm": 0.5186441446387284, "learning_rate": 1.162043795620438e-05, "loss": 0.5502, "step": 27087 }, { "epoch": 0.7908674199293452, "grad_norm": 0.5058386039871235, "learning_rate": 1.1618815896188159e-05, "loss": 0.5971, "step": 27088 }, { "epoch": 0.7908966161571925, "grad_norm": 0.525314685563012, "learning_rate": 1.1617193836171939e-05, "loss": 0.5764, "step": 27089 }, { "epoch": 0.7909258123850399, "grad_norm": 0.48987211590786955, "learning_rate": 1.1615571776155719e-05, "loss": 0.5132, "step": 27090 }, { "epoch": 0.7909550086128873, "grad_norm": 0.552556304568184, "learning_rate": 1.1613949716139497e-05, "loss": 0.6039, "step": 27091 }, { "epoch": 0.7909842048407346, "grad_norm": 0.5445947901811763, "learning_rate": 1.1612327656123277e-05, "loss": 0.633, "step": 27092 }, { "epoch": 0.791013401068582, "grad_norm": 0.5072931349412234, "learning_rate": 1.1610705596107056e-05, "loss": 0.57, "step": 27093 }, { "epoch": 0.7910425972964293, "grad_norm": 0.5305219962559283, "learning_rate": 1.1609083536090836e-05, "loss": 0.5802, "step": 27094 }, { "epoch": 0.7910717935242767, "grad_norm": 0.5386025639210269, "learning_rate": 1.1607461476074616e-05, "loss": 0.6501, "step": 27095 }, { "epoch": 0.7911009897521241, "grad_norm": 0.5480765215448441, "learning_rate": 1.1605839416058394e-05, "loss": 0.5775, "step": 27096 }, { "epoch": 0.7911301859799714, "grad_norm": 0.5271339286083161, "learning_rate": 1.1604217356042174e-05, "loss": 0.5988, "step": 27097 }, { "epoch": 0.7911593822078188, "grad_norm": 0.5076565181700877, "learning_rate": 1.1602595296025953e-05, "loss": 0.4888, "step": 27098 }, { "epoch": 0.7911885784356661, "grad_norm": 0.5249504542207984, "learning_rate": 1.1600973236009733e-05, "loss": 0.5619, "step": 27099 }, { "epoch": 0.7912177746635135, "grad_norm": 0.4654623686547431, "learning_rate": 1.1599351175993513e-05, "loss": 0.4652, "step": 27100 }, { "epoch": 0.7912469708913609, "grad_norm": 0.5285860853316385, "learning_rate": 1.1597729115977291e-05, "loss": 0.5756, "step": 27101 }, { "epoch": 0.7912761671192082, "grad_norm": 0.530340384430266, "learning_rate": 1.1596107055961071e-05, "loss": 0.5416, "step": 27102 }, { "epoch": 0.7913053633470556, "grad_norm": 0.5279849484923748, "learning_rate": 1.159448499594485e-05, "loss": 0.6049, "step": 27103 }, { "epoch": 0.791334559574903, "grad_norm": 0.4839853420243753, "learning_rate": 1.1592862935928631e-05, "loss": 0.5215, "step": 27104 }, { "epoch": 0.7913637558027503, "grad_norm": 0.5315681011415013, "learning_rate": 1.159124087591241e-05, "loss": 0.5812, "step": 27105 }, { "epoch": 0.7913929520305977, "grad_norm": 0.5191635305812582, "learning_rate": 1.1589618815896188e-05, "loss": 0.589, "step": 27106 }, { "epoch": 0.791422148258445, "grad_norm": 0.503675262745898, "learning_rate": 1.1587996755879968e-05, "loss": 0.5325, "step": 27107 }, { "epoch": 0.7914513444862924, "grad_norm": 0.517718591151347, "learning_rate": 1.1586374695863747e-05, "loss": 0.5729, "step": 27108 }, { "epoch": 0.7914805407141398, "grad_norm": 0.5322461858800617, "learning_rate": 1.1584752635847528e-05, "loss": 0.6052, "step": 27109 }, { "epoch": 0.7915097369419871, "grad_norm": 0.5072908657217845, "learning_rate": 1.1583130575831307e-05, "loss": 0.5758, "step": 27110 }, { "epoch": 0.7915389331698345, "grad_norm": 0.4632269868343697, "learning_rate": 1.1581508515815085e-05, "loss": 0.4754, "step": 27111 }, { "epoch": 0.7915681293976818, "grad_norm": 0.5467580742338899, "learning_rate": 1.1579886455798865e-05, "loss": 0.6179, "step": 27112 }, { "epoch": 0.7915973256255292, "grad_norm": 0.5212021225865451, "learning_rate": 1.1578264395782643e-05, "loss": 0.5617, "step": 27113 }, { "epoch": 0.7916265218533766, "grad_norm": 0.5016346001872385, "learning_rate": 1.1576642335766425e-05, "loss": 0.5636, "step": 27114 }, { "epoch": 0.7916557180812239, "grad_norm": 0.548881897297969, "learning_rate": 1.1575020275750204e-05, "loss": 0.6436, "step": 27115 }, { "epoch": 0.7916849143090713, "grad_norm": 0.5326567881082287, "learning_rate": 1.1573398215733982e-05, "loss": 0.5699, "step": 27116 }, { "epoch": 0.7917141105369186, "grad_norm": 0.547104101280265, "learning_rate": 1.1571776155717762e-05, "loss": 0.5632, "step": 27117 }, { "epoch": 0.791743306764766, "grad_norm": 0.5296233836000672, "learning_rate": 1.157015409570154e-05, "loss": 0.628, "step": 27118 }, { "epoch": 0.7917725029926134, "grad_norm": 0.5454106941687262, "learning_rate": 1.156853203568532e-05, "loss": 0.6372, "step": 27119 }, { "epoch": 0.7918016992204607, "grad_norm": 0.5045568103194086, "learning_rate": 1.15669099756691e-05, "loss": 0.5705, "step": 27120 }, { "epoch": 0.7918308954483081, "grad_norm": 0.5595155674684311, "learning_rate": 1.1565287915652879e-05, "loss": 0.6495, "step": 27121 }, { "epoch": 0.7918600916761555, "grad_norm": 0.5052052731002368, "learning_rate": 1.1563665855636659e-05, "loss": 0.5523, "step": 27122 }, { "epoch": 0.7918892879040028, "grad_norm": 0.4914587258272089, "learning_rate": 1.1562043795620439e-05, "loss": 0.5435, "step": 27123 }, { "epoch": 0.7919184841318502, "grad_norm": 0.5068667860382077, "learning_rate": 1.1560421735604217e-05, "loss": 0.5395, "step": 27124 }, { "epoch": 0.7919476803596975, "grad_norm": 0.5142242813962771, "learning_rate": 1.1558799675587998e-05, "loss": 0.559, "step": 27125 }, { "epoch": 0.7919768765875449, "grad_norm": 0.5244211544417021, "learning_rate": 1.1557177615571776e-05, "loss": 0.6174, "step": 27126 }, { "epoch": 0.7920060728153923, "grad_norm": 0.5403126587237719, "learning_rate": 1.1555555555555556e-05, "loss": 0.6087, "step": 27127 }, { "epoch": 0.7920352690432396, "grad_norm": 0.4948152761239956, "learning_rate": 1.1553933495539336e-05, "loss": 0.5439, "step": 27128 }, { "epoch": 0.792064465271087, "grad_norm": 0.4660664545412241, "learning_rate": 1.1552311435523114e-05, "loss": 0.4648, "step": 27129 }, { "epoch": 0.7920936614989343, "grad_norm": 0.49521985753189224, "learning_rate": 1.1550689375506894e-05, "loss": 0.5337, "step": 27130 }, { "epoch": 0.7921228577267817, "grad_norm": 0.5468591823042277, "learning_rate": 1.1549067315490673e-05, "loss": 0.6223, "step": 27131 }, { "epoch": 0.7921520539546291, "grad_norm": 0.4844180057675291, "learning_rate": 1.1547445255474453e-05, "loss": 0.5029, "step": 27132 }, { "epoch": 0.7921812501824764, "grad_norm": 0.5369833780565549, "learning_rate": 1.1545823195458233e-05, "loss": 0.5787, "step": 27133 }, { "epoch": 0.7922104464103238, "grad_norm": 0.5572655309700921, "learning_rate": 1.1544201135442011e-05, "loss": 0.6349, "step": 27134 }, { "epoch": 0.7922396426381711, "grad_norm": 0.484981866456843, "learning_rate": 1.1542579075425791e-05, "loss": 0.4739, "step": 27135 }, { "epoch": 0.7922688388660185, "grad_norm": 0.5264180756916821, "learning_rate": 1.154095701540957e-05, "loss": 0.5878, "step": 27136 }, { "epoch": 0.7922980350938659, "grad_norm": 0.5178954116909592, "learning_rate": 1.1539334955393352e-05, "loss": 0.5727, "step": 27137 }, { "epoch": 0.7923272313217132, "grad_norm": 0.5065144308308966, "learning_rate": 1.153771289537713e-05, "loss": 0.5727, "step": 27138 }, { "epoch": 0.7923564275495606, "grad_norm": 0.4816509034080918, "learning_rate": 1.1536090835360908e-05, "loss": 0.4593, "step": 27139 }, { "epoch": 0.792385623777408, "grad_norm": 0.5575602822866477, "learning_rate": 1.1534468775344688e-05, "loss": 0.6509, "step": 27140 }, { "epoch": 0.7924148200052553, "grad_norm": 0.5273554731564718, "learning_rate": 1.1532846715328467e-05, "loss": 0.5547, "step": 27141 }, { "epoch": 0.7924440162331027, "grad_norm": 0.5150007590668714, "learning_rate": 1.1531224655312249e-05, "loss": 0.5438, "step": 27142 }, { "epoch": 0.79247321246095, "grad_norm": 0.5907521954370624, "learning_rate": 1.1529602595296027e-05, "loss": 0.615, "step": 27143 }, { "epoch": 0.7925024086887974, "grad_norm": 0.5075087029501423, "learning_rate": 1.1527980535279805e-05, "loss": 0.5731, "step": 27144 }, { "epoch": 0.7925316049166448, "grad_norm": 0.5431288251071557, "learning_rate": 1.1526358475263585e-05, "loss": 0.5965, "step": 27145 }, { "epoch": 0.7925608011444921, "grad_norm": 0.48511439489920993, "learning_rate": 1.1524736415247364e-05, "loss": 0.4814, "step": 27146 }, { "epoch": 0.7925899973723395, "grad_norm": 0.5846648931414822, "learning_rate": 1.1523114355231145e-05, "loss": 0.6509, "step": 27147 }, { "epoch": 0.7926191936001868, "grad_norm": 0.5569912052204106, "learning_rate": 1.1521492295214924e-05, "loss": 0.6462, "step": 27148 }, { "epoch": 0.7926483898280342, "grad_norm": 0.49658056904662684, "learning_rate": 1.1519870235198702e-05, "loss": 0.5117, "step": 27149 }, { "epoch": 0.7926775860558816, "grad_norm": 0.5408593383758171, "learning_rate": 1.1518248175182482e-05, "loss": 0.6314, "step": 27150 }, { "epoch": 0.7927067822837289, "grad_norm": 0.4992324801046261, "learning_rate": 1.151662611516626e-05, "loss": 0.5263, "step": 27151 }, { "epoch": 0.7927359785115763, "grad_norm": 0.48751294902505504, "learning_rate": 1.151500405515004e-05, "loss": 0.5267, "step": 27152 }, { "epoch": 0.7927651747394237, "grad_norm": 0.49963779612559195, "learning_rate": 1.151338199513382e-05, "loss": 0.5435, "step": 27153 }, { "epoch": 0.792794370967271, "grad_norm": 0.5430695096396789, "learning_rate": 1.15117599351176e-05, "loss": 0.6, "step": 27154 }, { "epoch": 0.7928235671951184, "grad_norm": 0.546899640310902, "learning_rate": 1.151013787510138e-05, "loss": 0.5778, "step": 27155 }, { "epoch": 0.7928527634229657, "grad_norm": 0.5303647184889073, "learning_rate": 1.150851581508516e-05, "loss": 0.5866, "step": 27156 }, { "epoch": 0.7928819596508131, "grad_norm": 0.5176250786764094, "learning_rate": 1.1506893755068938e-05, "loss": 0.5869, "step": 27157 }, { "epoch": 0.7929111558786605, "grad_norm": 0.5310532182009752, "learning_rate": 1.1505271695052718e-05, "loss": 0.6333, "step": 27158 }, { "epoch": 0.7929403521065078, "grad_norm": 0.5357135273396159, "learning_rate": 1.1503649635036496e-05, "loss": 0.6014, "step": 27159 }, { "epoch": 0.7929695483343552, "grad_norm": 0.5291811211949019, "learning_rate": 1.1502027575020276e-05, "loss": 0.5683, "step": 27160 }, { "epoch": 0.7929987445622025, "grad_norm": 0.5030038947041537, "learning_rate": 1.1500405515004056e-05, "loss": 0.5906, "step": 27161 }, { "epoch": 0.7930279407900499, "grad_norm": 0.49054422365194805, "learning_rate": 1.1498783454987835e-05, "loss": 0.5283, "step": 27162 }, { "epoch": 0.7930571370178973, "grad_norm": 0.5574632268745235, "learning_rate": 1.1497161394971615e-05, "loss": 0.6226, "step": 27163 }, { "epoch": 0.7930863332457446, "grad_norm": 0.5100839108427722, "learning_rate": 1.1495539334955393e-05, "loss": 0.5288, "step": 27164 }, { "epoch": 0.793115529473592, "grad_norm": 0.5220675249911584, "learning_rate": 1.1493917274939173e-05, "loss": 0.5694, "step": 27165 }, { "epoch": 0.7931447257014393, "grad_norm": 0.5450237541044891, "learning_rate": 1.1492295214922953e-05, "loss": 0.6222, "step": 27166 }, { "epoch": 0.7931739219292867, "grad_norm": 0.5358029894710679, "learning_rate": 1.1490673154906732e-05, "loss": 0.6052, "step": 27167 }, { "epoch": 0.7932031181571341, "grad_norm": 0.4759813714958154, "learning_rate": 1.1489051094890512e-05, "loss": 0.5041, "step": 27168 }, { "epoch": 0.7932323143849814, "grad_norm": 0.5240771788220618, "learning_rate": 1.148742903487429e-05, "loss": 0.6252, "step": 27169 }, { "epoch": 0.7932615106128288, "grad_norm": 0.524839311159695, "learning_rate": 1.148580697485807e-05, "loss": 0.5683, "step": 27170 }, { "epoch": 0.7932907068406762, "grad_norm": 0.5567906775774634, "learning_rate": 1.148418491484185e-05, "loss": 0.6774, "step": 27171 }, { "epoch": 0.7933199030685235, "grad_norm": 0.5152057564333109, "learning_rate": 1.1482562854825629e-05, "loss": 0.5379, "step": 27172 }, { "epoch": 0.7933490992963709, "grad_norm": 0.5070906886466684, "learning_rate": 1.1480940794809409e-05, "loss": 0.5788, "step": 27173 }, { "epoch": 0.7933782955242182, "grad_norm": 0.5102078208369354, "learning_rate": 1.1479318734793187e-05, "loss": 0.5366, "step": 27174 }, { "epoch": 0.7934074917520656, "grad_norm": 0.5062497459351938, "learning_rate": 1.1477696674776969e-05, "loss": 0.563, "step": 27175 }, { "epoch": 0.793436687979913, "grad_norm": 0.5245176982397958, "learning_rate": 1.1476074614760747e-05, "loss": 0.5857, "step": 27176 }, { "epoch": 0.7934658842077603, "grad_norm": 0.513819896150883, "learning_rate": 1.1474452554744525e-05, "loss": 0.608, "step": 27177 }, { "epoch": 0.7934950804356077, "grad_norm": 0.5280264955419629, "learning_rate": 1.1472830494728306e-05, "loss": 0.6029, "step": 27178 }, { "epoch": 0.793524276663455, "grad_norm": 0.47782569534189095, "learning_rate": 1.1471208434712084e-05, "loss": 0.5104, "step": 27179 }, { "epoch": 0.7935534728913024, "grad_norm": 0.5176387343685563, "learning_rate": 1.1469586374695864e-05, "loss": 0.5867, "step": 27180 }, { "epoch": 0.7935826691191498, "grad_norm": 0.5353609691682857, "learning_rate": 1.1467964314679644e-05, "loss": 0.588, "step": 27181 }, { "epoch": 0.7936118653469971, "grad_norm": 0.5391189202453132, "learning_rate": 1.1466342254663422e-05, "loss": 0.5963, "step": 27182 }, { "epoch": 0.7936410615748445, "grad_norm": 0.49857380548696956, "learning_rate": 1.1464720194647203e-05, "loss": 0.5586, "step": 27183 }, { "epoch": 0.7936702578026918, "grad_norm": 0.5439215761427347, "learning_rate": 1.1463098134630981e-05, "loss": 0.5897, "step": 27184 }, { "epoch": 0.7936994540305392, "grad_norm": 0.5149816242405816, "learning_rate": 1.1461476074614761e-05, "loss": 0.5947, "step": 27185 }, { "epoch": 0.7937286502583866, "grad_norm": 0.5744811159634126, "learning_rate": 1.1459854014598541e-05, "loss": 0.7054, "step": 27186 }, { "epoch": 0.7937578464862339, "grad_norm": 0.5045905110637368, "learning_rate": 1.145823195458232e-05, "loss": 0.5462, "step": 27187 }, { "epoch": 0.7937870427140813, "grad_norm": 0.4991510545965845, "learning_rate": 1.14566098945661e-05, "loss": 0.5209, "step": 27188 }, { "epoch": 0.7938162389419287, "grad_norm": 0.5690669953618556, "learning_rate": 1.145498783454988e-05, "loss": 0.6382, "step": 27189 }, { "epoch": 0.793845435169776, "grad_norm": 0.5064382133014412, "learning_rate": 1.1453365774533658e-05, "loss": 0.5304, "step": 27190 }, { "epoch": 0.7938746313976234, "grad_norm": 0.528743355074794, "learning_rate": 1.1451743714517438e-05, "loss": 0.5713, "step": 27191 }, { "epoch": 0.7939038276254707, "grad_norm": 0.5088754249739506, "learning_rate": 1.1450121654501216e-05, "loss": 0.563, "step": 27192 }, { "epoch": 0.7939330238533181, "grad_norm": 0.5573008837930123, "learning_rate": 1.1448499594484996e-05, "loss": 0.6369, "step": 27193 }, { "epoch": 0.7939622200811655, "grad_norm": 0.5250182560706891, "learning_rate": 1.1446877534468776e-05, "loss": 0.5774, "step": 27194 }, { "epoch": 0.7939914163090128, "grad_norm": 0.5452623023809136, "learning_rate": 1.1445255474452555e-05, "loss": 0.6559, "step": 27195 }, { "epoch": 0.7940206125368603, "grad_norm": 0.5149276856607974, "learning_rate": 1.1443633414436335e-05, "loss": 0.5517, "step": 27196 }, { "epoch": 0.7940498087647077, "grad_norm": 0.5105656280071245, "learning_rate": 1.1442011354420113e-05, "loss": 0.5751, "step": 27197 }, { "epoch": 0.794079004992555, "grad_norm": 0.5452595270457741, "learning_rate": 1.1440389294403893e-05, "loss": 0.5973, "step": 27198 }, { "epoch": 0.7941082012204024, "grad_norm": 0.5198937286810053, "learning_rate": 1.1438767234387673e-05, "loss": 0.6162, "step": 27199 }, { "epoch": 0.7941373974482497, "grad_norm": 0.516243794574746, "learning_rate": 1.1437145174371452e-05, "loss": 0.555, "step": 27200 }, { "epoch": 0.7941665936760971, "grad_norm": 0.5030417792860203, "learning_rate": 1.1435523114355232e-05, "loss": 0.5292, "step": 27201 }, { "epoch": 0.7941957899039445, "grad_norm": 0.5119058509395986, "learning_rate": 1.143390105433901e-05, "loss": 0.5794, "step": 27202 }, { "epoch": 0.7942249861317918, "grad_norm": 0.5363839168319856, "learning_rate": 1.143227899432279e-05, "loss": 0.6358, "step": 27203 }, { "epoch": 0.7942541823596392, "grad_norm": 0.5455916027096328, "learning_rate": 1.143065693430657e-05, "loss": 0.6718, "step": 27204 }, { "epoch": 0.7942833785874865, "grad_norm": 0.4988276866218229, "learning_rate": 1.1429034874290349e-05, "loss": 0.5639, "step": 27205 }, { "epoch": 0.7943125748153339, "grad_norm": 0.5106780562770321, "learning_rate": 1.1427412814274129e-05, "loss": 0.5133, "step": 27206 }, { "epoch": 0.7943417710431813, "grad_norm": 0.5396828109805486, "learning_rate": 1.1425790754257907e-05, "loss": 0.5729, "step": 27207 }, { "epoch": 0.7943709672710286, "grad_norm": 0.5487429200685501, "learning_rate": 1.1424168694241689e-05, "loss": 0.5996, "step": 27208 }, { "epoch": 0.794400163498876, "grad_norm": 0.5016438671512308, "learning_rate": 1.1422546634225467e-05, "loss": 0.583, "step": 27209 }, { "epoch": 0.7944293597267233, "grad_norm": 0.5090164538208547, "learning_rate": 1.1420924574209246e-05, "loss": 0.5446, "step": 27210 }, { "epoch": 0.7944585559545707, "grad_norm": 0.500051001586555, "learning_rate": 1.1419302514193026e-05, "loss": 0.4867, "step": 27211 }, { "epoch": 0.7944877521824181, "grad_norm": 0.5267472132535088, "learning_rate": 1.1417680454176804e-05, "loss": 0.6117, "step": 27212 }, { "epoch": 0.7945169484102654, "grad_norm": 0.5365877181142144, "learning_rate": 1.1416058394160584e-05, "loss": 0.6065, "step": 27213 }, { "epoch": 0.7945461446381128, "grad_norm": 0.5471956336158397, "learning_rate": 1.1414436334144364e-05, "loss": 0.6156, "step": 27214 }, { "epoch": 0.7945753408659602, "grad_norm": 0.5287591559563333, "learning_rate": 1.1412814274128143e-05, "loss": 0.5538, "step": 27215 }, { "epoch": 0.7946045370938075, "grad_norm": 0.5096722415354413, "learning_rate": 1.1411192214111923e-05, "loss": 0.6079, "step": 27216 }, { "epoch": 0.7946337333216549, "grad_norm": 0.4999623073346416, "learning_rate": 1.1409570154095701e-05, "loss": 0.5402, "step": 27217 }, { "epoch": 0.7946629295495022, "grad_norm": 0.5301858322798095, "learning_rate": 1.1407948094079481e-05, "loss": 0.5078, "step": 27218 }, { "epoch": 0.7946921257773496, "grad_norm": 0.5113262650864837, "learning_rate": 1.1406326034063261e-05, "loss": 0.6022, "step": 27219 }, { "epoch": 0.794721322005197, "grad_norm": 0.5448212486303279, "learning_rate": 1.140470397404704e-05, "loss": 0.6134, "step": 27220 }, { "epoch": 0.7947505182330443, "grad_norm": 0.5372023216421433, "learning_rate": 1.140308191403082e-05, "loss": 0.5598, "step": 27221 }, { "epoch": 0.7947797144608917, "grad_norm": 0.5070664965899051, "learning_rate": 1.14014598540146e-05, "loss": 0.5497, "step": 27222 }, { "epoch": 0.794808910688739, "grad_norm": 0.507383118709177, "learning_rate": 1.1399837793998378e-05, "loss": 0.5819, "step": 27223 }, { "epoch": 0.7948381069165864, "grad_norm": 0.5176179535679195, "learning_rate": 1.1398215733982158e-05, "loss": 0.5612, "step": 27224 }, { "epoch": 0.7948673031444338, "grad_norm": 0.4968279437238421, "learning_rate": 1.1396593673965937e-05, "loss": 0.543, "step": 27225 }, { "epoch": 0.7948964993722811, "grad_norm": 0.49023564813021636, "learning_rate": 1.1394971613949717e-05, "loss": 0.486, "step": 27226 }, { "epoch": 0.7949256956001285, "grad_norm": 0.4910223487744863, "learning_rate": 1.1393349553933497e-05, "loss": 0.5368, "step": 27227 }, { "epoch": 0.7949548918279759, "grad_norm": 0.5627885339583852, "learning_rate": 1.1391727493917275e-05, "loss": 0.6371, "step": 27228 }, { "epoch": 0.7949840880558232, "grad_norm": 0.5379628622695517, "learning_rate": 1.1390105433901055e-05, "loss": 0.6051, "step": 27229 }, { "epoch": 0.7950132842836706, "grad_norm": 0.5664932320806662, "learning_rate": 1.1388483373884834e-05, "loss": 0.6187, "step": 27230 }, { "epoch": 0.7950424805115179, "grad_norm": 0.510740994027527, "learning_rate": 1.1386861313868614e-05, "loss": 0.5736, "step": 27231 }, { "epoch": 0.7950716767393653, "grad_norm": 0.5091267615824526, "learning_rate": 1.1385239253852394e-05, "loss": 0.5329, "step": 27232 }, { "epoch": 0.7951008729672127, "grad_norm": 0.5399789198412107, "learning_rate": 1.1383617193836172e-05, "loss": 0.6235, "step": 27233 }, { "epoch": 0.79513006919506, "grad_norm": 0.5253250148623528, "learning_rate": 1.1381995133819952e-05, "loss": 0.5715, "step": 27234 }, { "epoch": 0.7951592654229074, "grad_norm": 0.5498948467412987, "learning_rate": 1.138037307380373e-05, "loss": 0.634, "step": 27235 }, { "epoch": 0.7951884616507547, "grad_norm": 0.5322482424552589, "learning_rate": 1.137875101378751e-05, "loss": 0.5892, "step": 27236 }, { "epoch": 0.7952176578786021, "grad_norm": 0.4874979527390312, "learning_rate": 1.137712895377129e-05, "loss": 0.547, "step": 27237 }, { "epoch": 0.7952468541064495, "grad_norm": 0.5751824698774112, "learning_rate": 1.1375506893755069e-05, "loss": 0.6333, "step": 27238 }, { "epoch": 0.7952760503342968, "grad_norm": 0.5860403626365748, "learning_rate": 1.1373884833738849e-05, "loss": 0.6244, "step": 27239 }, { "epoch": 0.7953052465621442, "grad_norm": 0.5306431817046127, "learning_rate": 1.1372262773722627e-05, "loss": 0.5596, "step": 27240 }, { "epoch": 0.7953344427899915, "grad_norm": 0.47823652185528087, "learning_rate": 1.1370640713706407e-05, "loss": 0.4832, "step": 27241 }, { "epoch": 0.7953636390178389, "grad_norm": 0.507287732634029, "learning_rate": 1.1369018653690188e-05, "loss": 0.5268, "step": 27242 }, { "epoch": 0.7953928352456863, "grad_norm": 0.5211196498225125, "learning_rate": 1.1367396593673966e-05, "loss": 0.554, "step": 27243 }, { "epoch": 0.7954220314735336, "grad_norm": 0.5303719778032846, "learning_rate": 1.1365774533657746e-05, "loss": 0.5922, "step": 27244 }, { "epoch": 0.795451227701381, "grad_norm": 0.5801358485068895, "learning_rate": 1.1364152473641524e-05, "loss": 0.7098, "step": 27245 }, { "epoch": 0.7954804239292284, "grad_norm": 0.5440651211314169, "learning_rate": 1.1362530413625304e-05, "loss": 0.6138, "step": 27246 }, { "epoch": 0.7955096201570757, "grad_norm": 0.5119372627865243, "learning_rate": 1.1360908353609085e-05, "loss": 0.5865, "step": 27247 }, { "epoch": 0.7955388163849231, "grad_norm": 0.5497903708268622, "learning_rate": 1.1359286293592863e-05, "loss": 0.6252, "step": 27248 }, { "epoch": 0.7955680126127704, "grad_norm": 0.5162094437135707, "learning_rate": 1.1357664233576643e-05, "loss": 0.5818, "step": 27249 }, { "epoch": 0.7955972088406178, "grad_norm": 0.4953279864008949, "learning_rate": 1.1356042173560421e-05, "loss": 0.5098, "step": 27250 }, { "epoch": 0.7956264050684652, "grad_norm": 0.5434468206642372, "learning_rate": 1.1354420113544201e-05, "loss": 0.6033, "step": 27251 }, { "epoch": 0.7956556012963125, "grad_norm": 0.5220891886841625, "learning_rate": 1.1352798053527981e-05, "loss": 0.571, "step": 27252 }, { "epoch": 0.7956847975241599, "grad_norm": 0.5454405676338229, "learning_rate": 1.135117599351176e-05, "loss": 0.6179, "step": 27253 }, { "epoch": 0.7957139937520072, "grad_norm": 0.5342395075035332, "learning_rate": 1.134955393349554e-05, "loss": 0.5571, "step": 27254 }, { "epoch": 0.7957431899798546, "grad_norm": 0.49819354731469984, "learning_rate": 1.134793187347932e-05, "loss": 0.5676, "step": 27255 }, { "epoch": 0.795772386207702, "grad_norm": 0.5055231814376479, "learning_rate": 1.1346309813463098e-05, "loss": 0.5362, "step": 27256 }, { "epoch": 0.7958015824355493, "grad_norm": 0.5370771619217843, "learning_rate": 1.1344687753446878e-05, "loss": 0.5958, "step": 27257 }, { "epoch": 0.7958307786633967, "grad_norm": 0.5130141414695318, "learning_rate": 1.1343065693430657e-05, "loss": 0.5628, "step": 27258 }, { "epoch": 0.795859974891244, "grad_norm": 0.48155739274490567, "learning_rate": 1.1341443633414437e-05, "loss": 0.4795, "step": 27259 }, { "epoch": 0.7958891711190914, "grad_norm": 0.5116551426185172, "learning_rate": 1.1339821573398217e-05, "loss": 0.5329, "step": 27260 }, { "epoch": 0.7959183673469388, "grad_norm": 0.5292555066031019, "learning_rate": 1.1338199513381995e-05, "loss": 0.6336, "step": 27261 }, { "epoch": 0.7959475635747861, "grad_norm": 0.5142550270569063, "learning_rate": 1.1336577453365775e-05, "loss": 0.5832, "step": 27262 }, { "epoch": 0.7959767598026335, "grad_norm": 0.5343087714191336, "learning_rate": 1.1334955393349554e-05, "loss": 0.6045, "step": 27263 }, { "epoch": 0.7960059560304809, "grad_norm": 0.536089149330651, "learning_rate": 1.1333333333333334e-05, "loss": 0.6205, "step": 27264 }, { "epoch": 0.7960351522583282, "grad_norm": 0.46810212249530064, "learning_rate": 1.1331711273317114e-05, "loss": 0.4829, "step": 27265 }, { "epoch": 0.7960643484861756, "grad_norm": 0.5319095113354075, "learning_rate": 1.1330089213300892e-05, "loss": 0.6352, "step": 27266 }, { "epoch": 0.7960935447140229, "grad_norm": 0.5406291926108664, "learning_rate": 1.1328467153284672e-05, "loss": 0.5503, "step": 27267 }, { "epoch": 0.7961227409418703, "grad_norm": 0.6338848222345899, "learning_rate": 1.132684509326845e-05, "loss": 0.6408, "step": 27268 }, { "epoch": 0.7961519371697177, "grad_norm": 0.5353306874854519, "learning_rate": 1.132522303325223e-05, "loss": 0.6031, "step": 27269 }, { "epoch": 0.796181133397565, "grad_norm": 0.5473924917736125, "learning_rate": 1.132360097323601e-05, "loss": 0.5925, "step": 27270 }, { "epoch": 0.7962103296254124, "grad_norm": 0.5247771484325761, "learning_rate": 1.132197891321979e-05, "loss": 0.5988, "step": 27271 }, { "epoch": 0.7962395258532597, "grad_norm": 0.5636929697753518, "learning_rate": 1.132035685320357e-05, "loss": 0.6188, "step": 27272 }, { "epoch": 0.7962687220811071, "grad_norm": 0.5031834764713818, "learning_rate": 1.1318734793187348e-05, "loss": 0.5361, "step": 27273 }, { "epoch": 0.7962979183089545, "grad_norm": 0.5184513902235942, "learning_rate": 1.1317112733171128e-05, "loss": 0.5713, "step": 27274 }, { "epoch": 0.7963271145368018, "grad_norm": 0.4773970261717532, "learning_rate": 1.1315490673154908e-05, "loss": 0.5166, "step": 27275 }, { "epoch": 0.7963563107646492, "grad_norm": 0.5134877230893079, "learning_rate": 1.1313868613138686e-05, "loss": 0.5909, "step": 27276 }, { "epoch": 0.7963855069924966, "grad_norm": 0.5518384827890799, "learning_rate": 1.1312246553122466e-05, "loss": 0.6643, "step": 27277 }, { "epoch": 0.7964147032203439, "grad_norm": 0.5284595822665875, "learning_rate": 1.1310624493106245e-05, "loss": 0.5851, "step": 27278 }, { "epoch": 0.7964438994481913, "grad_norm": 0.5023436350575575, "learning_rate": 1.1309002433090025e-05, "loss": 0.5598, "step": 27279 }, { "epoch": 0.7964730956760386, "grad_norm": 0.5562624738155377, "learning_rate": 1.1307380373073805e-05, "loss": 0.6526, "step": 27280 }, { "epoch": 0.796502291903886, "grad_norm": 0.4761595844452396, "learning_rate": 1.1305758313057583e-05, "loss": 0.5041, "step": 27281 }, { "epoch": 0.7965314881317334, "grad_norm": 0.5273639535644277, "learning_rate": 1.1304136253041363e-05, "loss": 0.5562, "step": 27282 }, { "epoch": 0.7965606843595807, "grad_norm": 0.5064189751940282, "learning_rate": 1.1302514193025142e-05, "loss": 0.5614, "step": 27283 }, { "epoch": 0.7965898805874281, "grad_norm": 0.5396882035365008, "learning_rate": 1.1300892133008922e-05, "loss": 0.5582, "step": 27284 }, { "epoch": 0.7966190768152754, "grad_norm": 0.5260988232113011, "learning_rate": 1.1299270072992702e-05, "loss": 0.5995, "step": 27285 }, { "epoch": 0.7966482730431228, "grad_norm": 0.5196837352619011, "learning_rate": 1.129764801297648e-05, "loss": 0.5937, "step": 27286 }, { "epoch": 0.7966774692709702, "grad_norm": 0.5364684897036115, "learning_rate": 1.129602595296026e-05, "loss": 0.6046, "step": 27287 }, { "epoch": 0.7967066654988175, "grad_norm": 0.5797340735620529, "learning_rate": 1.1294403892944038e-05, "loss": 0.6755, "step": 27288 }, { "epoch": 0.7967358617266649, "grad_norm": 0.510782352769503, "learning_rate": 1.1292781832927819e-05, "loss": 0.5565, "step": 27289 }, { "epoch": 0.7967650579545122, "grad_norm": 0.5643056108084782, "learning_rate": 1.1291159772911599e-05, "loss": 0.6353, "step": 27290 }, { "epoch": 0.7967942541823596, "grad_norm": 0.5550663377428842, "learning_rate": 1.1289537712895377e-05, "loss": 0.6064, "step": 27291 }, { "epoch": 0.796823450410207, "grad_norm": 0.4893464289711383, "learning_rate": 1.1287915652879157e-05, "loss": 0.4823, "step": 27292 }, { "epoch": 0.7968526466380543, "grad_norm": 0.48857059748630316, "learning_rate": 1.1286293592862937e-05, "loss": 0.4853, "step": 27293 }, { "epoch": 0.7968818428659017, "grad_norm": 0.5176675477393483, "learning_rate": 1.1284671532846716e-05, "loss": 0.5769, "step": 27294 }, { "epoch": 0.796911039093749, "grad_norm": 0.504379814349271, "learning_rate": 1.1283049472830496e-05, "loss": 0.5486, "step": 27295 }, { "epoch": 0.7969402353215964, "grad_norm": 0.5122607962608939, "learning_rate": 1.1281427412814274e-05, "loss": 0.5548, "step": 27296 }, { "epoch": 0.7969694315494438, "grad_norm": 0.45208096782659285, "learning_rate": 1.1279805352798054e-05, "loss": 0.4695, "step": 27297 }, { "epoch": 0.7969986277772911, "grad_norm": 0.5497164782533126, "learning_rate": 1.1278183292781834e-05, "loss": 0.5558, "step": 27298 }, { "epoch": 0.7970278240051385, "grad_norm": 0.5246540041960756, "learning_rate": 1.1276561232765612e-05, "loss": 0.5819, "step": 27299 }, { "epoch": 0.7970570202329859, "grad_norm": 0.5248286478171834, "learning_rate": 1.1274939172749393e-05, "loss": 0.5773, "step": 27300 }, { "epoch": 0.7970862164608332, "grad_norm": 0.5036821147585211, "learning_rate": 1.1273317112733171e-05, "loss": 0.5626, "step": 27301 }, { "epoch": 0.7971154126886806, "grad_norm": 0.5285800538799831, "learning_rate": 1.1271695052716951e-05, "loss": 0.6018, "step": 27302 }, { "epoch": 0.7971446089165279, "grad_norm": 0.5118197639432643, "learning_rate": 1.1270072992700731e-05, "loss": 0.5833, "step": 27303 }, { "epoch": 0.7971738051443753, "grad_norm": 0.5137544904451139, "learning_rate": 1.126845093268451e-05, "loss": 0.5903, "step": 27304 }, { "epoch": 0.7972030013722227, "grad_norm": 0.549795618855378, "learning_rate": 1.126682887266829e-05, "loss": 0.6571, "step": 27305 }, { "epoch": 0.79723219760007, "grad_norm": 0.5141319871395659, "learning_rate": 1.1265206812652068e-05, "loss": 0.5726, "step": 27306 }, { "epoch": 0.7972613938279174, "grad_norm": 0.5455538345937961, "learning_rate": 1.1263584752635848e-05, "loss": 0.68, "step": 27307 }, { "epoch": 0.7972905900557647, "grad_norm": 0.6243412870710097, "learning_rate": 1.1261962692619628e-05, "loss": 0.5675, "step": 27308 }, { "epoch": 0.7973197862836121, "grad_norm": 0.4822805501733842, "learning_rate": 1.1260340632603406e-05, "loss": 0.4963, "step": 27309 }, { "epoch": 0.7973489825114595, "grad_norm": 0.598503067987872, "learning_rate": 1.1258718572587186e-05, "loss": 0.6608, "step": 27310 }, { "epoch": 0.7973781787393068, "grad_norm": 0.5220968998255381, "learning_rate": 1.1257096512570965e-05, "loss": 0.6138, "step": 27311 }, { "epoch": 0.7974073749671542, "grad_norm": 0.5516199142964961, "learning_rate": 1.1255474452554745e-05, "loss": 0.6266, "step": 27312 }, { "epoch": 0.7974365711950016, "grad_norm": 0.5172900640695599, "learning_rate": 1.1253852392538525e-05, "loss": 0.5968, "step": 27313 }, { "epoch": 0.7974657674228489, "grad_norm": 0.4864200457897954, "learning_rate": 1.1252230332522303e-05, "loss": 0.5279, "step": 27314 }, { "epoch": 0.7974949636506963, "grad_norm": 0.5359214443297473, "learning_rate": 1.1250608272506083e-05, "loss": 0.5702, "step": 27315 }, { "epoch": 0.7975241598785436, "grad_norm": 0.5462712790658397, "learning_rate": 1.1248986212489862e-05, "loss": 0.5938, "step": 27316 }, { "epoch": 0.7975533561063911, "grad_norm": 0.5291192894815497, "learning_rate": 1.1247364152473642e-05, "loss": 0.5969, "step": 27317 }, { "epoch": 0.7975825523342385, "grad_norm": 0.520231282314474, "learning_rate": 1.1245742092457422e-05, "loss": 0.5692, "step": 27318 }, { "epoch": 0.7976117485620858, "grad_norm": 0.5218867725069499, "learning_rate": 1.12441200324412e-05, "loss": 0.587, "step": 27319 }, { "epoch": 0.7976409447899332, "grad_norm": 0.5022518767194396, "learning_rate": 1.124249797242498e-05, "loss": 0.569, "step": 27320 }, { "epoch": 0.7976701410177806, "grad_norm": 0.5348574152772411, "learning_rate": 1.1240875912408759e-05, "loss": 0.5899, "step": 27321 }, { "epoch": 0.7976993372456279, "grad_norm": 0.5188569599773257, "learning_rate": 1.1239253852392539e-05, "loss": 0.5792, "step": 27322 }, { "epoch": 0.7977285334734753, "grad_norm": 0.5256859008465408, "learning_rate": 1.1237631792376319e-05, "loss": 0.6384, "step": 27323 }, { "epoch": 0.7977577297013226, "grad_norm": 0.6034794031135193, "learning_rate": 1.1236009732360097e-05, "loss": 0.6377, "step": 27324 }, { "epoch": 0.79778692592917, "grad_norm": 0.4939683609022529, "learning_rate": 1.1234387672343877e-05, "loss": 0.538, "step": 27325 }, { "epoch": 0.7978161221570174, "grad_norm": 0.536197979620001, "learning_rate": 1.1232765612327657e-05, "loss": 0.617, "step": 27326 }, { "epoch": 0.7978453183848647, "grad_norm": 0.5178977716150124, "learning_rate": 1.1231143552311436e-05, "loss": 0.5933, "step": 27327 }, { "epoch": 0.7978745146127121, "grad_norm": 0.4820565048, "learning_rate": 1.1229521492295216e-05, "loss": 0.5028, "step": 27328 }, { "epoch": 0.7979037108405594, "grad_norm": 0.48836428220662736, "learning_rate": 1.1227899432278994e-05, "loss": 0.5215, "step": 27329 }, { "epoch": 0.7979329070684068, "grad_norm": 0.5863911965230392, "learning_rate": 1.1226277372262774e-05, "loss": 0.6912, "step": 27330 }, { "epoch": 0.7979621032962542, "grad_norm": 0.4947261029232288, "learning_rate": 1.1224655312246554e-05, "loss": 0.5117, "step": 27331 }, { "epoch": 0.7979912995241015, "grad_norm": 0.5227439336713368, "learning_rate": 1.1223033252230333e-05, "loss": 0.585, "step": 27332 }, { "epoch": 0.7980204957519489, "grad_norm": 0.4977986124508057, "learning_rate": 1.1221411192214113e-05, "loss": 0.5158, "step": 27333 }, { "epoch": 0.7980496919797962, "grad_norm": 0.4833729823256309, "learning_rate": 1.1219789132197891e-05, "loss": 0.5197, "step": 27334 }, { "epoch": 0.7980788882076436, "grad_norm": 0.5160907562919446, "learning_rate": 1.1218167072181671e-05, "loss": 0.5893, "step": 27335 }, { "epoch": 0.798108084435491, "grad_norm": 0.5756931270635938, "learning_rate": 1.1216545012165451e-05, "loss": 0.7006, "step": 27336 }, { "epoch": 0.7981372806633383, "grad_norm": 0.48970150309040433, "learning_rate": 1.121492295214923e-05, "loss": 0.5469, "step": 27337 }, { "epoch": 0.7981664768911857, "grad_norm": 0.5197062829026009, "learning_rate": 1.121330089213301e-05, "loss": 0.5645, "step": 27338 }, { "epoch": 0.7981956731190331, "grad_norm": 0.5432572861319861, "learning_rate": 1.1211678832116788e-05, "loss": 0.5996, "step": 27339 }, { "epoch": 0.7982248693468804, "grad_norm": 0.5097547207416008, "learning_rate": 1.1210056772100568e-05, "loss": 0.5243, "step": 27340 }, { "epoch": 0.7982540655747278, "grad_norm": 0.5284649945432024, "learning_rate": 1.1208434712084348e-05, "loss": 0.5842, "step": 27341 }, { "epoch": 0.7982832618025751, "grad_norm": 0.547832162142784, "learning_rate": 1.1206812652068127e-05, "loss": 0.6187, "step": 27342 }, { "epoch": 0.7983124580304225, "grad_norm": 0.5296405232347693, "learning_rate": 1.1205190592051907e-05, "loss": 0.5742, "step": 27343 }, { "epoch": 0.7983416542582699, "grad_norm": 0.5250238439075793, "learning_rate": 1.1203568532035685e-05, "loss": 0.5961, "step": 27344 }, { "epoch": 0.7983708504861172, "grad_norm": 0.5233699541809917, "learning_rate": 1.1201946472019465e-05, "loss": 0.6136, "step": 27345 }, { "epoch": 0.7984000467139646, "grad_norm": 0.5551993418433012, "learning_rate": 1.1200324412003245e-05, "loss": 0.5987, "step": 27346 }, { "epoch": 0.798429242941812, "grad_norm": 0.5459002165349046, "learning_rate": 1.1198702351987024e-05, "loss": 0.6474, "step": 27347 }, { "epoch": 0.7984584391696593, "grad_norm": 0.4899840502602192, "learning_rate": 1.1197080291970804e-05, "loss": 0.5395, "step": 27348 }, { "epoch": 0.7984876353975067, "grad_norm": 0.5058383818570793, "learning_rate": 1.1195458231954582e-05, "loss": 0.5622, "step": 27349 }, { "epoch": 0.798516831625354, "grad_norm": 0.5493773919586008, "learning_rate": 1.1193836171938362e-05, "loss": 0.6554, "step": 27350 }, { "epoch": 0.7985460278532014, "grad_norm": 0.527606661888866, "learning_rate": 1.1192214111922142e-05, "loss": 0.5715, "step": 27351 }, { "epoch": 0.7985752240810488, "grad_norm": 0.4629185282355523, "learning_rate": 1.119059205190592e-05, "loss": 0.4793, "step": 27352 }, { "epoch": 0.7986044203088961, "grad_norm": 0.5083228646607948, "learning_rate": 1.11889699918897e-05, "loss": 0.563, "step": 27353 }, { "epoch": 0.7986336165367435, "grad_norm": 0.5138030356993197, "learning_rate": 1.1187347931873479e-05, "loss": 0.5625, "step": 27354 }, { "epoch": 0.7986628127645908, "grad_norm": 0.48296423644027386, "learning_rate": 1.1185725871857259e-05, "loss": 0.5006, "step": 27355 }, { "epoch": 0.7986920089924382, "grad_norm": 0.497073652089067, "learning_rate": 1.1184103811841039e-05, "loss": 0.5367, "step": 27356 }, { "epoch": 0.7987212052202856, "grad_norm": 0.514967790423098, "learning_rate": 1.1182481751824817e-05, "loss": 0.5521, "step": 27357 }, { "epoch": 0.7987504014481329, "grad_norm": 0.48747633257023143, "learning_rate": 1.1180859691808598e-05, "loss": 0.5135, "step": 27358 }, { "epoch": 0.7987795976759803, "grad_norm": 0.5309485881623871, "learning_rate": 1.1179237631792378e-05, "loss": 0.5681, "step": 27359 }, { "epoch": 0.7988087939038276, "grad_norm": 0.5539625925397211, "learning_rate": 1.1177615571776156e-05, "loss": 0.662, "step": 27360 }, { "epoch": 0.798837990131675, "grad_norm": 0.4750702778431876, "learning_rate": 1.1175993511759936e-05, "loss": 0.4744, "step": 27361 }, { "epoch": 0.7988671863595224, "grad_norm": 0.5271448677804917, "learning_rate": 1.1174371451743714e-05, "loss": 0.5797, "step": 27362 }, { "epoch": 0.7988963825873697, "grad_norm": 0.525040218789656, "learning_rate": 1.1172749391727494e-05, "loss": 0.5842, "step": 27363 }, { "epoch": 0.7989255788152171, "grad_norm": 0.5477534994671854, "learning_rate": 1.1171127331711275e-05, "loss": 0.6531, "step": 27364 }, { "epoch": 0.7989547750430644, "grad_norm": 0.5183533033902546, "learning_rate": 1.1169505271695053e-05, "loss": 0.5745, "step": 27365 }, { "epoch": 0.7989839712709118, "grad_norm": 0.5118206398480212, "learning_rate": 1.1167883211678833e-05, "loss": 0.6504, "step": 27366 }, { "epoch": 0.7990131674987592, "grad_norm": 0.5046372505279009, "learning_rate": 1.1166261151662611e-05, "loss": 0.5924, "step": 27367 }, { "epoch": 0.7990423637266065, "grad_norm": 0.5308908121145627, "learning_rate": 1.1164639091646391e-05, "loss": 0.5866, "step": 27368 }, { "epoch": 0.7990715599544539, "grad_norm": 0.5384340005284485, "learning_rate": 1.1163017031630171e-05, "loss": 0.6449, "step": 27369 }, { "epoch": 0.7991007561823013, "grad_norm": 0.5242065141423052, "learning_rate": 1.116139497161395e-05, "loss": 0.605, "step": 27370 }, { "epoch": 0.7991299524101486, "grad_norm": 0.49952403356398944, "learning_rate": 1.115977291159773e-05, "loss": 0.5669, "step": 27371 }, { "epoch": 0.799159148637996, "grad_norm": 0.5236772288818433, "learning_rate": 1.1158150851581508e-05, "loss": 0.6466, "step": 27372 }, { "epoch": 0.7991883448658433, "grad_norm": 0.525574385330973, "learning_rate": 1.1156528791565288e-05, "loss": 0.5857, "step": 27373 }, { "epoch": 0.7992175410936907, "grad_norm": 0.5285755412720201, "learning_rate": 1.1154906731549068e-05, "loss": 0.616, "step": 27374 }, { "epoch": 0.7992467373215381, "grad_norm": 0.49954080053512867, "learning_rate": 1.1153284671532847e-05, "loss": 0.5208, "step": 27375 }, { "epoch": 0.7992759335493854, "grad_norm": 0.5527619404778454, "learning_rate": 1.1151662611516627e-05, "loss": 0.6602, "step": 27376 }, { "epoch": 0.7993051297772328, "grad_norm": 0.5295988166921738, "learning_rate": 1.1150040551500405e-05, "loss": 0.5977, "step": 27377 }, { "epoch": 0.7993343260050801, "grad_norm": 0.5170724197401116, "learning_rate": 1.1148418491484185e-05, "loss": 0.5462, "step": 27378 }, { "epoch": 0.7993635222329275, "grad_norm": 0.5491816315095945, "learning_rate": 1.1146796431467965e-05, "loss": 0.6347, "step": 27379 }, { "epoch": 0.7993927184607749, "grad_norm": 0.5164573228561861, "learning_rate": 1.1145174371451744e-05, "loss": 0.5815, "step": 27380 }, { "epoch": 0.7994219146886222, "grad_norm": 0.524134689342531, "learning_rate": 1.1143552311435524e-05, "loss": 0.5708, "step": 27381 }, { "epoch": 0.7994511109164696, "grad_norm": 0.4598685023616416, "learning_rate": 1.1141930251419302e-05, "loss": 0.4371, "step": 27382 }, { "epoch": 0.799480307144317, "grad_norm": 0.46982430559017513, "learning_rate": 1.1140308191403082e-05, "loss": 0.4719, "step": 27383 }, { "epoch": 0.7995095033721643, "grad_norm": 0.5317338281098107, "learning_rate": 1.1138686131386862e-05, "loss": 0.6001, "step": 27384 }, { "epoch": 0.7995386996000117, "grad_norm": 0.4956143349421949, "learning_rate": 1.113706407137064e-05, "loss": 0.5559, "step": 27385 }, { "epoch": 0.799567895827859, "grad_norm": 0.5804394048948252, "learning_rate": 1.113544201135442e-05, "loss": 0.689, "step": 27386 }, { "epoch": 0.7995970920557064, "grad_norm": 0.5137372010771116, "learning_rate": 1.11338199513382e-05, "loss": 0.5149, "step": 27387 }, { "epoch": 0.7996262882835538, "grad_norm": 0.532701618567573, "learning_rate": 1.113219789132198e-05, "loss": 0.5706, "step": 27388 }, { "epoch": 0.7996554845114011, "grad_norm": 0.511517190711149, "learning_rate": 1.113057583130576e-05, "loss": 0.5663, "step": 27389 }, { "epoch": 0.7996846807392485, "grad_norm": 0.5316884439192764, "learning_rate": 1.1128953771289538e-05, "loss": 0.5896, "step": 27390 }, { "epoch": 0.7997138769670958, "grad_norm": 0.5134152740950028, "learning_rate": 1.1127331711273318e-05, "loss": 0.5698, "step": 27391 }, { "epoch": 0.7997430731949432, "grad_norm": 0.48670358618951737, "learning_rate": 1.1125709651257098e-05, "loss": 0.5082, "step": 27392 }, { "epoch": 0.7997722694227906, "grad_norm": 0.514419488777274, "learning_rate": 1.1124087591240876e-05, "loss": 0.5711, "step": 27393 }, { "epoch": 0.7998014656506379, "grad_norm": 0.5222542364625578, "learning_rate": 1.1122465531224656e-05, "loss": 0.5648, "step": 27394 }, { "epoch": 0.7998306618784853, "grad_norm": 0.5361373137849798, "learning_rate": 1.1120843471208435e-05, "loss": 0.5687, "step": 27395 }, { "epoch": 0.7998598581063326, "grad_norm": 0.5027210528654165, "learning_rate": 1.1119221411192215e-05, "loss": 0.5566, "step": 27396 }, { "epoch": 0.79988905433418, "grad_norm": 0.5331679652653248, "learning_rate": 1.1117599351175995e-05, "loss": 0.6259, "step": 27397 }, { "epoch": 0.7999182505620274, "grad_norm": 0.5433654813999224, "learning_rate": 1.1115977291159773e-05, "loss": 0.6243, "step": 27398 }, { "epoch": 0.7999474467898747, "grad_norm": 0.5505170715596275, "learning_rate": 1.1114355231143553e-05, "loss": 0.6106, "step": 27399 }, { "epoch": 0.7999766430177221, "grad_norm": 0.5178152941028447, "learning_rate": 1.1112733171127332e-05, "loss": 0.5168, "step": 27400 }, { "epoch": 0.8000058392455695, "grad_norm": 0.5157000238035849, "learning_rate": 1.1111111111111112e-05, "loss": 0.6121, "step": 27401 }, { "epoch": 0.8000350354734168, "grad_norm": 0.5174299179804135, "learning_rate": 1.1109489051094892e-05, "loss": 0.5804, "step": 27402 }, { "epoch": 0.8000642317012642, "grad_norm": 0.536730302398107, "learning_rate": 1.110786699107867e-05, "loss": 0.5678, "step": 27403 }, { "epoch": 0.8000934279291115, "grad_norm": 0.5447544853292251, "learning_rate": 1.110624493106245e-05, "loss": 0.581, "step": 27404 }, { "epoch": 0.8001226241569589, "grad_norm": 0.4868988381201487, "learning_rate": 1.1104622871046229e-05, "loss": 0.4716, "step": 27405 }, { "epoch": 0.8001518203848063, "grad_norm": 0.5198418278367065, "learning_rate": 1.1103000811030009e-05, "loss": 0.5775, "step": 27406 }, { "epoch": 0.8001810166126536, "grad_norm": 0.5530106561859977, "learning_rate": 1.1101378751013789e-05, "loss": 0.607, "step": 27407 }, { "epoch": 0.800210212840501, "grad_norm": 0.5086620378179458, "learning_rate": 1.1099756690997567e-05, "loss": 0.5618, "step": 27408 }, { "epoch": 0.8002394090683483, "grad_norm": 0.5368293535576031, "learning_rate": 1.1098134630981347e-05, "loss": 0.564, "step": 27409 }, { "epoch": 0.8002686052961957, "grad_norm": 0.5220408900924812, "learning_rate": 1.1096512570965125e-05, "loss": 0.5769, "step": 27410 }, { "epoch": 0.8002978015240431, "grad_norm": 0.5007846910612685, "learning_rate": 1.1094890510948906e-05, "loss": 0.5207, "step": 27411 }, { "epoch": 0.8003269977518904, "grad_norm": 0.540853834108867, "learning_rate": 1.1093268450932686e-05, "loss": 0.5741, "step": 27412 }, { "epoch": 0.8003561939797378, "grad_norm": 0.5212232147472486, "learning_rate": 1.1091646390916464e-05, "loss": 0.5695, "step": 27413 }, { "epoch": 0.8003853902075851, "grad_norm": 0.5412382392823474, "learning_rate": 1.1090024330900244e-05, "loss": 0.6131, "step": 27414 }, { "epoch": 0.8004145864354325, "grad_norm": 0.5498644665478287, "learning_rate": 1.1088402270884022e-05, "loss": 0.6377, "step": 27415 }, { "epoch": 0.8004437826632799, "grad_norm": 0.5382613544121443, "learning_rate": 1.1086780210867802e-05, "loss": 0.6301, "step": 27416 }, { "epoch": 0.8004729788911272, "grad_norm": 0.5932208856590072, "learning_rate": 1.1085158150851583e-05, "loss": 0.7166, "step": 27417 }, { "epoch": 0.8005021751189746, "grad_norm": 0.5325648614866436, "learning_rate": 1.1083536090835361e-05, "loss": 0.5587, "step": 27418 }, { "epoch": 0.800531371346822, "grad_norm": 0.503861583489202, "learning_rate": 1.1081914030819141e-05, "loss": 0.5104, "step": 27419 }, { "epoch": 0.8005605675746693, "grad_norm": 0.49387307061380054, "learning_rate": 1.108029197080292e-05, "loss": 0.5535, "step": 27420 }, { "epoch": 0.8005897638025167, "grad_norm": 0.545258360810892, "learning_rate": 1.10786699107867e-05, "loss": 0.6104, "step": 27421 }, { "epoch": 0.800618960030364, "grad_norm": 0.48887888802239543, "learning_rate": 1.107704785077048e-05, "loss": 0.5205, "step": 27422 }, { "epoch": 0.8006481562582114, "grad_norm": 0.4700004708865358, "learning_rate": 1.1075425790754258e-05, "loss": 0.498, "step": 27423 }, { "epoch": 0.8006773524860588, "grad_norm": 0.5549276597061878, "learning_rate": 1.1073803730738038e-05, "loss": 0.518, "step": 27424 }, { "epoch": 0.8007065487139061, "grad_norm": 0.5106074558246274, "learning_rate": 1.1072181670721818e-05, "loss": 0.5172, "step": 27425 }, { "epoch": 0.8007357449417535, "grad_norm": 0.4666235524162499, "learning_rate": 1.1070559610705596e-05, "loss": 0.4886, "step": 27426 }, { "epoch": 0.8007649411696008, "grad_norm": 0.4855185688077137, "learning_rate": 1.1068937550689376e-05, "loss": 0.5059, "step": 27427 }, { "epoch": 0.8007941373974482, "grad_norm": 0.5443513121344489, "learning_rate": 1.1067315490673155e-05, "loss": 0.5771, "step": 27428 }, { "epoch": 0.8008233336252956, "grad_norm": 0.5306588624137316, "learning_rate": 1.1065693430656935e-05, "loss": 0.5707, "step": 27429 }, { "epoch": 0.8008525298531429, "grad_norm": 0.5462658654148144, "learning_rate": 1.1064071370640715e-05, "loss": 0.6752, "step": 27430 }, { "epoch": 0.8008817260809903, "grad_norm": 0.5451266626926199, "learning_rate": 1.1062449310624493e-05, "loss": 0.6353, "step": 27431 }, { "epoch": 0.8009109223088376, "grad_norm": 0.5482731222026045, "learning_rate": 1.1060827250608273e-05, "loss": 0.554, "step": 27432 }, { "epoch": 0.800940118536685, "grad_norm": 0.5665681175830442, "learning_rate": 1.1059205190592052e-05, "loss": 0.6781, "step": 27433 }, { "epoch": 0.8009693147645324, "grad_norm": 0.5144293901302728, "learning_rate": 1.1057583130575832e-05, "loss": 0.5699, "step": 27434 }, { "epoch": 0.8009985109923797, "grad_norm": 0.5166046165365518, "learning_rate": 1.1055961070559612e-05, "loss": 0.5658, "step": 27435 }, { "epoch": 0.8010277072202271, "grad_norm": 0.4989283182093422, "learning_rate": 1.105433901054339e-05, "loss": 0.5635, "step": 27436 }, { "epoch": 0.8010569034480746, "grad_norm": 0.49085408939149144, "learning_rate": 1.105271695052717e-05, "loss": 0.5227, "step": 27437 }, { "epoch": 0.8010860996759219, "grad_norm": 0.5501137152508737, "learning_rate": 1.1051094890510949e-05, "loss": 0.5889, "step": 27438 }, { "epoch": 0.8011152959037693, "grad_norm": 0.5487529052123375, "learning_rate": 1.1049472830494729e-05, "loss": 0.5841, "step": 27439 }, { "epoch": 0.8011444921316166, "grad_norm": 0.5380479267901317, "learning_rate": 1.1047850770478509e-05, "loss": 0.6356, "step": 27440 }, { "epoch": 0.801173688359464, "grad_norm": 0.5412823870405442, "learning_rate": 1.1046228710462287e-05, "loss": 0.6381, "step": 27441 }, { "epoch": 0.8012028845873114, "grad_norm": 0.5259820908388413, "learning_rate": 1.1044606650446067e-05, "loss": 0.6012, "step": 27442 }, { "epoch": 0.8012320808151587, "grad_norm": 0.5207583847071945, "learning_rate": 1.1042984590429846e-05, "loss": 0.5101, "step": 27443 }, { "epoch": 0.8012612770430061, "grad_norm": 0.5366101618730544, "learning_rate": 1.1041362530413626e-05, "loss": 0.6272, "step": 27444 }, { "epoch": 0.8012904732708535, "grad_norm": 0.5073952293573595, "learning_rate": 1.1039740470397406e-05, "loss": 0.5591, "step": 27445 }, { "epoch": 0.8013196694987008, "grad_norm": 0.5430231986070412, "learning_rate": 1.1038118410381184e-05, "loss": 0.6476, "step": 27446 }, { "epoch": 0.8013488657265482, "grad_norm": 0.4980776353190739, "learning_rate": 1.1036496350364964e-05, "loss": 0.5365, "step": 27447 }, { "epoch": 0.8013780619543955, "grad_norm": 0.4756136950473372, "learning_rate": 1.1034874290348743e-05, "loss": 0.4698, "step": 27448 }, { "epoch": 0.8014072581822429, "grad_norm": 0.5782246731416573, "learning_rate": 1.1033252230332523e-05, "loss": 0.6064, "step": 27449 }, { "epoch": 0.8014364544100903, "grad_norm": 0.5116759214035459, "learning_rate": 1.1031630170316303e-05, "loss": 0.523, "step": 27450 }, { "epoch": 0.8014656506379376, "grad_norm": 0.603081666710934, "learning_rate": 1.1030008110300081e-05, "loss": 0.6646, "step": 27451 }, { "epoch": 0.801494846865785, "grad_norm": 0.5469730269989804, "learning_rate": 1.1028386050283861e-05, "loss": 0.5992, "step": 27452 }, { "epoch": 0.8015240430936323, "grad_norm": 0.49284221146721013, "learning_rate": 1.102676399026764e-05, "loss": 0.5224, "step": 27453 }, { "epoch": 0.8015532393214797, "grad_norm": 0.48239249621580976, "learning_rate": 1.102514193025142e-05, "loss": 0.4876, "step": 27454 }, { "epoch": 0.8015824355493271, "grad_norm": 0.4702084706082524, "learning_rate": 1.10235198702352e-05, "loss": 0.5101, "step": 27455 }, { "epoch": 0.8016116317771744, "grad_norm": 0.4974602257845095, "learning_rate": 1.1021897810218978e-05, "loss": 0.5429, "step": 27456 }, { "epoch": 0.8016408280050218, "grad_norm": 0.5298216749978103, "learning_rate": 1.1020275750202758e-05, "loss": 0.6097, "step": 27457 }, { "epoch": 0.8016700242328691, "grad_norm": 0.48483482808485806, "learning_rate": 1.1018653690186538e-05, "loss": 0.5261, "step": 27458 }, { "epoch": 0.8016992204607165, "grad_norm": 0.5353230058483969, "learning_rate": 1.1017031630170317e-05, "loss": 0.5936, "step": 27459 }, { "epoch": 0.8017284166885639, "grad_norm": 0.5496141855787078, "learning_rate": 1.1015409570154097e-05, "loss": 0.6568, "step": 27460 }, { "epoch": 0.8017576129164112, "grad_norm": 0.5408659529815073, "learning_rate": 1.1013787510137875e-05, "loss": 0.6258, "step": 27461 }, { "epoch": 0.8017868091442586, "grad_norm": 0.4869750018961557, "learning_rate": 1.1012165450121655e-05, "loss": 0.5219, "step": 27462 }, { "epoch": 0.801816005372106, "grad_norm": 0.49735850551637556, "learning_rate": 1.1010543390105435e-05, "loss": 0.5582, "step": 27463 }, { "epoch": 0.8018452015999533, "grad_norm": 0.5162845813124958, "learning_rate": 1.1008921330089214e-05, "loss": 0.5444, "step": 27464 }, { "epoch": 0.8018743978278007, "grad_norm": 0.49824480808340466, "learning_rate": 1.1007299270072994e-05, "loss": 0.5799, "step": 27465 }, { "epoch": 0.801903594055648, "grad_norm": 0.5321629456776207, "learning_rate": 1.1005677210056772e-05, "loss": 0.6038, "step": 27466 }, { "epoch": 0.8019327902834954, "grad_norm": 0.5086728344213393, "learning_rate": 1.1004055150040552e-05, "loss": 0.5316, "step": 27467 }, { "epoch": 0.8019619865113428, "grad_norm": 0.5486782442785366, "learning_rate": 1.1002433090024332e-05, "loss": 0.6028, "step": 27468 }, { "epoch": 0.8019911827391901, "grad_norm": 0.5783852803972043, "learning_rate": 1.100081103000811e-05, "loss": 0.6489, "step": 27469 }, { "epoch": 0.8020203789670375, "grad_norm": 0.5590147415713176, "learning_rate": 1.099918896999189e-05, "loss": 0.6342, "step": 27470 }, { "epoch": 0.8020495751948848, "grad_norm": 0.5608223152095241, "learning_rate": 1.0997566909975669e-05, "loss": 0.6814, "step": 27471 }, { "epoch": 0.8020787714227322, "grad_norm": 0.5551559867331486, "learning_rate": 1.0995944849959449e-05, "loss": 0.6485, "step": 27472 }, { "epoch": 0.8021079676505796, "grad_norm": 0.5021060423312675, "learning_rate": 1.0994322789943229e-05, "loss": 0.5385, "step": 27473 }, { "epoch": 0.8021371638784269, "grad_norm": 0.5293169590031767, "learning_rate": 1.0992700729927007e-05, "loss": 0.5751, "step": 27474 }, { "epoch": 0.8021663601062743, "grad_norm": 0.5094882440397583, "learning_rate": 1.0991078669910788e-05, "loss": 0.5227, "step": 27475 }, { "epoch": 0.8021955563341217, "grad_norm": 0.5223219762199827, "learning_rate": 1.0989456609894566e-05, "loss": 0.5555, "step": 27476 }, { "epoch": 0.802224752561969, "grad_norm": 0.5150136969444987, "learning_rate": 1.0987834549878346e-05, "loss": 0.5582, "step": 27477 }, { "epoch": 0.8022539487898164, "grad_norm": 0.4962861569884658, "learning_rate": 1.0986212489862126e-05, "loss": 0.52, "step": 27478 }, { "epoch": 0.8022831450176637, "grad_norm": 0.5170475614758513, "learning_rate": 1.0984590429845904e-05, "loss": 0.5747, "step": 27479 }, { "epoch": 0.8023123412455111, "grad_norm": 0.5094673342690514, "learning_rate": 1.0982968369829684e-05, "loss": 0.5707, "step": 27480 }, { "epoch": 0.8023415374733585, "grad_norm": 0.5245504710919054, "learning_rate": 1.0981346309813463e-05, "loss": 0.5677, "step": 27481 }, { "epoch": 0.8023707337012058, "grad_norm": 0.5955873848611057, "learning_rate": 1.0979724249797243e-05, "loss": 0.6818, "step": 27482 }, { "epoch": 0.8023999299290532, "grad_norm": 0.49924888307094045, "learning_rate": 1.0978102189781023e-05, "loss": 0.5426, "step": 27483 }, { "epoch": 0.8024291261569005, "grad_norm": 0.4923357560183448, "learning_rate": 1.0976480129764801e-05, "loss": 0.5517, "step": 27484 }, { "epoch": 0.8024583223847479, "grad_norm": 0.5239189787315194, "learning_rate": 1.0974858069748581e-05, "loss": 0.6033, "step": 27485 }, { "epoch": 0.8024875186125953, "grad_norm": 0.4936587157678076, "learning_rate": 1.097323600973236e-05, "loss": 0.509, "step": 27486 }, { "epoch": 0.8025167148404426, "grad_norm": 0.5367966950242502, "learning_rate": 1.097161394971614e-05, "loss": 0.6096, "step": 27487 }, { "epoch": 0.80254591106829, "grad_norm": 0.544214543602821, "learning_rate": 1.096999188969992e-05, "loss": 0.6233, "step": 27488 }, { "epoch": 0.8025751072961373, "grad_norm": 0.5975956341435387, "learning_rate": 1.0968369829683698e-05, "loss": 0.7295, "step": 27489 }, { "epoch": 0.8026043035239847, "grad_norm": 0.5435127204003442, "learning_rate": 1.0966747769667478e-05, "loss": 0.6095, "step": 27490 }, { "epoch": 0.8026334997518321, "grad_norm": 0.5225027184790378, "learning_rate": 1.0965125709651258e-05, "loss": 0.5943, "step": 27491 }, { "epoch": 0.8026626959796794, "grad_norm": 0.49852556325002795, "learning_rate": 1.0963503649635037e-05, "loss": 0.5395, "step": 27492 }, { "epoch": 0.8026918922075268, "grad_norm": 0.48506167384339, "learning_rate": 1.0961881589618817e-05, "loss": 0.5112, "step": 27493 }, { "epoch": 0.8027210884353742, "grad_norm": 0.5044408857528593, "learning_rate": 1.0960259529602595e-05, "loss": 0.5418, "step": 27494 }, { "epoch": 0.8027502846632215, "grad_norm": 0.5565592242714554, "learning_rate": 1.0958637469586375e-05, "loss": 0.6087, "step": 27495 }, { "epoch": 0.8027794808910689, "grad_norm": 0.5497495464699117, "learning_rate": 1.0957015409570155e-05, "loss": 0.6393, "step": 27496 }, { "epoch": 0.8028086771189162, "grad_norm": 0.5277099468943728, "learning_rate": 1.0955393349553934e-05, "loss": 0.6112, "step": 27497 }, { "epoch": 0.8028378733467636, "grad_norm": 0.5668304233514936, "learning_rate": 1.0953771289537714e-05, "loss": 0.6592, "step": 27498 }, { "epoch": 0.802867069574611, "grad_norm": 0.49168988994818996, "learning_rate": 1.0952149229521492e-05, "loss": 0.5262, "step": 27499 }, { "epoch": 0.8028962658024583, "grad_norm": 0.5260768193784962, "learning_rate": 1.0950527169505272e-05, "loss": 0.5668, "step": 27500 }, { "epoch": 0.8029254620303057, "grad_norm": 0.51963422213448, "learning_rate": 1.0948905109489052e-05, "loss": 0.5545, "step": 27501 }, { "epoch": 0.802954658258153, "grad_norm": 0.5219227267740378, "learning_rate": 1.094728304947283e-05, "loss": 0.5807, "step": 27502 }, { "epoch": 0.8029838544860004, "grad_norm": 0.5236760378196068, "learning_rate": 1.094566098945661e-05, "loss": 0.5727, "step": 27503 }, { "epoch": 0.8030130507138478, "grad_norm": 0.5387857341473221, "learning_rate": 1.094403892944039e-05, "loss": 0.6114, "step": 27504 }, { "epoch": 0.8030422469416951, "grad_norm": 0.5117920010708138, "learning_rate": 1.094241686942417e-05, "loss": 0.5331, "step": 27505 }, { "epoch": 0.8030714431695425, "grad_norm": 0.5166640899266844, "learning_rate": 1.094079480940795e-05, "loss": 0.5559, "step": 27506 }, { "epoch": 0.8031006393973898, "grad_norm": 0.5260445535878072, "learning_rate": 1.0939172749391728e-05, "loss": 0.6159, "step": 27507 }, { "epoch": 0.8031298356252372, "grad_norm": 0.5463334834736061, "learning_rate": 1.0937550689375508e-05, "loss": 0.564, "step": 27508 }, { "epoch": 0.8031590318530846, "grad_norm": 0.5282617656545195, "learning_rate": 1.0935928629359286e-05, "loss": 0.6046, "step": 27509 }, { "epoch": 0.8031882280809319, "grad_norm": 0.5223013252163164, "learning_rate": 1.0934306569343066e-05, "loss": 0.6013, "step": 27510 }, { "epoch": 0.8032174243087793, "grad_norm": 0.5200821976999349, "learning_rate": 1.0932684509326846e-05, "loss": 0.5643, "step": 27511 }, { "epoch": 0.8032466205366267, "grad_norm": 0.577852688716977, "learning_rate": 1.0931062449310625e-05, "loss": 0.6563, "step": 27512 }, { "epoch": 0.803275816764474, "grad_norm": 0.5357990786397386, "learning_rate": 1.0929440389294405e-05, "loss": 0.6165, "step": 27513 }, { "epoch": 0.8033050129923214, "grad_norm": 0.5369132517852467, "learning_rate": 1.0927818329278183e-05, "loss": 0.6383, "step": 27514 }, { "epoch": 0.8033342092201687, "grad_norm": 0.5292847567061951, "learning_rate": 1.0926196269261963e-05, "loss": 0.6182, "step": 27515 }, { "epoch": 0.8033634054480161, "grad_norm": 0.5380719823495286, "learning_rate": 1.0924574209245743e-05, "loss": 0.617, "step": 27516 }, { "epoch": 0.8033926016758635, "grad_norm": 0.5483623332386448, "learning_rate": 1.0922952149229522e-05, "loss": 0.6723, "step": 27517 }, { "epoch": 0.8034217979037108, "grad_norm": 0.5367022817940315, "learning_rate": 1.0921330089213302e-05, "loss": 0.6063, "step": 27518 }, { "epoch": 0.8034509941315582, "grad_norm": 0.5598626776434225, "learning_rate": 1.091970802919708e-05, "loss": 0.6576, "step": 27519 }, { "epoch": 0.8034801903594055, "grad_norm": 0.509055072533307, "learning_rate": 1.091808596918086e-05, "loss": 0.5834, "step": 27520 }, { "epoch": 0.8035093865872529, "grad_norm": 0.5258558179878176, "learning_rate": 1.091646390916464e-05, "loss": 0.6144, "step": 27521 }, { "epoch": 0.8035385828151003, "grad_norm": 0.5125729126629692, "learning_rate": 1.0914841849148419e-05, "loss": 0.5564, "step": 27522 }, { "epoch": 0.8035677790429476, "grad_norm": 0.5090543953751624, "learning_rate": 1.0913219789132199e-05, "loss": 0.5948, "step": 27523 }, { "epoch": 0.803596975270795, "grad_norm": 0.5318275670619099, "learning_rate": 1.0911597729115979e-05, "loss": 0.592, "step": 27524 }, { "epoch": 0.8036261714986423, "grad_norm": 0.5502802487433212, "learning_rate": 1.0909975669099757e-05, "loss": 0.6178, "step": 27525 }, { "epoch": 0.8036553677264897, "grad_norm": 0.49673587410918163, "learning_rate": 1.0908353609083537e-05, "loss": 0.5664, "step": 27526 }, { "epoch": 0.8036845639543371, "grad_norm": 0.5338743062685852, "learning_rate": 1.0906731549067316e-05, "loss": 0.633, "step": 27527 }, { "epoch": 0.8037137601821844, "grad_norm": 0.53413168351679, "learning_rate": 1.0905109489051096e-05, "loss": 0.6134, "step": 27528 }, { "epoch": 0.8037429564100318, "grad_norm": 0.5342019904797614, "learning_rate": 1.0903487429034876e-05, "loss": 0.5608, "step": 27529 }, { "epoch": 0.8037721526378792, "grad_norm": 0.4891842625077199, "learning_rate": 1.0901865369018654e-05, "loss": 0.5315, "step": 27530 }, { "epoch": 0.8038013488657265, "grad_norm": 0.5783353455201681, "learning_rate": 1.0900243309002434e-05, "loss": 0.653, "step": 27531 }, { "epoch": 0.8038305450935739, "grad_norm": 0.5425924419292133, "learning_rate": 1.0898621248986212e-05, "loss": 0.6127, "step": 27532 }, { "epoch": 0.8038597413214212, "grad_norm": 0.5134551574341669, "learning_rate": 1.0896999188969993e-05, "loss": 0.5297, "step": 27533 }, { "epoch": 0.8038889375492686, "grad_norm": 0.5225882874080721, "learning_rate": 1.0895377128953773e-05, "loss": 0.5954, "step": 27534 }, { "epoch": 0.803918133777116, "grad_norm": 0.5827463312721869, "learning_rate": 1.0893755068937551e-05, "loss": 0.7152, "step": 27535 }, { "epoch": 0.8039473300049633, "grad_norm": 0.5433754622770164, "learning_rate": 1.0892133008921331e-05, "loss": 0.6307, "step": 27536 }, { "epoch": 0.8039765262328107, "grad_norm": 0.49677295075524236, "learning_rate": 1.089051094890511e-05, "loss": 0.5166, "step": 27537 }, { "epoch": 0.804005722460658, "grad_norm": 0.5043191433942749, "learning_rate": 1.088888888888889e-05, "loss": 0.5449, "step": 27538 }, { "epoch": 0.8040349186885054, "grad_norm": 0.5127820810397051, "learning_rate": 1.088726682887267e-05, "loss": 0.5428, "step": 27539 }, { "epoch": 0.8040641149163528, "grad_norm": 0.5642881048714101, "learning_rate": 1.0885644768856448e-05, "loss": 0.6769, "step": 27540 }, { "epoch": 0.8040933111442001, "grad_norm": 0.5447103455961397, "learning_rate": 1.0884022708840228e-05, "loss": 0.6021, "step": 27541 }, { "epoch": 0.8041225073720475, "grad_norm": 0.5292837038050917, "learning_rate": 1.0882400648824006e-05, "loss": 0.6147, "step": 27542 }, { "epoch": 0.8041517035998949, "grad_norm": 0.5531722268146528, "learning_rate": 1.0880778588807786e-05, "loss": 0.5734, "step": 27543 }, { "epoch": 0.8041808998277422, "grad_norm": 0.510359779574927, "learning_rate": 1.0879156528791566e-05, "loss": 0.5702, "step": 27544 }, { "epoch": 0.8042100960555896, "grad_norm": 0.520353638521631, "learning_rate": 1.0877534468775345e-05, "loss": 0.5933, "step": 27545 }, { "epoch": 0.8042392922834369, "grad_norm": 0.5237821115785166, "learning_rate": 1.0875912408759125e-05, "loss": 0.6081, "step": 27546 }, { "epoch": 0.8042684885112843, "grad_norm": 0.5189108739373803, "learning_rate": 1.0874290348742903e-05, "loss": 0.5434, "step": 27547 }, { "epoch": 0.8042976847391317, "grad_norm": 0.520647866874417, "learning_rate": 1.0872668288726683e-05, "loss": 0.5769, "step": 27548 }, { "epoch": 0.804326880966979, "grad_norm": 0.5517836015481713, "learning_rate": 1.0871046228710463e-05, "loss": 0.6452, "step": 27549 }, { "epoch": 0.8043560771948264, "grad_norm": 0.5460505309488535, "learning_rate": 1.0869424168694242e-05, "loss": 0.6191, "step": 27550 }, { "epoch": 0.8043852734226737, "grad_norm": 0.5332301048780406, "learning_rate": 1.0867802108678022e-05, "loss": 0.5925, "step": 27551 }, { "epoch": 0.8044144696505211, "grad_norm": 0.5327116267673478, "learning_rate": 1.08661800486618e-05, "loss": 0.6226, "step": 27552 }, { "epoch": 0.8044436658783685, "grad_norm": 0.515967761306132, "learning_rate": 1.086455798864558e-05, "loss": 0.5613, "step": 27553 }, { "epoch": 0.8044728621062158, "grad_norm": 0.4791369465920476, "learning_rate": 1.086293592862936e-05, "loss": 0.5401, "step": 27554 }, { "epoch": 0.8045020583340632, "grad_norm": 0.5161698545006498, "learning_rate": 1.0861313868613139e-05, "loss": 0.5712, "step": 27555 }, { "epoch": 0.8045312545619105, "grad_norm": 0.5466922225914366, "learning_rate": 1.0859691808596919e-05, "loss": 0.6242, "step": 27556 }, { "epoch": 0.8045604507897579, "grad_norm": 0.5736933806854301, "learning_rate": 1.0858069748580699e-05, "loss": 0.6604, "step": 27557 }, { "epoch": 0.8045896470176054, "grad_norm": 0.5346125676803853, "learning_rate": 1.0856447688564477e-05, "loss": 0.5827, "step": 27558 }, { "epoch": 0.8046188432454527, "grad_norm": 0.5280173040609817, "learning_rate": 1.0854825628548257e-05, "loss": 0.6157, "step": 27559 }, { "epoch": 0.8046480394733001, "grad_norm": 0.5281574623852802, "learning_rate": 1.0853203568532036e-05, "loss": 0.5839, "step": 27560 }, { "epoch": 0.8046772357011475, "grad_norm": 0.544863780509861, "learning_rate": 1.0851581508515816e-05, "loss": 0.6154, "step": 27561 }, { "epoch": 0.8047064319289948, "grad_norm": 0.5380544521891443, "learning_rate": 1.0849959448499596e-05, "loss": 0.6171, "step": 27562 }, { "epoch": 0.8047356281568422, "grad_norm": 0.5242491767028326, "learning_rate": 1.0848337388483374e-05, "loss": 0.565, "step": 27563 }, { "epoch": 0.8047648243846895, "grad_norm": 0.5391556679426183, "learning_rate": 1.0846715328467154e-05, "loss": 0.5767, "step": 27564 }, { "epoch": 0.8047940206125369, "grad_norm": 0.5015484150575981, "learning_rate": 1.0845093268450933e-05, "loss": 0.5465, "step": 27565 }, { "epoch": 0.8048232168403843, "grad_norm": 0.5357181308097857, "learning_rate": 1.0843471208434713e-05, "loss": 0.5771, "step": 27566 }, { "epoch": 0.8048524130682316, "grad_norm": 0.5281144885874417, "learning_rate": 1.0841849148418493e-05, "loss": 0.5871, "step": 27567 }, { "epoch": 0.804881609296079, "grad_norm": 0.5066620999247442, "learning_rate": 1.0840227088402271e-05, "loss": 0.5685, "step": 27568 }, { "epoch": 0.8049108055239264, "grad_norm": 0.5351514790723969, "learning_rate": 1.0838605028386051e-05, "loss": 0.6122, "step": 27569 }, { "epoch": 0.8049400017517737, "grad_norm": 0.5223540688476631, "learning_rate": 1.083698296836983e-05, "loss": 0.5622, "step": 27570 }, { "epoch": 0.8049691979796211, "grad_norm": 0.5288297953708313, "learning_rate": 1.083536090835361e-05, "loss": 0.6035, "step": 27571 }, { "epoch": 0.8049983942074684, "grad_norm": 0.5210942747006011, "learning_rate": 1.083373884833739e-05, "loss": 0.5818, "step": 27572 }, { "epoch": 0.8050275904353158, "grad_norm": 0.5177677728915435, "learning_rate": 1.0832116788321168e-05, "loss": 0.5459, "step": 27573 }, { "epoch": 0.8050567866631632, "grad_norm": 0.5031626649221381, "learning_rate": 1.0830494728304948e-05, "loss": 0.5252, "step": 27574 }, { "epoch": 0.8050859828910105, "grad_norm": 0.5383916996544617, "learning_rate": 1.0828872668288727e-05, "loss": 0.5892, "step": 27575 }, { "epoch": 0.8051151791188579, "grad_norm": 0.5681194758989486, "learning_rate": 1.0827250608272507e-05, "loss": 0.6489, "step": 27576 }, { "epoch": 0.8051443753467052, "grad_norm": 0.514078739808727, "learning_rate": 1.0825628548256287e-05, "loss": 0.5823, "step": 27577 }, { "epoch": 0.8051735715745526, "grad_norm": 0.5410128124352596, "learning_rate": 1.0824006488240065e-05, "loss": 0.6186, "step": 27578 }, { "epoch": 0.8052027678024, "grad_norm": 0.5009826865682806, "learning_rate": 1.0822384428223845e-05, "loss": 0.5448, "step": 27579 }, { "epoch": 0.8052319640302473, "grad_norm": 0.5453350087297802, "learning_rate": 1.0820762368207624e-05, "loss": 0.6442, "step": 27580 }, { "epoch": 0.8052611602580947, "grad_norm": 0.5926491559348769, "learning_rate": 1.0819140308191404e-05, "loss": 0.7342, "step": 27581 }, { "epoch": 0.805290356485942, "grad_norm": 0.5265153265400548, "learning_rate": 1.0817518248175184e-05, "loss": 0.5878, "step": 27582 }, { "epoch": 0.8053195527137894, "grad_norm": 0.5260532765279887, "learning_rate": 1.0815896188158962e-05, "loss": 0.6048, "step": 27583 }, { "epoch": 0.8053487489416368, "grad_norm": 0.5028096871078743, "learning_rate": 1.0814274128142742e-05, "loss": 0.588, "step": 27584 }, { "epoch": 0.8053779451694841, "grad_norm": 0.5157164077932337, "learning_rate": 1.081265206812652e-05, "loss": 0.5712, "step": 27585 }, { "epoch": 0.8054071413973315, "grad_norm": 0.4737504426826351, "learning_rate": 1.08110300081103e-05, "loss": 0.5101, "step": 27586 }, { "epoch": 0.8054363376251789, "grad_norm": 0.5413985955891868, "learning_rate": 1.080940794809408e-05, "loss": 0.56, "step": 27587 }, { "epoch": 0.8054655338530262, "grad_norm": 0.6313230113178367, "learning_rate": 1.0807785888077859e-05, "loss": 0.5993, "step": 27588 }, { "epoch": 0.8054947300808736, "grad_norm": 0.5115980229049433, "learning_rate": 1.0806163828061639e-05, "loss": 0.5797, "step": 27589 }, { "epoch": 0.8055239263087209, "grad_norm": 0.5178939622063721, "learning_rate": 1.0804541768045417e-05, "loss": 0.5328, "step": 27590 }, { "epoch": 0.8055531225365683, "grad_norm": 0.5120278249042928, "learning_rate": 1.0802919708029198e-05, "loss": 0.5755, "step": 27591 }, { "epoch": 0.8055823187644157, "grad_norm": 0.5306828487387064, "learning_rate": 1.0801297648012978e-05, "loss": 0.6062, "step": 27592 }, { "epoch": 0.805611514992263, "grad_norm": 0.527139933880818, "learning_rate": 1.0799675587996756e-05, "loss": 0.6016, "step": 27593 }, { "epoch": 0.8056407112201104, "grad_norm": 0.521836334220425, "learning_rate": 1.0798053527980536e-05, "loss": 0.5125, "step": 27594 }, { "epoch": 0.8056699074479577, "grad_norm": 0.5043149824173143, "learning_rate": 1.0796431467964316e-05, "loss": 0.5502, "step": 27595 }, { "epoch": 0.8056991036758051, "grad_norm": 0.5643843294918913, "learning_rate": 1.0794809407948094e-05, "loss": 0.656, "step": 27596 }, { "epoch": 0.8057282999036525, "grad_norm": 0.5060412139069234, "learning_rate": 1.0793187347931875e-05, "loss": 0.5685, "step": 27597 }, { "epoch": 0.8057574961314998, "grad_norm": 0.5315428343844599, "learning_rate": 1.0791565287915653e-05, "loss": 0.5688, "step": 27598 }, { "epoch": 0.8057866923593472, "grad_norm": 0.5284704703041565, "learning_rate": 1.0789943227899433e-05, "loss": 0.5817, "step": 27599 }, { "epoch": 0.8058158885871946, "grad_norm": 0.49117028033010307, "learning_rate": 1.0788321167883213e-05, "loss": 0.5464, "step": 27600 }, { "epoch": 0.8058450848150419, "grad_norm": 0.5188062988667004, "learning_rate": 1.0786699107866991e-05, "loss": 0.5423, "step": 27601 }, { "epoch": 0.8058742810428893, "grad_norm": 0.5068811899933, "learning_rate": 1.0785077047850771e-05, "loss": 0.5391, "step": 27602 }, { "epoch": 0.8059034772707366, "grad_norm": 0.5442157668713328, "learning_rate": 1.078345498783455e-05, "loss": 0.6526, "step": 27603 }, { "epoch": 0.805932673498584, "grad_norm": 0.5214074226055498, "learning_rate": 1.078183292781833e-05, "loss": 0.5638, "step": 27604 }, { "epoch": 0.8059618697264314, "grad_norm": 0.46346708708959383, "learning_rate": 1.078021086780211e-05, "loss": 0.474, "step": 27605 }, { "epoch": 0.8059910659542787, "grad_norm": 0.575096111267301, "learning_rate": 1.0778588807785888e-05, "loss": 0.6916, "step": 27606 }, { "epoch": 0.8060202621821261, "grad_norm": 0.5128008830782074, "learning_rate": 1.0776966747769668e-05, "loss": 0.5806, "step": 27607 }, { "epoch": 0.8060494584099734, "grad_norm": 0.4956395561352755, "learning_rate": 1.0775344687753447e-05, "loss": 0.5413, "step": 27608 }, { "epoch": 0.8060786546378208, "grad_norm": 0.5357327073189448, "learning_rate": 1.0773722627737227e-05, "loss": 0.6054, "step": 27609 }, { "epoch": 0.8061078508656682, "grad_norm": 0.5834463432863196, "learning_rate": 1.0772100567721007e-05, "loss": 0.6495, "step": 27610 }, { "epoch": 0.8061370470935155, "grad_norm": 0.5197882208154486, "learning_rate": 1.0770478507704785e-05, "loss": 0.6043, "step": 27611 }, { "epoch": 0.8061662433213629, "grad_norm": 0.5055302766489406, "learning_rate": 1.0768856447688565e-05, "loss": 0.553, "step": 27612 }, { "epoch": 0.8061954395492102, "grad_norm": 0.5395108858272357, "learning_rate": 1.0767234387672344e-05, "loss": 0.6089, "step": 27613 }, { "epoch": 0.8062246357770576, "grad_norm": 0.48634473845839993, "learning_rate": 1.0765612327656124e-05, "loss": 0.5428, "step": 27614 }, { "epoch": 0.806253832004905, "grad_norm": 0.5215520896119754, "learning_rate": 1.0763990267639904e-05, "loss": 0.5756, "step": 27615 }, { "epoch": 0.8062830282327523, "grad_norm": 0.5184338634343972, "learning_rate": 1.0762368207623682e-05, "loss": 0.5798, "step": 27616 }, { "epoch": 0.8063122244605997, "grad_norm": 0.5421933868765093, "learning_rate": 1.0760746147607462e-05, "loss": 0.6043, "step": 27617 }, { "epoch": 0.806341420688447, "grad_norm": 0.5618948539842774, "learning_rate": 1.075912408759124e-05, "loss": 0.6793, "step": 27618 }, { "epoch": 0.8063706169162944, "grad_norm": 0.5280714858561484, "learning_rate": 1.075750202757502e-05, "loss": 0.5925, "step": 27619 }, { "epoch": 0.8063998131441418, "grad_norm": 0.509921033945789, "learning_rate": 1.07558799675588e-05, "loss": 0.5869, "step": 27620 }, { "epoch": 0.8064290093719891, "grad_norm": 0.5127151303369749, "learning_rate": 1.075425790754258e-05, "loss": 0.5637, "step": 27621 }, { "epoch": 0.8064582055998365, "grad_norm": 0.4830700878211009, "learning_rate": 1.075263584752636e-05, "loss": 0.5187, "step": 27622 }, { "epoch": 0.8064874018276839, "grad_norm": 0.5664616699024603, "learning_rate": 1.0751013787510138e-05, "loss": 0.6791, "step": 27623 }, { "epoch": 0.8065165980555312, "grad_norm": 0.5065495817928234, "learning_rate": 1.0749391727493918e-05, "loss": 0.5364, "step": 27624 }, { "epoch": 0.8065457942833786, "grad_norm": 0.4985843835306517, "learning_rate": 1.0747769667477698e-05, "loss": 0.5366, "step": 27625 }, { "epoch": 0.8065749905112259, "grad_norm": 0.5183072253758964, "learning_rate": 1.0746147607461476e-05, "loss": 0.5838, "step": 27626 }, { "epoch": 0.8066041867390733, "grad_norm": 0.4996049472986476, "learning_rate": 1.0744525547445256e-05, "loss": 0.5642, "step": 27627 }, { "epoch": 0.8066333829669207, "grad_norm": 0.5348585765070799, "learning_rate": 1.0742903487429036e-05, "loss": 0.6184, "step": 27628 }, { "epoch": 0.806662579194768, "grad_norm": 0.5388343525365051, "learning_rate": 1.0741281427412815e-05, "loss": 0.6136, "step": 27629 }, { "epoch": 0.8066917754226154, "grad_norm": 0.5616696496957545, "learning_rate": 1.0739659367396595e-05, "loss": 0.6447, "step": 27630 }, { "epoch": 0.8067209716504627, "grad_norm": 0.46938028432139967, "learning_rate": 1.0738037307380373e-05, "loss": 0.4615, "step": 27631 }, { "epoch": 0.8067501678783101, "grad_norm": 0.5198379028122547, "learning_rate": 1.0736415247364153e-05, "loss": 0.5743, "step": 27632 }, { "epoch": 0.8067793641061575, "grad_norm": 0.5262413848971602, "learning_rate": 1.0734793187347933e-05, "loss": 0.5742, "step": 27633 }, { "epoch": 0.8068085603340048, "grad_norm": 0.5077543323484806, "learning_rate": 1.0733171127331712e-05, "loss": 0.5964, "step": 27634 }, { "epoch": 0.8068377565618522, "grad_norm": 0.5029923533563144, "learning_rate": 1.0731549067315492e-05, "loss": 0.5626, "step": 27635 }, { "epoch": 0.8068669527896996, "grad_norm": 0.5408661259014907, "learning_rate": 1.072992700729927e-05, "loss": 0.6204, "step": 27636 }, { "epoch": 0.8068961490175469, "grad_norm": 0.5289415567513934, "learning_rate": 1.072830494728305e-05, "loss": 0.5771, "step": 27637 }, { "epoch": 0.8069253452453943, "grad_norm": 0.5139124247760991, "learning_rate": 1.072668288726683e-05, "loss": 0.5299, "step": 27638 }, { "epoch": 0.8069545414732416, "grad_norm": 0.530257023999752, "learning_rate": 1.0725060827250609e-05, "loss": 0.6251, "step": 27639 }, { "epoch": 0.806983737701089, "grad_norm": 0.5781265663987947, "learning_rate": 1.0723438767234389e-05, "loss": 0.6605, "step": 27640 }, { "epoch": 0.8070129339289364, "grad_norm": 0.5094675248553399, "learning_rate": 1.0721816707218167e-05, "loss": 0.539, "step": 27641 }, { "epoch": 0.8070421301567837, "grad_norm": 0.5004297474494714, "learning_rate": 1.0720194647201947e-05, "loss": 0.5561, "step": 27642 }, { "epoch": 0.8070713263846311, "grad_norm": 0.5214344495702324, "learning_rate": 1.0718572587185727e-05, "loss": 0.5564, "step": 27643 }, { "epoch": 0.8071005226124784, "grad_norm": 0.4882453247100642, "learning_rate": 1.0716950527169506e-05, "loss": 0.5549, "step": 27644 }, { "epoch": 0.8071297188403258, "grad_norm": 0.5007624071828815, "learning_rate": 1.0715328467153286e-05, "loss": 0.526, "step": 27645 }, { "epoch": 0.8071589150681732, "grad_norm": 0.5356687942333793, "learning_rate": 1.0713706407137064e-05, "loss": 0.6055, "step": 27646 }, { "epoch": 0.8071881112960205, "grad_norm": 0.5304393197559772, "learning_rate": 1.0712084347120844e-05, "loss": 0.5882, "step": 27647 }, { "epoch": 0.8072173075238679, "grad_norm": 0.5075858791076442, "learning_rate": 1.0710462287104624e-05, "loss": 0.5403, "step": 27648 }, { "epoch": 0.8072465037517152, "grad_norm": 0.540199064046039, "learning_rate": 1.0708840227088402e-05, "loss": 0.6025, "step": 27649 }, { "epoch": 0.8072756999795626, "grad_norm": 0.5338687043920096, "learning_rate": 1.0707218167072183e-05, "loss": 0.5926, "step": 27650 }, { "epoch": 0.80730489620741, "grad_norm": 0.4931904597534974, "learning_rate": 1.0705596107055961e-05, "loss": 0.5126, "step": 27651 }, { "epoch": 0.8073340924352573, "grad_norm": 0.5313075381726241, "learning_rate": 1.0703974047039741e-05, "loss": 0.5878, "step": 27652 }, { "epoch": 0.8073632886631047, "grad_norm": 0.5111609702676131, "learning_rate": 1.0702351987023521e-05, "loss": 0.55, "step": 27653 }, { "epoch": 0.8073924848909521, "grad_norm": 0.5258371194222046, "learning_rate": 1.07007299270073e-05, "loss": 0.6034, "step": 27654 }, { "epoch": 0.8074216811187994, "grad_norm": 0.530765619354656, "learning_rate": 1.069910786699108e-05, "loss": 0.5708, "step": 27655 }, { "epoch": 0.8074508773466468, "grad_norm": 0.5399609237369585, "learning_rate": 1.0697485806974858e-05, "loss": 0.6087, "step": 27656 }, { "epoch": 0.8074800735744941, "grad_norm": 0.5508496799646523, "learning_rate": 1.0695863746958638e-05, "loss": 0.6252, "step": 27657 }, { "epoch": 0.8075092698023415, "grad_norm": 0.49164729690029446, "learning_rate": 1.0694241686942418e-05, "loss": 0.5164, "step": 27658 }, { "epoch": 0.8075384660301889, "grad_norm": 0.5249464617549664, "learning_rate": 1.0692619626926196e-05, "loss": 0.5815, "step": 27659 }, { "epoch": 0.8075676622580362, "grad_norm": 0.5214787556793553, "learning_rate": 1.0690997566909976e-05, "loss": 0.5277, "step": 27660 }, { "epoch": 0.8075968584858836, "grad_norm": 0.551531245959551, "learning_rate": 1.0689375506893757e-05, "loss": 0.662, "step": 27661 }, { "epoch": 0.807626054713731, "grad_norm": 0.5298131751435168, "learning_rate": 1.0687753446877535e-05, "loss": 0.5869, "step": 27662 }, { "epoch": 0.8076552509415783, "grad_norm": 0.463740687143351, "learning_rate": 1.0686131386861315e-05, "loss": 0.4686, "step": 27663 }, { "epoch": 0.8076844471694257, "grad_norm": 0.5157967404975655, "learning_rate": 1.0684509326845093e-05, "loss": 0.5324, "step": 27664 }, { "epoch": 0.807713643397273, "grad_norm": 0.554797016389436, "learning_rate": 1.0682887266828873e-05, "loss": 0.6056, "step": 27665 }, { "epoch": 0.8077428396251204, "grad_norm": 0.5477721062185646, "learning_rate": 1.0681265206812653e-05, "loss": 0.6326, "step": 27666 }, { "epoch": 0.8077720358529678, "grad_norm": 0.5292138004082619, "learning_rate": 1.0679643146796432e-05, "loss": 0.5877, "step": 27667 }, { "epoch": 0.8078012320808151, "grad_norm": 0.5564846576804906, "learning_rate": 1.0678021086780212e-05, "loss": 0.6447, "step": 27668 }, { "epoch": 0.8078304283086625, "grad_norm": 0.5181095252008344, "learning_rate": 1.067639902676399e-05, "loss": 0.5569, "step": 27669 }, { "epoch": 0.8078596245365098, "grad_norm": 0.49657525074518566, "learning_rate": 1.067477696674777e-05, "loss": 0.543, "step": 27670 }, { "epoch": 0.8078888207643572, "grad_norm": 0.4897237857954446, "learning_rate": 1.067315490673155e-05, "loss": 0.5302, "step": 27671 }, { "epoch": 0.8079180169922046, "grad_norm": 0.5423600272065344, "learning_rate": 1.0671532846715329e-05, "loss": 0.6099, "step": 27672 }, { "epoch": 0.8079472132200519, "grad_norm": 0.5615731632466401, "learning_rate": 1.0669910786699109e-05, "loss": 0.6099, "step": 27673 }, { "epoch": 0.8079764094478993, "grad_norm": 0.4927458881898081, "learning_rate": 1.0668288726682887e-05, "loss": 0.531, "step": 27674 }, { "epoch": 0.8080056056757466, "grad_norm": 0.5185354270188236, "learning_rate": 1.0666666666666667e-05, "loss": 0.5881, "step": 27675 }, { "epoch": 0.808034801903594, "grad_norm": 0.46442080984208284, "learning_rate": 1.0665044606650447e-05, "loss": 0.4624, "step": 27676 }, { "epoch": 0.8080639981314414, "grad_norm": 0.5374234415468645, "learning_rate": 1.0663422546634226e-05, "loss": 0.6082, "step": 27677 }, { "epoch": 0.8080931943592887, "grad_norm": 0.5382543582463104, "learning_rate": 1.0661800486618006e-05, "loss": 0.5921, "step": 27678 }, { "epoch": 0.8081223905871362, "grad_norm": 0.533509783736532, "learning_rate": 1.0660178426601784e-05, "loss": 0.6037, "step": 27679 }, { "epoch": 0.8081515868149836, "grad_norm": 0.5355013695531784, "learning_rate": 1.0658556366585564e-05, "loss": 0.5658, "step": 27680 }, { "epoch": 0.8081807830428309, "grad_norm": 0.5337429960630481, "learning_rate": 1.0656934306569344e-05, "loss": 0.6052, "step": 27681 }, { "epoch": 0.8082099792706783, "grad_norm": 0.5621311458484358, "learning_rate": 1.0655312246553123e-05, "loss": 0.5818, "step": 27682 }, { "epoch": 0.8082391754985256, "grad_norm": 0.51781221146108, "learning_rate": 1.0653690186536903e-05, "loss": 0.5873, "step": 27683 }, { "epoch": 0.808268371726373, "grad_norm": 0.5120369944106913, "learning_rate": 1.0652068126520681e-05, "loss": 0.5457, "step": 27684 }, { "epoch": 0.8082975679542204, "grad_norm": 0.5773015054205911, "learning_rate": 1.0650446066504461e-05, "loss": 0.6761, "step": 27685 }, { "epoch": 0.8083267641820677, "grad_norm": 0.5197180599827729, "learning_rate": 1.0648824006488241e-05, "loss": 0.6004, "step": 27686 }, { "epoch": 0.8083559604099151, "grad_norm": 0.5541255296549665, "learning_rate": 1.064720194647202e-05, "loss": 0.6207, "step": 27687 }, { "epoch": 0.8083851566377624, "grad_norm": 0.5001344537536403, "learning_rate": 1.06455798864558e-05, "loss": 0.5358, "step": 27688 }, { "epoch": 0.8084143528656098, "grad_norm": 0.485713163730539, "learning_rate": 1.0643957826439578e-05, "loss": 0.4837, "step": 27689 }, { "epoch": 0.8084435490934572, "grad_norm": 0.5276545199003133, "learning_rate": 1.0642335766423358e-05, "loss": 0.6186, "step": 27690 }, { "epoch": 0.8084727453213045, "grad_norm": 0.5487653248334097, "learning_rate": 1.0640713706407138e-05, "loss": 0.6505, "step": 27691 }, { "epoch": 0.8085019415491519, "grad_norm": 0.5360094797340336, "learning_rate": 1.0639091646390917e-05, "loss": 0.6052, "step": 27692 }, { "epoch": 0.8085311377769993, "grad_norm": 0.5464752300151907, "learning_rate": 1.0637469586374697e-05, "loss": 0.5756, "step": 27693 }, { "epoch": 0.8085603340048466, "grad_norm": 0.5023396055381887, "learning_rate": 1.0635847526358477e-05, "loss": 0.4864, "step": 27694 }, { "epoch": 0.808589530232694, "grad_norm": 0.5514883638355663, "learning_rate": 1.0634225466342255e-05, "loss": 0.6189, "step": 27695 }, { "epoch": 0.8086187264605413, "grad_norm": 0.5896257643171557, "learning_rate": 1.0632603406326035e-05, "loss": 0.6262, "step": 27696 }, { "epoch": 0.8086479226883887, "grad_norm": 0.4878301631154301, "learning_rate": 1.0630981346309814e-05, "loss": 0.5264, "step": 27697 }, { "epoch": 0.8086771189162361, "grad_norm": 0.5722289256914072, "learning_rate": 1.0629359286293594e-05, "loss": 0.6478, "step": 27698 }, { "epoch": 0.8087063151440834, "grad_norm": 0.5484614900043443, "learning_rate": 1.0627737226277374e-05, "loss": 0.6317, "step": 27699 }, { "epoch": 0.8087355113719308, "grad_norm": 0.5264704707375959, "learning_rate": 1.0626115166261152e-05, "loss": 0.57, "step": 27700 }, { "epoch": 0.8087647075997781, "grad_norm": 0.5437688253448905, "learning_rate": 1.0624493106244932e-05, "loss": 0.6353, "step": 27701 }, { "epoch": 0.8087939038276255, "grad_norm": 0.5003045334639109, "learning_rate": 1.062287104622871e-05, "loss": 0.5352, "step": 27702 }, { "epoch": 0.8088231000554729, "grad_norm": 0.5193857235331855, "learning_rate": 1.062124898621249e-05, "loss": 0.535, "step": 27703 }, { "epoch": 0.8088522962833202, "grad_norm": 0.5196249247661003, "learning_rate": 1.061962692619627e-05, "loss": 0.5849, "step": 27704 }, { "epoch": 0.8088814925111676, "grad_norm": 0.5316420838897445, "learning_rate": 1.0618004866180049e-05, "loss": 0.587, "step": 27705 }, { "epoch": 0.808910688739015, "grad_norm": 0.500547259578989, "learning_rate": 1.0616382806163829e-05, "loss": 0.511, "step": 27706 }, { "epoch": 0.8089398849668623, "grad_norm": 0.5012728511541164, "learning_rate": 1.0614760746147607e-05, "loss": 0.527, "step": 27707 }, { "epoch": 0.8089690811947097, "grad_norm": 0.4975747518420683, "learning_rate": 1.0613138686131386e-05, "loss": 0.5236, "step": 27708 }, { "epoch": 0.808998277422557, "grad_norm": 0.5295928937462479, "learning_rate": 1.0611516626115168e-05, "loss": 0.5631, "step": 27709 }, { "epoch": 0.8090274736504044, "grad_norm": 0.5341773217504586, "learning_rate": 1.0609894566098946e-05, "loss": 0.5824, "step": 27710 }, { "epoch": 0.8090566698782518, "grad_norm": 0.49010608102460235, "learning_rate": 1.0608272506082726e-05, "loss": 0.5466, "step": 27711 }, { "epoch": 0.8090858661060991, "grad_norm": 0.5110592429590743, "learning_rate": 1.0606650446066504e-05, "loss": 0.5678, "step": 27712 }, { "epoch": 0.8091150623339465, "grad_norm": 0.5093563755567632, "learning_rate": 1.0605028386050284e-05, "loss": 0.5788, "step": 27713 }, { "epoch": 0.8091442585617938, "grad_norm": 0.5403903410511492, "learning_rate": 1.0603406326034065e-05, "loss": 0.6302, "step": 27714 }, { "epoch": 0.8091734547896412, "grad_norm": 0.5301970205941778, "learning_rate": 1.0601784266017843e-05, "loss": 0.6444, "step": 27715 }, { "epoch": 0.8092026510174886, "grad_norm": 0.5113760133293697, "learning_rate": 1.0600162206001623e-05, "loss": 0.5624, "step": 27716 }, { "epoch": 0.8092318472453359, "grad_norm": 0.5474155356510553, "learning_rate": 1.0598540145985401e-05, "loss": 0.5811, "step": 27717 }, { "epoch": 0.8092610434731833, "grad_norm": 0.5062829329457327, "learning_rate": 1.0596918085969181e-05, "loss": 0.6038, "step": 27718 }, { "epoch": 0.8092902397010306, "grad_norm": 0.48748883529409054, "learning_rate": 1.0595296025952962e-05, "loss": 0.494, "step": 27719 }, { "epoch": 0.809319435928878, "grad_norm": 0.5088884463281489, "learning_rate": 1.059367396593674e-05, "loss": 0.5484, "step": 27720 }, { "epoch": 0.8093486321567254, "grad_norm": 0.4862132965430863, "learning_rate": 1.059205190592052e-05, "loss": 0.5004, "step": 27721 }, { "epoch": 0.8093778283845727, "grad_norm": 0.5095680032197925, "learning_rate": 1.0590429845904298e-05, "loss": 0.5247, "step": 27722 }, { "epoch": 0.8094070246124201, "grad_norm": 0.5559923100720294, "learning_rate": 1.0588807785888078e-05, "loss": 0.6304, "step": 27723 }, { "epoch": 0.8094362208402675, "grad_norm": 0.5653281796823307, "learning_rate": 1.0587185725871858e-05, "loss": 0.5545, "step": 27724 }, { "epoch": 0.8094654170681148, "grad_norm": 0.5674677358844238, "learning_rate": 1.0585563665855637e-05, "loss": 0.6559, "step": 27725 }, { "epoch": 0.8094946132959622, "grad_norm": 0.5821090204418646, "learning_rate": 1.0583941605839417e-05, "loss": 0.6447, "step": 27726 }, { "epoch": 0.8095238095238095, "grad_norm": 0.545780627063347, "learning_rate": 1.0582319545823197e-05, "loss": 0.5928, "step": 27727 }, { "epoch": 0.8095530057516569, "grad_norm": 0.5082158434456475, "learning_rate": 1.0580697485806975e-05, "loss": 0.5522, "step": 27728 }, { "epoch": 0.8095822019795043, "grad_norm": 0.4916267462182546, "learning_rate": 1.0579075425790755e-05, "loss": 0.5254, "step": 27729 }, { "epoch": 0.8096113982073516, "grad_norm": 0.5451139273034474, "learning_rate": 1.0577453365774534e-05, "loss": 0.5939, "step": 27730 }, { "epoch": 0.809640594435199, "grad_norm": 0.5386680591817898, "learning_rate": 1.0575831305758314e-05, "loss": 0.6205, "step": 27731 }, { "epoch": 0.8096697906630463, "grad_norm": 0.5770155442115793, "learning_rate": 1.0574209245742094e-05, "loss": 0.6919, "step": 27732 }, { "epoch": 0.8096989868908937, "grad_norm": 0.5299660782612546, "learning_rate": 1.0572587185725872e-05, "loss": 0.5641, "step": 27733 }, { "epoch": 0.8097281831187411, "grad_norm": 0.5652557788710727, "learning_rate": 1.0570965125709652e-05, "loss": 0.6843, "step": 27734 }, { "epoch": 0.8097573793465884, "grad_norm": 0.5272006198030016, "learning_rate": 1.056934306569343e-05, "loss": 0.597, "step": 27735 }, { "epoch": 0.8097865755744358, "grad_norm": 0.5216439650820807, "learning_rate": 1.056772100567721e-05, "loss": 0.5726, "step": 27736 }, { "epoch": 0.8098157718022831, "grad_norm": 0.5201024587615916, "learning_rate": 1.0566098945660991e-05, "loss": 0.5644, "step": 27737 }, { "epoch": 0.8098449680301305, "grad_norm": 0.5064804249124106, "learning_rate": 1.056447688564477e-05, "loss": 0.5649, "step": 27738 }, { "epoch": 0.8098741642579779, "grad_norm": 0.5277368370267943, "learning_rate": 1.056285482562855e-05, "loss": 0.6035, "step": 27739 }, { "epoch": 0.8099033604858252, "grad_norm": 0.544763153311129, "learning_rate": 1.0561232765612328e-05, "loss": 0.5787, "step": 27740 }, { "epoch": 0.8099325567136726, "grad_norm": 0.5528961098172003, "learning_rate": 1.0559610705596106e-05, "loss": 0.6283, "step": 27741 }, { "epoch": 0.80996175294152, "grad_norm": 0.5451754781809653, "learning_rate": 1.0557988645579888e-05, "loss": 0.6643, "step": 27742 }, { "epoch": 0.8099909491693673, "grad_norm": 0.5419983394976781, "learning_rate": 1.0556366585563666e-05, "loss": 0.6218, "step": 27743 }, { "epoch": 0.8100201453972147, "grad_norm": 0.5443863172483546, "learning_rate": 1.0554744525547446e-05, "loss": 0.6124, "step": 27744 }, { "epoch": 0.810049341625062, "grad_norm": 0.5399006315823794, "learning_rate": 1.0553122465531225e-05, "loss": 0.6264, "step": 27745 }, { "epoch": 0.8100785378529094, "grad_norm": 0.5054559922946531, "learning_rate": 1.0551500405515005e-05, "loss": 0.5786, "step": 27746 }, { "epoch": 0.8101077340807568, "grad_norm": 0.514266168705329, "learning_rate": 1.0549878345498785e-05, "loss": 0.5289, "step": 27747 }, { "epoch": 0.8101369303086041, "grad_norm": 0.5143322540543342, "learning_rate": 1.0548256285482563e-05, "loss": 0.5376, "step": 27748 }, { "epoch": 0.8101661265364515, "grad_norm": 0.5405434899607442, "learning_rate": 1.0546634225466343e-05, "loss": 0.6128, "step": 27749 }, { "epoch": 0.8101953227642988, "grad_norm": 0.5151723493354479, "learning_rate": 1.0545012165450122e-05, "loss": 0.5304, "step": 27750 }, { "epoch": 0.8102245189921462, "grad_norm": 0.47852290279194826, "learning_rate": 1.0543390105433902e-05, "loss": 0.4901, "step": 27751 }, { "epoch": 0.8102537152199936, "grad_norm": 0.5485521380023488, "learning_rate": 1.0541768045417682e-05, "loss": 0.634, "step": 27752 }, { "epoch": 0.8102829114478409, "grad_norm": 0.5316565151290017, "learning_rate": 1.054014598540146e-05, "loss": 0.5765, "step": 27753 }, { "epoch": 0.8103121076756883, "grad_norm": 0.51033672625289, "learning_rate": 1.053852392538524e-05, "loss": 0.5011, "step": 27754 }, { "epoch": 0.8103413039035356, "grad_norm": 0.5240533027435625, "learning_rate": 1.0536901865369019e-05, "loss": 0.6161, "step": 27755 }, { "epoch": 0.810370500131383, "grad_norm": 0.5023290417548059, "learning_rate": 1.0535279805352799e-05, "loss": 0.5424, "step": 27756 }, { "epoch": 0.8103996963592304, "grad_norm": 0.5344552037549708, "learning_rate": 1.0533657745336579e-05, "loss": 0.6469, "step": 27757 }, { "epoch": 0.8104288925870777, "grad_norm": 0.5384845999338558, "learning_rate": 1.0532035685320357e-05, "loss": 0.5632, "step": 27758 }, { "epoch": 0.8104580888149251, "grad_norm": 0.5435620219160087, "learning_rate": 1.0530413625304137e-05, "loss": 0.6197, "step": 27759 }, { "epoch": 0.8104872850427725, "grad_norm": 0.49151096009620243, "learning_rate": 1.0528791565287917e-05, "loss": 0.5104, "step": 27760 }, { "epoch": 0.8105164812706198, "grad_norm": 0.5847629783421376, "learning_rate": 1.0527169505271696e-05, "loss": 0.7393, "step": 27761 }, { "epoch": 0.8105456774984672, "grad_norm": 0.5410298851545379, "learning_rate": 1.0525547445255476e-05, "loss": 0.6193, "step": 27762 }, { "epoch": 0.8105748737263145, "grad_norm": 0.5477695139291701, "learning_rate": 1.0523925385239254e-05, "loss": 0.6219, "step": 27763 }, { "epoch": 0.8106040699541619, "grad_norm": 0.5339392402527158, "learning_rate": 1.0522303325223034e-05, "loss": 0.6408, "step": 27764 }, { "epoch": 0.8106332661820093, "grad_norm": 0.5301367199172721, "learning_rate": 1.0520681265206814e-05, "loss": 0.588, "step": 27765 }, { "epoch": 0.8106624624098566, "grad_norm": 0.5157701566905228, "learning_rate": 1.0519059205190593e-05, "loss": 0.6208, "step": 27766 }, { "epoch": 0.810691658637704, "grad_norm": 0.5398357597123894, "learning_rate": 1.0517437145174373e-05, "loss": 0.6095, "step": 27767 }, { "epoch": 0.8107208548655513, "grad_norm": 0.5573130016136901, "learning_rate": 1.0515815085158151e-05, "loss": 0.6498, "step": 27768 }, { "epoch": 0.8107500510933987, "grad_norm": 0.5595731756803343, "learning_rate": 1.051419302514193e-05, "loss": 0.6208, "step": 27769 }, { "epoch": 0.8107792473212461, "grad_norm": 0.5155561067451, "learning_rate": 1.0512570965125711e-05, "loss": 0.6026, "step": 27770 }, { "epoch": 0.8108084435490934, "grad_norm": 0.5435568914205389, "learning_rate": 1.051094890510949e-05, "loss": 0.5736, "step": 27771 }, { "epoch": 0.8108376397769408, "grad_norm": 0.5494110431246825, "learning_rate": 1.050932684509327e-05, "loss": 0.6167, "step": 27772 }, { "epoch": 0.8108668360047881, "grad_norm": 0.5112676369988788, "learning_rate": 1.0507704785077048e-05, "loss": 0.5724, "step": 27773 }, { "epoch": 0.8108960322326355, "grad_norm": 0.5396484258019992, "learning_rate": 1.0506082725060826e-05, "loss": 0.643, "step": 27774 }, { "epoch": 0.8109252284604829, "grad_norm": 0.5237233752893086, "learning_rate": 1.0504460665044608e-05, "loss": 0.5916, "step": 27775 }, { "epoch": 0.8109544246883302, "grad_norm": 0.5151740532096819, "learning_rate": 1.0502838605028386e-05, "loss": 0.5875, "step": 27776 }, { "epoch": 0.8109836209161776, "grad_norm": 0.5030635242209858, "learning_rate": 1.0501216545012166e-05, "loss": 0.5328, "step": 27777 }, { "epoch": 0.811012817144025, "grad_norm": 0.5882578657321634, "learning_rate": 1.0499594484995945e-05, "loss": 0.6814, "step": 27778 }, { "epoch": 0.8110420133718723, "grad_norm": 0.4676776520716296, "learning_rate": 1.0497972424979725e-05, "loss": 0.513, "step": 27779 }, { "epoch": 0.8110712095997197, "grad_norm": 0.5964801613543541, "learning_rate": 1.0496350364963505e-05, "loss": 0.6584, "step": 27780 }, { "epoch": 0.811100405827567, "grad_norm": 0.5681871992885879, "learning_rate": 1.0494728304947283e-05, "loss": 0.6652, "step": 27781 }, { "epoch": 0.8111296020554144, "grad_norm": 0.49014554360516344, "learning_rate": 1.0493106244931063e-05, "loss": 0.5155, "step": 27782 }, { "epoch": 0.8111587982832618, "grad_norm": 0.48654071962944667, "learning_rate": 1.0491484184914842e-05, "loss": 0.5146, "step": 27783 }, { "epoch": 0.8111879945111091, "grad_norm": 0.4967619428652776, "learning_rate": 1.0489862124898622e-05, "loss": 0.5382, "step": 27784 }, { "epoch": 0.8112171907389565, "grad_norm": 0.5418744777161841, "learning_rate": 1.0488240064882402e-05, "loss": 0.6003, "step": 27785 }, { "epoch": 0.8112463869668038, "grad_norm": 0.6190610308437418, "learning_rate": 1.048661800486618e-05, "loss": 0.6241, "step": 27786 }, { "epoch": 0.8112755831946512, "grad_norm": 0.5601731180053531, "learning_rate": 1.048499594484996e-05, "loss": 0.6474, "step": 27787 }, { "epoch": 0.8113047794224986, "grad_norm": 0.492172193276497, "learning_rate": 1.0483373884833739e-05, "loss": 0.5696, "step": 27788 }, { "epoch": 0.8113339756503459, "grad_norm": 0.5381601778229796, "learning_rate": 1.0481751824817519e-05, "loss": 0.6544, "step": 27789 }, { "epoch": 0.8113631718781933, "grad_norm": 0.5535722827550424, "learning_rate": 1.0480129764801299e-05, "loss": 0.6118, "step": 27790 }, { "epoch": 0.8113923681060407, "grad_norm": 0.5069633594454726, "learning_rate": 1.0478507704785077e-05, "loss": 0.5364, "step": 27791 }, { "epoch": 0.811421564333888, "grad_norm": 0.5595355540368085, "learning_rate": 1.0476885644768857e-05, "loss": 0.6163, "step": 27792 }, { "epoch": 0.8114507605617354, "grad_norm": 0.5188792386250414, "learning_rate": 1.0475263584752637e-05, "loss": 0.5749, "step": 27793 }, { "epoch": 0.8114799567895827, "grad_norm": 0.5378551175926782, "learning_rate": 1.0473641524736416e-05, "loss": 0.5462, "step": 27794 }, { "epoch": 0.8115091530174301, "grad_norm": 0.5302020315989168, "learning_rate": 1.0472019464720196e-05, "loss": 0.5944, "step": 27795 }, { "epoch": 0.8115383492452775, "grad_norm": 0.5157802585430196, "learning_rate": 1.0470397404703974e-05, "loss": 0.5747, "step": 27796 }, { "epoch": 0.8115675454731248, "grad_norm": 0.5284498315130701, "learning_rate": 1.0468775344687754e-05, "loss": 0.6414, "step": 27797 }, { "epoch": 0.8115967417009722, "grad_norm": 0.5414088760880962, "learning_rate": 1.0467153284671534e-05, "loss": 0.6862, "step": 27798 }, { "epoch": 0.8116259379288197, "grad_norm": 0.5494852206955254, "learning_rate": 1.0465531224655313e-05, "loss": 0.6379, "step": 27799 }, { "epoch": 0.811655134156667, "grad_norm": 0.5135791738954217, "learning_rate": 1.0463909164639093e-05, "loss": 0.5552, "step": 27800 }, { "epoch": 0.8116843303845144, "grad_norm": 0.5572949302223893, "learning_rate": 1.0462287104622871e-05, "loss": 0.6432, "step": 27801 }, { "epoch": 0.8117135266123617, "grad_norm": 0.5482648133605876, "learning_rate": 1.046066504460665e-05, "loss": 0.6128, "step": 27802 }, { "epoch": 0.8117427228402091, "grad_norm": 0.5699669264111957, "learning_rate": 1.0459042984590431e-05, "loss": 0.6716, "step": 27803 }, { "epoch": 0.8117719190680565, "grad_norm": 0.521051361622667, "learning_rate": 1.045742092457421e-05, "loss": 0.5572, "step": 27804 }, { "epoch": 0.8118011152959038, "grad_norm": 0.5305920969219478, "learning_rate": 1.045579886455799e-05, "loss": 0.6013, "step": 27805 }, { "epoch": 0.8118303115237512, "grad_norm": 0.4882151202020773, "learning_rate": 1.0454176804541768e-05, "loss": 0.5168, "step": 27806 }, { "epoch": 0.8118595077515985, "grad_norm": 0.5163691820765437, "learning_rate": 1.0452554744525547e-05, "loss": 0.5235, "step": 27807 }, { "epoch": 0.8118887039794459, "grad_norm": 0.506921492565955, "learning_rate": 1.0450932684509328e-05, "loss": 0.5185, "step": 27808 }, { "epoch": 0.8119179002072933, "grad_norm": 0.5431787183227211, "learning_rate": 1.0449310624493107e-05, "loss": 0.6132, "step": 27809 }, { "epoch": 0.8119470964351406, "grad_norm": 0.5405845477872774, "learning_rate": 1.0447688564476887e-05, "loss": 0.5735, "step": 27810 }, { "epoch": 0.811976292662988, "grad_norm": 0.5113984066108688, "learning_rate": 1.0446066504460665e-05, "loss": 0.5414, "step": 27811 }, { "epoch": 0.8120054888908353, "grad_norm": 0.5658056084688553, "learning_rate": 1.0444444444444445e-05, "loss": 0.6458, "step": 27812 }, { "epoch": 0.8120346851186827, "grad_norm": 0.5206662971730194, "learning_rate": 1.0442822384428225e-05, "loss": 0.5429, "step": 27813 }, { "epoch": 0.8120638813465301, "grad_norm": 0.5512579390507001, "learning_rate": 1.0441200324412004e-05, "loss": 0.6584, "step": 27814 }, { "epoch": 0.8120930775743774, "grad_norm": 0.5036892842637706, "learning_rate": 1.0439578264395784e-05, "loss": 0.5271, "step": 27815 }, { "epoch": 0.8121222738022248, "grad_norm": 0.5218178504612667, "learning_rate": 1.0437956204379562e-05, "loss": 0.563, "step": 27816 }, { "epoch": 0.8121514700300722, "grad_norm": 0.5088684017423077, "learning_rate": 1.0436334144363342e-05, "loss": 0.5658, "step": 27817 }, { "epoch": 0.8121806662579195, "grad_norm": 0.5184291583146743, "learning_rate": 1.0434712084347122e-05, "loss": 0.5985, "step": 27818 }, { "epoch": 0.8122098624857669, "grad_norm": 0.5143565260234794, "learning_rate": 1.04330900243309e-05, "loss": 0.5901, "step": 27819 }, { "epoch": 0.8122390587136142, "grad_norm": 0.5133700988527552, "learning_rate": 1.043146796431468e-05, "loss": 0.5661, "step": 27820 }, { "epoch": 0.8122682549414616, "grad_norm": 0.5154849001099435, "learning_rate": 1.0429845904298459e-05, "loss": 0.5511, "step": 27821 }, { "epoch": 0.812297451169309, "grad_norm": 0.49754427252978495, "learning_rate": 1.0428223844282239e-05, "loss": 0.5319, "step": 27822 }, { "epoch": 0.8123266473971563, "grad_norm": 0.5461726131267799, "learning_rate": 1.0426601784266019e-05, "loss": 0.6485, "step": 27823 }, { "epoch": 0.8123558436250037, "grad_norm": 0.5616390660688559, "learning_rate": 1.0424979724249797e-05, "loss": 0.6571, "step": 27824 }, { "epoch": 0.812385039852851, "grad_norm": 0.5751873915777105, "learning_rate": 1.0423357664233578e-05, "loss": 0.5291, "step": 27825 }, { "epoch": 0.8124142360806984, "grad_norm": 0.515073156418277, "learning_rate": 1.0421735604217358e-05, "loss": 0.561, "step": 27826 }, { "epoch": 0.8124434323085458, "grad_norm": 0.5803125309281184, "learning_rate": 1.0420113544201136e-05, "loss": 0.65, "step": 27827 }, { "epoch": 0.8124726285363931, "grad_norm": 0.4953332076359347, "learning_rate": 1.0418491484184916e-05, "loss": 0.5357, "step": 27828 }, { "epoch": 0.8125018247642405, "grad_norm": 0.5456972056540277, "learning_rate": 1.0416869424168694e-05, "loss": 0.5823, "step": 27829 }, { "epoch": 0.8125310209920878, "grad_norm": 0.5215159253595308, "learning_rate": 1.0415247364152473e-05, "loss": 0.5428, "step": 27830 }, { "epoch": 0.8125602172199352, "grad_norm": 0.5069583242819672, "learning_rate": 1.0413625304136255e-05, "loss": 0.5682, "step": 27831 }, { "epoch": 0.8125894134477826, "grad_norm": 0.4838482854081503, "learning_rate": 1.0412003244120033e-05, "loss": 0.5321, "step": 27832 }, { "epoch": 0.8126186096756299, "grad_norm": 0.4827458252159261, "learning_rate": 1.0410381184103813e-05, "loss": 0.5054, "step": 27833 }, { "epoch": 0.8126478059034773, "grad_norm": 0.5329554098388619, "learning_rate": 1.0408759124087591e-05, "loss": 0.5727, "step": 27834 }, { "epoch": 0.8126770021313247, "grad_norm": 0.5365390537425117, "learning_rate": 1.040713706407137e-05, "loss": 0.6034, "step": 27835 }, { "epoch": 0.812706198359172, "grad_norm": 0.5312223984748486, "learning_rate": 1.0405515004055152e-05, "loss": 0.6023, "step": 27836 }, { "epoch": 0.8127353945870194, "grad_norm": 0.5218792097294482, "learning_rate": 1.040389294403893e-05, "loss": 0.5862, "step": 27837 }, { "epoch": 0.8127645908148667, "grad_norm": 0.5153987137894929, "learning_rate": 1.040227088402271e-05, "loss": 0.5904, "step": 27838 }, { "epoch": 0.8127937870427141, "grad_norm": 0.5126221411541497, "learning_rate": 1.0400648824006488e-05, "loss": 0.5457, "step": 27839 }, { "epoch": 0.8128229832705615, "grad_norm": 0.5272795578186393, "learning_rate": 1.0399026763990267e-05, "loss": 0.6333, "step": 27840 }, { "epoch": 0.8128521794984088, "grad_norm": 0.5685425694415825, "learning_rate": 1.0397404703974048e-05, "loss": 0.6597, "step": 27841 }, { "epoch": 0.8128813757262562, "grad_norm": 0.5103689626330642, "learning_rate": 1.0395782643957827e-05, "loss": 0.5392, "step": 27842 }, { "epoch": 0.8129105719541035, "grad_norm": 0.5498227287942692, "learning_rate": 1.0394160583941607e-05, "loss": 0.6348, "step": 27843 }, { "epoch": 0.8129397681819509, "grad_norm": 0.5283101878644496, "learning_rate": 1.0392538523925385e-05, "loss": 0.5892, "step": 27844 }, { "epoch": 0.8129689644097983, "grad_norm": 0.5064361147000801, "learning_rate": 1.0390916463909165e-05, "loss": 0.5408, "step": 27845 }, { "epoch": 0.8129981606376456, "grad_norm": 0.5384694203861238, "learning_rate": 1.0389294403892945e-05, "loss": 0.6089, "step": 27846 }, { "epoch": 0.813027356865493, "grad_norm": 0.5293124082978643, "learning_rate": 1.0387672343876724e-05, "loss": 0.5791, "step": 27847 }, { "epoch": 0.8130565530933404, "grad_norm": 0.46539022873485963, "learning_rate": 1.0386050283860504e-05, "loss": 0.4907, "step": 27848 }, { "epoch": 0.8130857493211877, "grad_norm": 0.5292631080257283, "learning_rate": 1.0384428223844282e-05, "loss": 0.623, "step": 27849 }, { "epoch": 0.8131149455490351, "grad_norm": 0.5360225249315929, "learning_rate": 1.0382806163828062e-05, "loss": 0.5834, "step": 27850 }, { "epoch": 0.8131441417768824, "grad_norm": 0.5412747226151877, "learning_rate": 1.0381184103811842e-05, "loss": 0.5809, "step": 27851 }, { "epoch": 0.8131733380047298, "grad_norm": 0.5356994116723713, "learning_rate": 1.037956204379562e-05, "loss": 0.568, "step": 27852 }, { "epoch": 0.8132025342325772, "grad_norm": 0.5186549855546464, "learning_rate": 1.03779399837794e-05, "loss": 0.5982, "step": 27853 }, { "epoch": 0.8132317304604245, "grad_norm": 0.5672733018405891, "learning_rate": 1.037631792376318e-05, "loss": 0.6459, "step": 27854 }, { "epoch": 0.8132609266882719, "grad_norm": 0.5600701515468991, "learning_rate": 1.037469586374696e-05, "loss": 0.6054, "step": 27855 }, { "epoch": 0.8132901229161192, "grad_norm": 0.5391370055959195, "learning_rate": 1.037307380373074e-05, "loss": 0.6313, "step": 27856 }, { "epoch": 0.8133193191439666, "grad_norm": 0.6490569355927756, "learning_rate": 1.0371451743714518e-05, "loss": 0.6052, "step": 27857 }, { "epoch": 0.813348515371814, "grad_norm": 0.529065505886313, "learning_rate": 1.0369829683698298e-05, "loss": 0.5662, "step": 27858 }, { "epoch": 0.8133777115996613, "grad_norm": 0.545427090456936, "learning_rate": 1.0368207623682076e-05, "loss": 0.6564, "step": 27859 }, { "epoch": 0.8134069078275087, "grad_norm": 0.5074480699615069, "learning_rate": 1.0366585563665856e-05, "loss": 0.5584, "step": 27860 }, { "epoch": 0.813436104055356, "grad_norm": 0.5857921502737483, "learning_rate": 1.0364963503649636e-05, "loss": 0.6598, "step": 27861 }, { "epoch": 0.8134653002832034, "grad_norm": 0.4894062587507462, "learning_rate": 1.0363341443633415e-05, "loss": 0.54, "step": 27862 }, { "epoch": 0.8134944965110508, "grad_norm": 0.5280493587182578, "learning_rate": 1.0361719383617193e-05, "loss": 0.5419, "step": 27863 }, { "epoch": 0.8135236927388981, "grad_norm": 0.5345570354944367, "learning_rate": 1.0360097323600975e-05, "loss": 0.5872, "step": 27864 }, { "epoch": 0.8135528889667455, "grad_norm": 0.5503097822156502, "learning_rate": 1.0358475263584753e-05, "loss": 0.6275, "step": 27865 }, { "epoch": 0.8135820851945929, "grad_norm": 0.5318269406523196, "learning_rate": 1.0356853203568533e-05, "loss": 0.5502, "step": 27866 }, { "epoch": 0.8136112814224402, "grad_norm": 0.5506534491242898, "learning_rate": 1.0355231143552312e-05, "loss": 0.6078, "step": 27867 }, { "epoch": 0.8136404776502876, "grad_norm": 0.47985226835370876, "learning_rate": 1.035360908353609e-05, "loss": 0.5059, "step": 27868 }, { "epoch": 0.8136696738781349, "grad_norm": 0.5890929938023183, "learning_rate": 1.0351987023519872e-05, "loss": 0.7303, "step": 27869 }, { "epoch": 0.8136988701059823, "grad_norm": 0.49641090587957554, "learning_rate": 1.035036496350365e-05, "loss": 0.5522, "step": 27870 }, { "epoch": 0.8137280663338297, "grad_norm": 0.5551235291176884, "learning_rate": 1.034874290348743e-05, "loss": 0.6297, "step": 27871 }, { "epoch": 0.813757262561677, "grad_norm": 0.5109685239774716, "learning_rate": 1.0347120843471209e-05, "loss": 0.5694, "step": 27872 }, { "epoch": 0.8137864587895244, "grad_norm": 0.48132755915094105, "learning_rate": 1.0345498783454987e-05, "loss": 0.5057, "step": 27873 }, { "epoch": 0.8138156550173717, "grad_norm": 0.5547637034013889, "learning_rate": 1.0343876723438769e-05, "loss": 0.6237, "step": 27874 }, { "epoch": 0.8138448512452191, "grad_norm": 0.554451782981931, "learning_rate": 1.0342254663422547e-05, "loss": 0.6618, "step": 27875 }, { "epoch": 0.8138740474730665, "grad_norm": 0.5124716493204429, "learning_rate": 1.0340632603406327e-05, "loss": 0.5305, "step": 27876 }, { "epoch": 0.8139032437009138, "grad_norm": 0.49438652861235494, "learning_rate": 1.0339010543390106e-05, "loss": 0.5332, "step": 27877 }, { "epoch": 0.8139324399287612, "grad_norm": 0.5220473102624049, "learning_rate": 1.0337388483373886e-05, "loss": 0.5981, "step": 27878 }, { "epoch": 0.8139616361566085, "grad_norm": 0.5011696141567721, "learning_rate": 1.0335766423357666e-05, "loss": 0.5617, "step": 27879 }, { "epoch": 0.8139908323844559, "grad_norm": 0.5846078248163682, "learning_rate": 1.0334144363341444e-05, "loss": 0.6678, "step": 27880 }, { "epoch": 0.8140200286123033, "grad_norm": 0.5064598094370265, "learning_rate": 1.0332522303325224e-05, "loss": 0.5512, "step": 27881 }, { "epoch": 0.8140492248401506, "grad_norm": 0.5069194166892614, "learning_rate": 1.0330900243309002e-05, "loss": 0.531, "step": 27882 }, { "epoch": 0.814078421067998, "grad_norm": 0.5116424197533416, "learning_rate": 1.0329278183292783e-05, "loss": 0.5612, "step": 27883 }, { "epoch": 0.8141076172958454, "grad_norm": 0.499612127010934, "learning_rate": 1.0327656123276563e-05, "loss": 0.5326, "step": 27884 }, { "epoch": 0.8141368135236927, "grad_norm": 0.49754351920287493, "learning_rate": 1.0326034063260341e-05, "loss": 0.5544, "step": 27885 }, { "epoch": 0.8141660097515401, "grad_norm": 0.4926938850518996, "learning_rate": 1.0324412003244121e-05, "loss": 0.5511, "step": 27886 }, { "epoch": 0.8141952059793874, "grad_norm": 0.6089136612567566, "learning_rate": 1.03227899432279e-05, "loss": 0.7315, "step": 27887 }, { "epoch": 0.8142244022072348, "grad_norm": 0.5661123270578462, "learning_rate": 1.032116788321168e-05, "loss": 0.6936, "step": 27888 }, { "epoch": 0.8142535984350822, "grad_norm": 0.5365689831856595, "learning_rate": 1.031954582319546e-05, "loss": 0.602, "step": 27889 }, { "epoch": 0.8142827946629295, "grad_norm": 0.5097726603233477, "learning_rate": 1.0317923763179238e-05, "loss": 0.5533, "step": 27890 }, { "epoch": 0.8143119908907769, "grad_norm": 0.5473477467201262, "learning_rate": 1.0316301703163016e-05, "loss": 0.5663, "step": 27891 }, { "epoch": 0.8143411871186242, "grad_norm": 0.5398105736719762, "learning_rate": 1.0314679643146796e-05, "loss": 0.5224, "step": 27892 }, { "epoch": 0.8143703833464716, "grad_norm": 0.5223982540680978, "learning_rate": 1.0313057583130576e-05, "loss": 0.5475, "step": 27893 }, { "epoch": 0.814399579574319, "grad_norm": 0.4915945856733991, "learning_rate": 1.0311435523114357e-05, "loss": 0.5581, "step": 27894 }, { "epoch": 0.8144287758021663, "grad_norm": 0.5324129391911004, "learning_rate": 1.0309813463098135e-05, "loss": 0.6119, "step": 27895 }, { "epoch": 0.8144579720300137, "grad_norm": 0.539132665261186, "learning_rate": 1.0308191403081913e-05, "loss": 0.6189, "step": 27896 }, { "epoch": 0.814487168257861, "grad_norm": 0.4930949634286966, "learning_rate": 1.0306569343065695e-05, "loss": 0.4922, "step": 27897 }, { "epoch": 0.8145163644857084, "grad_norm": 0.5524062149677642, "learning_rate": 1.0304947283049473e-05, "loss": 0.6348, "step": 27898 }, { "epoch": 0.8145455607135558, "grad_norm": 0.5649156891905621, "learning_rate": 1.0303325223033253e-05, "loss": 0.66, "step": 27899 }, { "epoch": 0.8145747569414031, "grad_norm": 0.5430964993903752, "learning_rate": 1.0301703163017032e-05, "loss": 0.5725, "step": 27900 }, { "epoch": 0.8146039531692505, "grad_norm": 0.5555949029581082, "learning_rate": 1.030008110300081e-05, "loss": 0.649, "step": 27901 }, { "epoch": 0.8146331493970979, "grad_norm": 0.5689131113949717, "learning_rate": 1.0298459042984592e-05, "loss": 0.7124, "step": 27902 }, { "epoch": 0.8146623456249452, "grad_norm": 0.4702892810666545, "learning_rate": 1.029683698296837e-05, "loss": 0.5163, "step": 27903 }, { "epoch": 0.8146915418527926, "grad_norm": 0.5932510637758991, "learning_rate": 1.029521492295215e-05, "loss": 0.6818, "step": 27904 }, { "epoch": 0.8147207380806399, "grad_norm": 0.5716607377006265, "learning_rate": 1.0293592862935929e-05, "loss": 0.635, "step": 27905 }, { "epoch": 0.8147499343084873, "grad_norm": 0.5176214620178192, "learning_rate": 1.0291970802919707e-05, "loss": 0.599, "step": 27906 }, { "epoch": 0.8147791305363347, "grad_norm": 0.5337603793644333, "learning_rate": 1.0290348742903489e-05, "loss": 0.6265, "step": 27907 }, { "epoch": 0.814808326764182, "grad_norm": 0.48949223826976945, "learning_rate": 1.0288726682887267e-05, "loss": 0.5871, "step": 27908 }, { "epoch": 0.8148375229920294, "grad_norm": 0.5279034633719397, "learning_rate": 1.0287104622871047e-05, "loss": 0.6097, "step": 27909 }, { "epoch": 0.8148667192198767, "grad_norm": 0.5131296588593948, "learning_rate": 1.0285482562854826e-05, "loss": 0.5882, "step": 27910 }, { "epoch": 0.8148959154477241, "grad_norm": 0.5222457919651267, "learning_rate": 1.0283860502838606e-05, "loss": 0.5999, "step": 27911 }, { "epoch": 0.8149251116755715, "grad_norm": 0.5557550096848184, "learning_rate": 1.0282238442822386e-05, "loss": 0.6104, "step": 27912 }, { "epoch": 0.8149543079034188, "grad_norm": 0.5411512745885314, "learning_rate": 1.0280616382806164e-05, "loss": 0.6377, "step": 27913 }, { "epoch": 0.8149835041312662, "grad_norm": 0.5489264769058777, "learning_rate": 1.0278994322789944e-05, "loss": 0.6422, "step": 27914 }, { "epoch": 0.8150127003591136, "grad_norm": 0.5254403195949953, "learning_rate": 1.0277372262773723e-05, "loss": 0.5716, "step": 27915 }, { "epoch": 0.8150418965869609, "grad_norm": 0.5019677839073194, "learning_rate": 1.0275750202757503e-05, "loss": 0.5497, "step": 27916 }, { "epoch": 0.8150710928148083, "grad_norm": 0.4781314876636873, "learning_rate": 1.0274128142741283e-05, "loss": 0.475, "step": 27917 }, { "epoch": 0.8151002890426556, "grad_norm": 0.5013180639981548, "learning_rate": 1.0272506082725061e-05, "loss": 0.5439, "step": 27918 }, { "epoch": 0.815129485270503, "grad_norm": 0.5318621787451375, "learning_rate": 1.0270884022708841e-05, "loss": 0.5663, "step": 27919 }, { "epoch": 0.8151586814983505, "grad_norm": 0.5068374698126412, "learning_rate": 1.026926196269262e-05, "loss": 0.5743, "step": 27920 }, { "epoch": 0.8151878777261978, "grad_norm": 0.5381863372956872, "learning_rate": 1.02676399026764e-05, "loss": 0.6312, "step": 27921 }, { "epoch": 0.8152170739540452, "grad_norm": 0.5332810491246305, "learning_rate": 1.026601784266018e-05, "loss": 0.6106, "step": 27922 }, { "epoch": 0.8152462701818926, "grad_norm": 0.46455613517068917, "learning_rate": 1.0264395782643958e-05, "loss": 0.4849, "step": 27923 }, { "epoch": 0.8152754664097399, "grad_norm": 0.5404497272730568, "learning_rate": 1.0262773722627737e-05, "loss": 0.6186, "step": 27924 }, { "epoch": 0.8153046626375873, "grad_norm": 0.5062527841458843, "learning_rate": 1.0261151662611517e-05, "loss": 0.5103, "step": 27925 }, { "epoch": 0.8153338588654346, "grad_norm": 0.538761049274747, "learning_rate": 1.0259529602595297e-05, "loss": 0.5652, "step": 27926 }, { "epoch": 0.815363055093282, "grad_norm": 0.5237191769838131, "learning_rate": 1.0257907542579077e-05, "loss": 0.5875, "step": 27927 }, { "epoch": 0.8153922513211294, "grad_norm": 0.5171456027832387, "learning_rate": 1.0256285482562855e-05, "loss": 0.5794, "step": 27928 }, { "epoch": 0.8154214475489767, "grad_norm": 0.5333294667855122, "learning_rate": 1.0254663422546633e-05, "loss": 0.5795, "step": 27929 }, { "epoch": 0.8154506437768241, "grad_norm": 0.5208432280111059, "learning_rate": 1.0253041362530415e-05, "loss": 0.5928, "step": 27930 }, { "epoch": 0.8154798400046714, "grad_norm": 0.5275892226154719, "learning_rate": 1.0251419302514194e-05, "loss": 0.5651, "step": 27931 }, { "epoch": 0.8155090362325188, "grad_norm": 0.5172029849100738, "learning_rate": 1.0249797242497974e-05, "loss": 0.5628, "step": 27932 }, { "epoch": 0.8155382324603662, "grad_norm": 0.538239319413951, "learning_rate": 1.0248175182481752e-05, "loss": 0.6225, "step": 27933 }, { "epoch": 0.8155674286882135, "grad_norm": 0.5070641561069914, "learning_rate": 1.024655312246553e-05, "loss": 0.5536, "step": 27934 }, { "epoch": 0.8155966249160609, "grad_norm": 0.5016047611477192, "learning_rate": 1.0244931062449312e-05, "loss": 0.5544, "step": 27935 }, { "epoch": 0.8156258211439082, "grad_norm": 0.5103627534120853, "learning_rate": 1.024330900243309e-05, "loss": 0.5911, "step": 27936 }, { "epoch": 0.8156550173717556, "grad_norm": 0.5111813405471711, "learning_rate": 1.024168694241687e-05, "loss": 0.578, "step": 27937 }, { "epoch": 0.815684213599603, "grad_norm": 0.5657678591805136, "learning_rate": 1.0240064882400649e-05, "loss": 0.649, "step": 27938 }, { "epoch": 0.8157134098274503, "grad_norm": 0.5217228408974035, "learning_rate": 1.0238442822384427e-05, "loss": 0.5389, "step": 27939 }, { "epoch": 0.8157426060552977, "grad_norm": 0.48383794416155673, "learning_rate": 1.0236820762368209e-05, "loss": 0.4952, "step": 27940 }, { "epoch": 0.815771802283145, "grad_norm": 0.5163956142841586, "learning_rate": 1.0235198702351988e-05, "loss": 0.5622, "step": 27941 }, { "epoch": 0.8158009985109924, "grad_norm": 0.512389963176792, "learning_rate": 1.0233576642335768e-05, "loss": 0.5376, "step": 27942 }, { "epoch": 0.8158301947388398, "grad_norm": 0.548900148671053, "learning_rate": 1.0231954582319546e-05, "loss": 0.5953, "step": 27943 }, { "epoch": 0.8158593909666871, "grad_norm": 0.5585841140758628, "learning_rate": 1.0230332522303326e-05, "loss": 0.6534, "step": 27944 }, { "epoch": 0.8158885871945345, "grad_norm": 0.4862973478096307, "learning_rate": 1.0228710462287106e-05, "loss": 0.5392, "step": 27945 }, { "epoch": 0.8159177834223819, "grad_norm": 0.5430300767325287, "learning_rate": 1.0227088402270884e-05, "loss": 0.5936, "step": 27946 }, { "epoch": 0.8159469796502292, "grad_norm": 0.5380201125930526, "learning_rate": 1.0225466342254665e-05, "loss": 0.6542, "step": 27947 }, { "epoch": 0.8159761758780766, "grad_norm": 0.5519527554600331, "learning_rate": 1.0223844282238443e-05, "loss": 0.6285, "step": 27948 }, { "epoch": 0.8160053721059239, "grad_norm": 0.5704641661026147, "learning_rate": 1.0222222222222223e-05, "loss": 0.7076, "step": 27949 }, { "epoch": 0.8160345683337713, "grad_norm": 0.5034968257330162, "learning_rate": 1.0220600162206003e-05, "loss": 0.5435, "step": 27950 }, { "epoch": 0.8160637645616187, "grad_norm": 0.5229875670784255, "learning_rate": 1.0218978102189781e-05, "loss": 0.5831, "step": 27951 }, { "epoch": 0.816092960789466, "grad_norm": 0.5324507061901965, "learning_rate": 1.021735604217356e-05, "loss": 0.5898, "step": 27952 }, { "epoch": 0.8161221570173134, "grad_norm": 0.5291706873189346, "learning_rate": 1.021573398215734e-05, "loss": 0.6142, "step": 27953 }, { "epoch": 0.8161513532451607, "grad_norm": 0.5147368784629947, "learning_rate": 1.021411192214112e-05, "loss": 0.5694, "step": 27954 }, { "epoch": 0.8161805494730081, "grad_norm": 0.5511044587502738, "learning_rate": 1.02124898621249e-05, "loss": 0.6524, "step": 27955 }, { "epoch": 0.8162097457008555, "grad_norm": 0.5408332355456503, "learning_rate": 1.0210867802108678e-05, "loss": 0.64, "step": 27956 }, { "epoch": 0.8162389419287028, "grad_norm": 0.5316401337188249, "learning_rate": 1.0209245742092457e-05, "loss": 0.5925, "step": 27957 }, { "epoch": 0.8162681381565502, "grad_norm": 0.5261333105648056, "learning_rate": 1.0207623682076237e-05, "loss": 0.5844, "step": 27958 }, { "epoch": 0.8162973343843976, "grad_norm": 0.5241234018056585, "learning_rate": 1.0206001622060017e-05, "loss": 0.583, "step": 27959 }, { "epoch": 0.8163265306122449, "grad_norm": 0.5331838018449134, "learning_rate": 1.0204379562043797e-05, "loss": 0.5567, "step": 27960 }, { "epoch": 0.8163557268400923, "grad_norm": 0.49362146624585485, "learning_rate": 1.0202757502027575e-05, "loss": 0.5395, "step": 27961 }, { "epoch": 0.8163849230679396, "grad_norm": 0.5141763292744785, "learning_rate": 1.0201135442011354e-05, "loss": 0.572, "step": 27962 }, { "epoch": 0.816414119295787, "grad_norm": 0.5320786737698837, "learning_rate": 1.0199513381995135e-05, "loss": 0.5757, "step": 27963 }, { "epoch": 0.8164433155236344, "grad_norm": 0.5363269551569537, "learning_rate": 1.0197891321978914e-05, "loss": 0.6159, "step": 27964 }, { "epoch": 0.8164725117514817, "grad_norm": 0.5121634454337576, "learning_rate": 1.0196269261962694e-05, "loss": 0.5666, "step": 27965 }, { "epoch": 0.8165017079793291, "grad_norm": 0.533960214181984, "learning_rate": 1.0194647201946472e-05, "loss": 0.5934, "step": 27966 }, { "epoch": 0.8165309042071764, "grad_norm": 0.5392491975349736, "learning_rate": 1.019302514193025e-05, "loss": 0.6015, "step": 27967 }, { "epoch": 0.8165601004350238, "grad_norm": 0.5161917153683823, "learning_rate": 1.0191403081914032e-05, "loss": 0.521, "step": 27968 }, { "epoch": 0.8165892966628712, "grad_norm": 0.5357067884263031, "learning_rate": 1.018978102189781e-05, "loss": 0.605, "step": 27969 }, { "epoch": 0.8166184928907185, "grad_norm": 0.5366884040339199, "learning_rate": 1.0188158961881591e-05, "loss": 0.589, "step": 27970 }, { "epoch": 0.8166476891185659, "grad_norm": 0.5629568197663446, "learning_rate": 1.018653690186537e-05, "loss": 0.6386, "step": 27971 }, { "epoch": 0.8166768853464133, "grad_norm": 0.5312486165754621, "learning_rate": 1.0184914841849148e-05, "loss": 0.6166, "step": 27972 }, { "epoch": 0.8167060815742606, "grad_norm": 0.5195195390760626, "learning_rate": 1.018329278183293e-05, "loss": 0.5545, "step": 27973 }, { "epoch": 0.816735277802108, "grad_norm": 0.5175614216124231, "learning_rate": 1.0181670721816708e-05, "loss": 0.6014, "step": 27974 }, { "epoch": 0.8167644740299553, "grad_norm": 0.558457339280468, "learning_rate": 1.0180048661800488e-05, "loss": 0.5927, "step": 27975 }, { "epoch": 0.8167936702578027, "grad_norm": 0.5223028082274427, "learning_rate": 1.0178426601784266e-05, "loss": 0.5671, "step": 27976 }, { "epoch": 0.8168228664856501, "grad_norm": 0.5187921301274986, "learning_rate": 1.0176804541768046e-05, "loss": 0.5827, "step": 27977 }, { "epoch": 0.8168520627134974, "grad_norm": 0.5727054905335024, "learning_rate": 1.0175182481751826e-05, "loss": 0.6176, "step": 27978 }, { "epoch": 0.8168812589413448, "grad_norm": 0.48654532703979714, "learning_rate": 1.0173560421735605e-05, "loss": 0.5132, "step": 27979 }, { "epoch": 0.8169104551691921, "grad_norm": 0.517192882765906, "learning_rate": 1.0171938361719383e-05, "loss": 0.5789, "step": 27980 }, { "epoch": 0.8169396513970395, "grad_norm": 0.5242107586172596, "learning_rate": 1.0170316301703163e-05, "loss": 0.5685, "step": 27981 }, { "epoch": 0.8169688476248869, "grad_norm": 0.5637820814221102, "learning_rate": 1.0168694241686943e-05, "loss": 0.6347, "step": 27982 }, { "epoch": 0.8169980438527342, "grad_norm": 0.5278282804102409, "learning_rate": 1.0167072181670723e-05, "loss": 0.5532, "step": 27983 }, { "epoch": 0.8170272400805816, "grad_norm": 0.5180547986874713, "learning_rate": 1.0165450121654502e-05, "loss": 0.5328, "step": 27984 }, { "epoch": 0.817056436308429, "grad_norm": 0.5198902681244135, "learning_rate": 1.016382806163828e-05, "loss": 0.5881, "step": 27985 }, { "epoch": 0.8170856325362763, "grad_norm": 0.5698484766823917, "learning_rate": 1.016220600162206e-05, "loss": 0.6854, "step": 27986 }, { "epoch": 0.8171148287641237, "grad_norm": 0.521938395186678, "learning_rate": 1.016058394160584e-05, "loss": 0.5991, "step": 27987 }, { "epoch": 0.817144024991971, "grad_norm": 0.5380252390563769, "learning_rate": 1.015896188158962e-05, "loss": 0.5727, "step": 27988 }, { "epoch": 0.8171732212198184, "grad_norm": 0.5445334831933095, "learning_rate": 1.0157339821573399e-05, "loss": 0.5769, "step": 27989 }, { "epoch": 0.8172024174476658, "grad_norm": 0.5113302485340382, "learning_rate": 1.0155717761557177e-05, "loss": 0.5682, "step": 27990 }, { "epoch": 0.8172316136755131, "grad_norm": 0.47958576271946823, "learning_rate": 1.0154095701540957e-05, "loss": 0.515, "step": 27991 }, { "epoch": 0.8172608099033605, "grad_norm": 0.5219806703905177, "learning_rate": 1.0152473641524737e-05, "loss": 0.5493, "step": 27992 }, { "epoch": 0.8172900061312078, "grad_norm": 0.5389570117368586, "learning_rate": 1.0150851581508517e-05, "loss": 0.6056, "step": 27993 }, { "epoch": 0.8173192023590552, "grad_norm": 0.46488579134304725, "learning_rate": 1.0149229521492296e-05, "loss": 0.471, "step": 27994 }, { "epoch": 0.8173483985869026, "grad_norm": 0.5083775021463143, "learning_rate": 1.0147607461476074e-05, "loss": 0.5657, "step": 27995 }, { "epoch": 0.8173775948147499, "grad_norm": 0.5393984805527986, "learning_rate": 1.0145985401459856e-05, "loss": 0.6023, "step": 27996 }, { "epoch": 0.8174067910425973, "grad_norm": 0.5236498317206367, "learning_rate": 1.0144363341443634e-05, "loss": 0.5847, "step": 27997 }, { "epoch": 0.8174359872704446, "grad_norm": 0.5505170800450607, "learning_rate": 1.0142741281427414e-05, "loss": 0.6189, "step": 27998 }, { "epoch": 0.817465183498292, "grad_norm": 0.5196829358899299, "learning_rate": 1.0141119221411193e-05, "loss": 0.5715, "step": 27999 }, { "epoch": 0.8174943797261394, "grad_norm": 0.5105450684742264, "learning_rate": 1.0139497161394971e-05, "loss": 0.5612, "step": 28000 }, { "epoch": 0.8175235759539867, "grad_norm": 0.4991361489163462, "learning_rate": 1.0137875101378753e-05, "loss": 0.5, "step": 28001 }, { "epoch": 0.8175527721818341, "grad_norm": 0.5552593122338882, "learning_rate": 1.0136253041362531e-05, "loss": 0.6504, "step": 28002 }, { "epoch": 0.8175819684096814, "grad_norm": 0.529124043905159, "learning_rate": 1.0134630981346311e-05, "loss": 0.6055, "step": 28003 }, { "epoch": 0.8176111646375288, "grad_norm": 0.5505704442777285, "learning_rate": 1.013300892133009e-05, "loss": 0.6611, "step": 28004 }, { "epoch": 0.8176403608653762, "grad_norm": 0.5278958319341098, "learning_rate": 1.0131386861313868e-05, "loss": 0.5854, "step": 28005 }, { "epoch": 0.8176695570932235, "grad_norm": 0.5294286970618505, "learning_rate": 1.012976480129765e-05, "loss": 0.6232, "step": 28006 }, { "epoch": 0.8176987533210709, "grad_norm": 0.5015112350449794, "learning_rate": 1.0128142741281428e-05, "loss": 0.552, "step": 28007 }, { "epoch": 0.8177279495489183, "grad_norm": 0.527559055536182, "learning_rate": 1.0126520681265208e-05, "loss": 0.5812, "step": 28008 }, { "epoch": 0.8177571457767656, "grad_norm": 0.5337538447530421, "learning_rate": 1.0124898621248986e-05, "loss": 0.6212, "step": 28009 }, { "epoch": 0.817786342004613, "grad_norm": 0.5406140817946704, "learning_rate": 1.0123276561232765e-05, "loss": 0.5914, "step": 28010 }, { "epoch": 0.8178155382324603, "grad_norm": 0.572020621635109, "learning_rate": 1.0121654501216547e-05, "loss": 0.6233, "step": 28011 }, { "epoch": 0.8178447344603077, "grad_norm": 0.5199997859455799, "learning_rate": 1.0120032441200325e-05, "loss": 0.5687, "step": 28012 }, { "epoch": 0.8178739306881551, "grad_norm": 0.5544589163731188, "learning_rate": 1.0118410381184103e-05, "loss": 0.6508, "step": 28013 }, { "epoch": 0.8179031269160024, "grad_norm": 0.5100193274556557, "learning_rate": 1.0116788321167883e-05, "loss": 0.5797, "step": 28014 }, { "epoch": 0.8179323231438498, "grad_norm": 0.5167708806706198, "learning_rate": 1.0115166261151663e-05, "loss": 0.5781, "step": 28015 }, { "epoch": 0.8179615193716971, "grad_norm": 0.5225010600953943, "learning_rate": 1.0113544201135443e-05, "loss": 0.5972, "step": 28016 }, { "epoch": 0.8179907155995445, "grad_norm": 0.4986907540671388, "learning_rate": 1.0111922141119222e-05, "loss": 0.5413, "step": 28017 }, { "epoch": 0.8180199118273919, "grad_norm": 0.5212223212649937, "learning_rate": 1.0110300081103e-05, "loss": 0.5464, "step": 28018 }, { "epoch": 0.8180491080552392, "grad_norm": 0.47740861466644413, "learning_rate": 1.010867802108678e-05, "loss": 0.4889, "step": 28019 }, { "epoch": 0.8180783042830866, "grad_norm": 0.5022932673455767, "learning_rate": 1.010705596107056e-05, "loss": 0.5581, "step": 28020 }, { "epoch": 0.818107500510934, "grad_norm": 0.47902581017539864, "learning_rate": 1.010543390105434e-05, "loss": 0.4946, "step": 28021 }, { "epoch": 0.8181366967387813, "grad_norm": 0.4987339079260258, "learning_rate": 1.0103811841038119e-05, "loss": 0.5454, "step": 28022 }, { "epoch": 0.8181658929666287, "grad_norm": 0.48040936607314555, "learning_rate": 1.0102189781021897e-05, "loss": 0.4802, "step": 28023 }, { "epoch": 0.818195089194476, "grad_norm": 0.5226830527895365, "learning_rate": 1.0100567721005677e-05, "loss": 0.5695, "step": 28024 }, { "epoch": 0.8182242854223234, "grad_norm": 0.5273044093818468, "learning_rate": 1.0098945660989457e-05, "loss": 0.5732, "step": 28025 }, { "epoch": 0.8182534816501708, "grad_norm": 0.4969715582668978, "learning_rate": 1.0097323600973237e-05, "loss": 0.5479, "step": 28026 }, { "epoch": 0.8182826778780181, "grad_norm": 0.5469564481329908, "learning_rate": 1.0095701540957016e-05, "loss": 0.6344, "step": 28027 }, { "epoch": 0.8183118741058655, "grad_norm": 0.5208026383160694, "learning_rate": 1.0094079480940794e-05, "loss": 0.5745, "step": 28028 }, { "epoch": 0.8183410703337128, "grad_norm": 0.5648152772173985, "learning_rate": 1.0092457420924576e-05, "loss": 0.6599, "step": 28029 }, { "epoch": 0.8183702665615602, "grad_norm": 0.5543603995004373, "learning_rate": 1.0090835360908354e-05, "loss": 0.6442, "step": 28030 }, { "epoch": 0.8183994627894076, "grad_norm": 0.5738573333621523, "learning_rate": 1.0089213300892134e-05, "loss": 0.6666, "step": 28031 }, { "epoch": 0.8184286590172549, "grad_norm": 0.5412817611811535, "learning_rate": 1.0087591240875913e-05, "loss": 0.5859, "step": 28032 }, { "epoch": 0.8184578552451023, "grad_norm": 0.5804024052830057, "learning_rate": 1.0085969180859691e-05, "loss": 0.6419, "step": 28033 }, { "epoch": 0.8184870514729496, "grad_norm": 0.5419474536797863, "learning_rate": 1.0084347120843473e-05, "loss": 0.6044, "step": 28034 }, { "epoch": 0.818516247700797, "grad_norm": 0.5485181857783781, "learning_rate": 1.0082725060827251e-05, "loss": 0.5993, "step": 28035 }, { "epoch": 0.8185454439286444, "grad_norm": 0.5504344156379875, "learning_rate": 1.0081103000811031e-05, "loss": 0.6113, "step": 28036 }, { "epoch": 0.8185746401564917, "grad_norm": 0.5336360313245766, "learning_rate": 1.007948094079481e-05, "loss": 0.5953, "step": 28037 }, { "epoch": 0.8186038363843391, "grad_norm": 0.5108777954614452, "learning_rate": 1.0077858880778588e-05, "loss": 0.5967, "step": 28038 }, { "epoch": 0.8186330326121865, "grad_norm": 0.5220718281738108, "learning_rate": 1.007623682076237e-05, "loss": 0.5672, "step": 28039 }, { "epoch": 0.8186622288400339, "grad_norm": 0.5039649444277308, "learning_rate": 1.0074614760746148e-05, "loss": 0.5572, "step": 28040 }, { "epoch": 0.8186914250678813, "grad_norm": 0.5601735355892605, "learning_rate": 1.0072992700729927e-05, "loss": 0.6329, "step": 28041 }, { "epoch": 0.8187206212957286, "grad_norm": 0.5364956317623018, "learning_rate": 1.0071370640713707e-05, "loss": 0.6248, "step": 28042 }, { "epoch": 0.818749817523576, "grad_norm": 0.5134472132101834, "learning_rate": 1.0069748580697485e-05, "loss": 0.6006, "step": 28043 }, { "epoch": 0.8187790137514234, "grad_norm": 0.5437210200817327, "learning_rate": 1.0068126520681267e-05, "loss": 0.5987, "step": 28044 }, { "epoch": 0.8188082099792707, "grad_norm": 0.5274117285412204, "learning_rate": 1.0066504460665045e-05, "loss": 0.6055, "step": 28045 }, { "epoch": 0.8188374062071181, "grad_norm": 0.5455496417452239, "learning_rate": 1.0064882400648824e-05, "loss": 0.6288, "step": 28046 }, { "epoch": 0.8188666024349655, "grad_norm": 0.5115967241046198, "learning_rate": 1.0063260340632604e-05, "loss": 0.5718, "step": 28047 }, { "epoch": 0.8188957986628128, "grad_norm": 0.5398185749064246, "learning_rate": 1.0061638280616384e-05, "loss": 0.6163, "step": 28048 }, { "epoch": 0.8189249948906602, "grad_norm": 0.5604251006162445, "learning_rate": 1.0060016220600164e-05, "loss": 0.6532, "step": 28049 }, { "epoch": 0.8189541911185075, "grad_norm": 0.5341142541485474, "learning_rate": 1.0058394160583942e-05, "loss": 0.6072, "step": 28050 }, { "epoch": 0.8189833873463549, "grad_norm": 0.5214716113294113, "learning_rate": 1.005677210056772e-05, "loss": 0.5879, "step": 28051 }, { "epoch": 0.8190125835742023, "grad_norm": 0.5333261637265939, "learning_rate": 1.00551500405515e-05, "loss": 0.6155, "step": 28052 }, { "epoch": 0.8190417798020496, "grad_norm": 0.515495836454485, "learning_rate": 1.005352798053528e-05, "loss": 0.5848, "step": 28053 }, { "epoch": 0.819070976029897, "grad_norm": 0.5613569397098884, "learning_rate": 1.005190592051906e-05, "loss": 0.6916, "step": 28054 }, { "epoch": 0.8191001722577443, "grad_norm": 0.5195116984549228, "learning_rate": 1.0050283860502839e-05, "loss": 0.5757, "step": 28055 }, { "epoch": 0.8191293684855917, "grad_norm": 0.5541846904208311, "learning_rate": 1.0048661800486617e-05, "loss": 0.6447, "step": 28056 }, { "epoch": 0.8191585647134391, "grad_norm": 0.5692706753352725, "learning_rate": 1.0047039740470397e-05, "loss": 0.6754, "step": 28057 }, { "epoch": 0.8191877609412864, "grad_norm": 0.48766271929724764, "learning_rate": 1.0045417680454178e-05, "loss": 0.5338, "step": 28058 }, { "epoch": 0.8192169571691338, "grad_norm": 0.5494475481963736, "learning_rate": 1.0043795620437958e-05, "loss": 0.6274, "step": 28059 }, { "epoch": 0.8192461533969811, "grad_norm": 0.540790421771603, "learning_rate": 1.0042173560421736e-05, "loss": 0.6071, "step": 28060 }, { "epoch": 0.8192753496248285, "grad_norm": 0.5503848801317561, "learning_rate": 1.0040551500405514e-05, "loss": 0.6053, "step": 28061 }, { "epoch": 0.8193045458526759, "grad_norm": 0.5166101333785303, "learning_rate": 1.0038929440389296e-05, "loss": 0.5446, "step": 28062 }, { "epoch": 0.8193337420805232, "grad_norm": 0.5252329403897007, "learning_rate": 1.0037307380373075e-05, "loss": 0.5654, "step": 28063 }, { "epoch": 0.8193629383083706, "grad_norm": 0.501034141889194, "learning_rate": 1.0035685320356855e-05, "loss": 0.4811, "step": 28064 }, { "epoch": 0.819392134536218, "grad_norm": 0.5206446605872102, "learning_rate": 1.0034063260340633e-05, "loss": 0.5379, "step": 28065 }, { "epoch": 0.8194213307640653, "grad_norm": 0.529867394979854, "learning_rate": 1.0032441200324411e-05, "loss": 0.5727, "step": 28066 }, { "epoch": 0.8194505269919127, "grad_norm": 0.5525309067426472, "learning_rate": 1.0030819140308193e-05, "loss": 0.6286, "step": 28067 }, { "epoch": 0.81947972321976, "grad_norm": 0.5180072822237363, "learning_rate": 1.0029197080291971e-05, "loss": 0.5863, "step": 28068 }, { "epoch": 0.8195089194476074, "grad_norm": 0.46876764105304164, "learning_rate": 1.0027575020275752e-05, "loss": 0.4871, "step": 28069 }, { "epoch": 0.8195381156754548, "grad_norm": 0.5069936401003843, "learning_rate": 1.002595296025953e-05, "loss": 0.5358, "step": 28070 }, { "epoch": 0.8195673119033021, "grad_norm": 0.48828997096231574, "learning_rate": 1.0024330900243308e-05, "loss": 0.5132, "step": 28071 }, { "epoch": 0.8195965081311495, "grad_norm": 0.5487005196364909, "learning_rate": 1.002270884022709e-05, "loss": 0.6112, "step": 28072 }, { "epoch": 0.8196257043589968, "grad_norm": 0.5291287516384071, "learning_rate": 1.0021086780210868e-05, "loss": 0.5523, "step": 28073 }, { "epoch": 0.8196549005868442, "grad_norm": 0.5467388919604492, "learning_rate": 1.0019464720194647e-05, "loss": 0.6404, "step": 28074 }, { "epoch": 0.8196840968146916, "grad_norm": 0.5494823197581267, "learning_rate": 1.0017842660178427e-05, "loss": 0.6392, "step": 28075 }, { "epoch": 0.8197132930425389, "grad_norm": 0.5020568290644299, "learning_rate": 1.0016220600162205e-05, "loss": 0.5433, "step": 28076 }, { "epoch": 0.8197424892703863, "grad_norm": 0.5656467717364554, "learning_rate": 1.0014598540145987e-05, "loss": 0.6985, "step": 28077 }, { "epoch": 0.8197716854982336, "grad_norm": 0.5545431116601862, "learning_rate": 1.0012976480129765e-05, "loss": 0.5738, "step": 28078 }, { "epoch": 0.819800881726081, "grad_norm": 0.524193946746127, "learning_rate": 1.0011354420113544e-05, "loss": 0.6167, "step": 28079 }, { "epoch": 0.8198300779539284, "grad_norm": 0.5185602129577703, "learning_rate": 1.0009732360097324e-05, "loss": 0.5962, "step": 28080 }, { "epoch": 0.8198592741817757, "grad_norm": 0.5695619768475816, "learning_rate": 1.0008110300081104e-05, "loss": 0.6238, "step": 28081 }, { "epoch": 0.8198884704096231, "grad_norm": 0.5172119609288609, "learning_rate": 1.0006488240064884e-05, "loss": 0.558, "step": 28082 }, { "epoch": 0.8199176666374705, "grad_norm": 0.48815415890476005, "learning_rate": 1.0004866180048662e-05, "loss": 0.5258, "step": 28083 }, { "epoch": 0.8199468628653178, "grad_norm": 0.517205429241017, "learning_rate": 1.000324412003244e-05, "loss": 0.574, "step": 28084 }, { "epoch": 0.8199760590931652, "grad_norm": 0.5077116178565951, "learning_rate": 1.000162206001622e-05, "loss": 0.542, "step": 28085 }, { "epoch": 0.8200052553210125, "grad_norm": 0.554252202398459, "learning_rate": 1e-05, "loss": 0.6305, "step": 28086 }, { "epoch": 0.8200344515488599, "grad_norm": 0.5499083848875147, "learning_rate": 9.998377939983781e-06, "loss": 0.5964, "step": 28087 }, { "epoch": 0.8200636477767073, "grad_norm": 0.5429505836423033, "learning_rate": 9.99675587996756e-06, "loss": 0.6222, "step": 28088 }, { "epoch": 0.8200928440045546, "grad_norm": 0.5570189966558332, "learning_rate": 9.995133819951338e-06, "loss": 0.6544, "step": 28089 }, { "epoch": 0.820122040232402, "grad_norm": 0.522103548459458, "learning_rate": 9.993511759935118e-06, "loss": 0.601, "step": 28090 }, { "epoch": 0.8201512364602493, "grad_norm": 0.5614286081351206, "learning_rate": 9.991889699918898e-06, "loss": 0.6485, "step": 28091 }, { "epoch": 0.8201804326880967, "grad_norm": 0.519176909814112, "learning_rate": 9.990267639902678e-06, "loss": 0.5759, "step": 28092 }, { "epoch": 0.8202096289159441, "grad_norm": 0.5305595994866336, "learning_rate": 9.988645579886456e-06, "loss": 0.5974, "step": 28093 }, { "epoch": 0.8202388251437914, "grad_norm": 0.5257984316189391, "learning_rate": 9.987023519870235e-06, "loss": 0.6109, "step": 28094 }, { "epoch": 0.8202680213716388, "grad_norm": 0.48352364562429595, "learning_rate": 9.985401459854016e-06, "loss": 0.5358, "step": 28095 }, { "epoch": 0.8202972175994862, "grad_norm": 0.4593670189581119, "learning_rate": 9.983779399837795e-06, "loss": 0.4404, "step": 28096 }, { "epoch": 0.8203264138273335, "grad_norm": 0.513081293516768, "learning_rate": 9.982157339821575e-06, "loss": 0.5495, "step": 28097 }, { "epoch": 0.8203556100551809, "grad_norm": 0.4871972407871308, "learning_rate": 9.980535279805353e-06, "loss": 0.5072, "step": 28098 }, { "epoch": 0.8203848062830282, "grad_norm": 0.5381665185617389, "learning_rate": 9.978913219789132e-06, "loss": 0.5967, "step": 28099 }, { "epoch": 0.8204140025108756, "grad_norm": 0.5517967925724824, "learning_rate": 9.977291159772913e-06, "loss": 0.595, "step": 28100 }, { "epoch": 0.820443198738723, "grad_norm": 0.5118741260481833, "learning_rate": 9.975669099756692e-06, "loss": 0.5582, "step": 28101 }, { "epoch": 0.8204723949665703, "grad_norm": 0.5067769524347444, "learning_rate": 9.97404703974047e-06, "loss": 0.5195, "step": 28102 }, { "epoch": 0.8205015911944177, "grad_norm": 0.49237869161940206, "learning_rate": 9.97242497972425e-06, "loss": 0.5211, "step": 28103 }, { "epoch": 0.820530787422265, "grad_norm": 0.5063804834614114, "learning_rate": 9.970802919708028e-06, "loss": 0.5592, "step": 28104 }, { "epoch": 0.8205599836501124, "grad_norm": 0.5217862229610752, "learning_rate": 9.96918085969181e-06, "loss": 0.5792, "step": 28105 }, { "epoch": 0.8205891798779598, "grad_norm": 0.5561080427284616, "learning_rate": 9.967558799675589e-06, "loss": 0.6432, "step": 28106 }, { "epoch": 0.8206183761058071, "grad_norm": 0.539157858151967, "learning_rate": 9.965936739659367e-06, "loss": 0.6254, "step": 28107 }, { "epoch": 0.8206475723336545, "grad_norm": 0.500828652744147, "learning_rate": 9.964314679643147e-06, "loss": 0.5328, "step": 28108 }, { "epoch": 0.8206767685615018, "grad_norm": 0.5135312376589058, "learning_rate": 9.962692619626925e-06, "loss": 0.5535, "step": 28109 }, { "epoch": 0.8207059647893492, "grad_norm": 0.541020963824964, "learning_rate": 9.961070559610707e-06, "loss": 0.6232, "step": 28110 }, { "epoch": 0.8207351610171966, "grad_norm": 0.511311975623401, "learning_rate": 9.959448499594486e-06, "loss": 0.5975, "step": 28111 }, { "epoch": 0.8207643572450439, "grad_norm": 0.5585541201227012, "learning_rate": 9.957826439578264e-06, "loss": 0.6325, "step": 28112 }, { "epoch": 0.8207935534728913, "grad_norm": 0.4901012636151777, "learning_rate": 9.956204379562044e-06, "loss": 0.5245, "step": 28113 }, { "epoch": 0.8208227497007387, "grad_norm": 0.5084627575005898, "learning_rate": 9.954582319545824e-06, "loss": 0.574, "step": 28114 }, { "epoch": 0.820851945928586, "grad_norm": 0.5653545558221652, "learning_rate": 9.952960259529604e-06, "loss": 0.6394, "step": 28115 }, { "epoch": 0.8208811421564334, "grad_norm": 0.5311284535897266, "learning_rate": 9.951338199513383e-06, "loss": 0.5678, "step": 28116 }, { "epoch": 0.8209103383842807, "grad_norm": 0.5368153214392261, "learning_rate": 9.949716139497161e-06, "loss": 0.5767, "step": 28117 }, { "epoch": 0.8209395346121281, "grad_norm": 0.5600187965316824, "learning_rate": 9.948094079480941e-06, "loss": 0.5822, "step": 28118 }, { "epoch": 0.8209687308399755, "grad_norm": 0.5354867480429601, "learning_rate": 9.946472019464721e-06, "loss": 0.613, "step": 28119 }, { "epoch": 0.8209979270678228, "grad_norm": 0.48396769588549376, "learning_rate": 9.944849959448501e-06, "loss": 0.528, "step": 28120 }, { "epoch": 0.8210271232956702, "grad_norm": 0.5256954055765077, "learning_rate": 9.94322789943228e-06, "loss": 0.6064, "step": 28121 }, { "epoch": 0.8210563195235175, "grad_norm": 0.5408996888029872, "learning_rate": 9.941605839416058e-06, "loss": 0.626, "step": 28122 }, { "epoch": 0.8210855157513649, "grad_norm": 0.5330336474027628, "learning_rate": 9.939983779399838e-06, "loss": 0.5688, "step": 28123 }, { "epoch": 0.8211147119792123, "grad_norm": 0.4953158773968173, "learning_rate": 9.938361719383618e-06, "loss": 0.5274, "step": 28124 }, { "epoch": 0.8211439082070596, "grad_norm": 0.5552135796460642, "learning_rate": 9.936739659367398e-06, "loss": 0.6233, "step": 28125 }, { "epoch": 0.821173104434907, "grad_norm": 0.5424320932444219, "learning_rate": 9.935117599351176e-06, "loss": 0.6134, "step": 28126 }, { "epoch": 0.8212023006627543, "grad_norm": 0.5010440768992486, "learning_rate": 9.933495539334955e-06, "loss": 0.5138, "step": 28127 }, { "epoch": 0.8212314968906017, "grad_norm": 0.5248889790874156, "learning_rate": 9.931873479318737e-06, "loss": 0.5486, "step": 28128 }, { "epoch": 0.8212606931184491, "grad_norm": 0.5465408298973592, "learning_rate": 9.930251419302515e-06, "loss": 0.6408, "step": 28129 }, { "epoch": 0.8212898893462964, "grad_norm": 0.5757519776062198, "learning_rate": 9.928629359286295e-06, "loss": 0.6893, "step": 28130 }, { "epoch": 0.8213190855741438, "grad_norm": 0.5036285352745679, "learning_rate": 9.927007299270073e-06, "loss": 0.5433, "step": 28131 }, { "epoch": 0.8213482818019912, "grad_norm": 0.5238944199813647, "learning_rate": 9.925385239253852e-06, "loss": 0.595, "step": 28132 }, { "epoch": 0.8213774780298385, "grad_norm": 0.5093792701825288, "learning_rate": 9.923763179237634e-06, "loss": 0.5557, "step": 28133 }, { "epoch": 0.8214066742576859, "grad_norm": 0.5352527536053866, "learning_rate": 9.922141119221412e-06, "loss": 0.5823, "step": 28134 }, { "epoch": 0.8214358704855332, "grad_norm": 0.5417402315055446, "learning_rate": 9.92051905920519e-06, "loss": 0.6146, "step": 28135 }, { "epoch": 0.8214650667133806, "grad_norm": 0.5255335720326587, "learning_rate": 9.91889699918897e-06, "loss": 0.5887, "step": 28136 }, { "epoch": 0.821494262941228, "grad_norm": 0.5281122060780692, "learning_rate": 9.917274939172749e-06, "loss": 0.5962, "step": 28137 }, { "epoch": 0.8215234591690753, "grad_norm": 0.46532369694502795, "learning_rate": 9.91565287915653e-06, "loss": 0.4876, "step": 28138 }, { "epoch": 0.8215526553969227, "grad_norm": 0.5420222229388506, "learning_rate": 9.914030819140309e-06, "loss": 0.6276, "step": 28139 }, { "epoch": 0.82158185162477, "grad_norm": 0.5263084073971005, "learning_rate": 9.912408759124087e-06, "loss": 0.5876, "step": 28140 }, { "epoch": 0.8216110478526174, "grad_norm": 0.48734094075183987, "learning_rate": 9.910786699107867e-06, "loss": 0.4679, "step": 28141 }, { "epoch": 0.8216402440804648, "grad_norm": 0.53275145065072, "learning_rate": 9.909164639091646e-06, "loss": 0.6019, "step": 28142 }, { "epoch": 0.8216694403083121, "grad_norm": 0.5067176081806125, "learning_rate": 9.907542579075427e-06, "loss": 0.5734, "step": 28143 }, { "epoch": 0.8216986365361595, "grad_norm": 0.49400223874577776, "learning_rate": 9.905920519059206e-06, "loss": 0.5236, "step": 28144 }, { "epoch": 0.8217278327640068, "grad_norm": 0.5164197884242437, "learning_rate": 9.904298459042984e-06, "loss": 0.6003, "step": 28145 }, { "epoch": 0.8217570289918542, "grad_norm": 0.5278317865382309, "learning_rate": 9.902676399026764e-06, "loss": 0.5667, "step": 28146 }, { "epoch": 0.8217862252197016, "grad_norm": 0.4903620102061758, "learning_rate": 9.901054339010544e-06, "loss": 0.4977, "step": 28147 }, { "epoch": 0.8218154214475489, "grad_norm": 0.49923165863023167, "learning_rate": 9.899432278994324e-06, "loss": 0.5333, "step": 28148 }, { "epoch": 0.8218446176753963, "grad_norm": 0.49443639743388956, "learning_rate": 9.897810218978103e-06, "loss": 0.5396, "step": 28149 }, { "epoch": 0.8218738139032437, "grad_norm": 0.5232969231561432, "learning_rate": 9.896188158961881e-06, "loss": 0.6109, "step": 28150 }, { "epoch": 0.821903010131091, "grad_norm": 0.5097858721210662, "learning_rate": 9.894566098945661e-06, "loss": 0.5683, "step": 28151 }, { "epoch": 0.8219322063589384, "grad_norm": 0.5257569597533174, "learning_rate": 9.892944038929441e-06, "loss": 0.6054, "step": 28152 }, { "epoch": 0.8219614025867857, "grad_norm": 0.5139227179522952, "learning_rate": 9.891321978913221e-06, "loss": 0.5802, "step": 28153 }, { "epoch": 0.8219905988146331, "grad_norm": 0.5054116603212212, "learning_rate": 9.889699918897e-06, "loss": 0.5682, "step": 28154 }, { "epoch": 0.8220197950424805, "grad_norm": 0.5651138538859962, "learning_rate": 9.888077858880778e-06, "loss": 0.595, "step": 28155 }, { "epoch": 0.8220489912703278, "grad_norm": 0.5261829148249478, "learning_rate": 9.886455798864558e-06, "loss": 0.5698, "step": 28156 }, { "epoch": 0.8220781874981752, "grad_norm": 0.519205352505525, "learning_rate": 9.884833738848338e-06, "loss": 0.517, "step": 28157 }, { "epoch": 0.8221073837260225, "grad_norm": 0.5402577775762082, "learning_rate": 9.883211678832118e-06, "loss": 0.6183, "step": 28158 }, { "epoch": 0.8221365799538699, "grad_norm": 0.5190294767334285, "learning_rate": 9.881589618815897e-06, "loss": 0.5944, "step": 28159 }, { "epoch": 0.8221657761817173, "grad_norm": 0.5009761144201857, "learning_rate": 9.879967558799675e-06, "loss": 0.5426, "step": 28160 }, { "epoch": 0.8221949724095647, "grad_norm": 0.5041227137411907, "learning_rate": 9.878345498783455e-06, "loss": 0.5182, "step": 28161 }, { "epoch": 0.8222241686374121, "grad_norm": 0.5484518123911812, "learning_rate": 9.876723438767235e-06, "loss": 0.6015, "step": 28162 }, { "epoch": 0.8222533648652595, "grad_norm": 0.4824507912995697, "learning_rate": 9.875101378751014e-06, "loss": 0.5145, "step": 28163 }, { "epoch": 0.8222825610931068, "grad_norm": 0.49587589501452123, "learning_rate": 9.873479318734794e-06, "loss": 0.5272, "step": 28164 }, { "epoch": 0.8223117573209542, "grad_norm": 0.5003810281825738, "learning_rate": 9.871857258718572e-06, "loss": 0.5378, "step": 28165 }, { "epoch": 0.8223409535488015, "grad_norm": 0.546691173980928, "learning_rate": 9.870235198702354e-06, "loss": 0.6109, "step": 28166 }, { "epoch": 0.8223701497766489, "grad_norm": 0.5029554172211341, "learning_rate": 9.868613138686132e-06, "loss": 0.5369, "step": 28167 }, { "epoch": 0.8223993460044963, "grad_norm": 0.48974870981649865, "learning_rate": 9.86699107866991e-06, "loss": 0.5083, "step": 28168 }, { "epoch": 0.8224285422323436, "grad_norm": 0.5358646433536525, "learning_rate": 9.86536901865369e-06, "loss": 0.6156, "step": 28169 }, { "epoch": 0.822457738460191, "grad_norm": 0.4989298922818306, "learning_rate": 9.863746958637469e-06, "loss": 0.5309, "step": 28170 }, { "epoch": 0.8224869346880384, "grad_norm": 0.5204335934349508, "learning_rate": 9.86212489862125e-06, "loss": 0.5973, "step": 28171 }, { "epoch": 0.8225161309158857, "grad_norm": 0.5621907905632066, "learning_rate": 9.860502838605029e-06, "loss": 0.6804, "step": 28172 }, { "epoch": 0.8225453271437331, "grad_norm": 0.4989281579636524, "learning_rate": 9.858880778588807e-06, "loss": 0.5396, "step": 28173 }, { "epoch": 0.8225745233715804, "grad_norm": 0.5487329292340486, "learning_rate": 9.857258718572588e-06, "loss": 0.6224, "step": 28174 }, { "epoch": 0.8226037195994278, "grad_norm": 0.5449538727886479, "learning_rate": 9.855636658556366e-06, "loss": 0.6041, "step": 28175 }, { "epoch": 0.8226329158272752, "grad_norm": 0.5145597157328315, "learning_rate": 9.854014598540148e-06, "loss": 0.5762, "step": 28176 }, { "epoch": 0.8226621120551225, "grad_norm": 0.539045205605516, "learning_rate": 9.852392538523926e-06, "loss": 0.6031, "step": 28177 }, { "epoch": 0.8226913082829699, "grad_norm": 0.5107975455114717, "learning_rate": 9.850770478507704e-06, "loss": 0.5368, "step": 28178 }, { "epoch": 0.8227205045108172, "grad_norm": 0.5660323359342659, "learning_rate": 9.849148418491484e-06, "loss": 0.6581, "step": 28179 }, { "epoch": 0.8227497007386646, "grad_norm": 0.5891906255203705, "learning_rate": 9.847526358475265e-06, "loss": 0.6669, "step": 28180 }, { "epoch": 0.822778896966512, "grad_norm": 0.5347576486089297, "learning_rate": 9.845904298459045e-06, "loss": 0.6226, "step": 28181 }, { "epoch": 0.8228080931943593, "grad_norm": 0.528632567043828, "learning_rate": 9.844282238442823e-06, "loss": 0.5672, "step": 28182 }, { "epoch": 0.8228372894222067, "grad_norm": 0.5075383555391898, "learning_rate": 9.842660178426601e-06, "loss": 0.559, "step": 28183 }, { "epoch": 0.822866485650054, "grad_norm": 0.5269815591516412, "learning_rate": 9.841038118410381e-06, "loss": 0.5923, "step": 28184 }, { "epoch": 0.8228956818779014, "grad_norm": 0.5447170763349912, "learning_rate": 9.839416058394161e-06, "loss": 0.6401, "step": 28185 }, { "epoch": 0.8229248781057488, "grad_norm": 0.5208204153579601, "learning_rate": 9.837793998377942e-06, "loss": 0.5936, "step": 28186 }, { "epoch": 0.8229540743335961, "grad_norm": 0.496412392113678, "learning_rate": 9.83617193836172e-06, "loss": 0.5681, "step": 28187 }, { "epoch": 0.8229832705614435, "grad_norm": 0.5128962258337978, "learning_rate": 9.834549878345498e-06, "loss": 0.5684, "step": 28188 }, { "epoch": 0.8230124667892909, "grad_norm": 0.5150221601340939, "learning_rate": 9.832927818329278e-06, "loss": 0.5876, "step": 28189 }, { "epoch": 0.8230416630171382, "grad_norm": 0.5419439644904595, "learning_rate": 9.831305758313058e-06, "loss": 0.6373, "step": 28190 }, { "epoch": 0.8230708592449856, "grad_norm": 0.5377625681661472, "learning_rate": 9.829683698296839e-06, "loss": 0.5579, "step": 28191 }, { "epoch": 0.8231000554728329, "grad_norm": 0.5213607726683088, "learning_rate": 9.828061638280617e-06, "loss": 0.5593, "step": 28192 }, { "epoch": 0.8231292517006803, "grad_norm": 0.5342634703567009, "learning_rate": 9.826439578264395e-06, "loss": 0.5674, "step": 28193 }, { "epoch": 0.8231584479285277, "grad_norm": 0.5348792978477361, "learning_rate": 9.824817518248175e-06, "loss": 0.5531, "step": 28194 }, { "epoch": 0.823187644156375, "grad_norm": 0.517358219939348, "learning_rate": 9.823195458231955e-06, "loss": 0.5555, "step": 28195 }, { "epoch": 0.8232168403842224, "grad_norm": 0.5433880112292688, "learning_rate": 9.821573398215734e-06, "loss": 0.5989, "step": 28196 }, { "epoch": 0.8232460366120697, "grad_norm": 0.5234111174982903, "learning_rate": 9.819951338199514e-06, "loss": 0.5283, "step": 28197 }, { "epoch": 0.8232752328399171, "grad_norm": 0.5080161522600931, "learning_rate": 9.818329278183292e-06, "loss": 0.5491, "step": 28198 }, { "epoch": 0.8233044290677645, "grad_norm": 0.5339441464137743, "learning_rate": 9.816707218167074e-06, "loss": 0.4949, "step": 28199 }, { "epoch": 0.8233336252956118, "grad_norm": 0.559727054194872, "learning_rate": 9.815085158150852e-06, "loss": 0.6578, "step": 28200 }, { "epoch": 0.8233628215234592, "grad_norm": 0.5067594234329759, "learning_rate": 9.81346309813463e-06, "loss": 0.516, "step": 28201 }, { "epoch": 0.8233920177513065, "grad_norm": 0.5322456230546028, "learning_rate": 9.81184103811841e-06, "loss": 0.597, "step": 28202 }, { "epoch": 0.8234212139791539, "grad_norm": 0.5271959978893204, "learning_rate": 9.810218978102189e-06, "loss": 0.576, "step": 28203 }, { "epoch": 0.8234504102070013, "grad_norm": 0.532920111064011, "learning_rate": 9.808596918085971e-06, "loss": 0.5879, "step": 28204 }, { "epoch": 0.8234796064348486, "grad_norm": 0.5473753462962496, "learning_rate": 9.80697485806975e-06, "loss": 0.6104, "step": 28205 }, { "epoch": 0.823508802662696, "grad_norm": 0.49773425286239226, "learning_rate": 9.805352798053528e-06, "loss": 0.5388, "step": 28206 }, { "epoch": 0.8235379988905434, "grad_norm": 0.5378596245478288, "learning_rate": 9.803730738037308e-06, "loss": 0.5789, "step": 28207 }, { "epoch": 0.8235671951183907, "grad_norm": 0.5279245185782218, "learning_rate": 9.802108678021086e-06, "loss": 0.593, "step": 28208 }, { "epoch": 0.8235963913462381, "grad_norm": 0.4944142785523992, "learning_rate": 9.800486618004868e-06, "loss": 0.5268, "step": 28209 }, { "epoch": 0.8236255875740854, "grad_norm": 0.4874736302159175, "learning_rate": 9.798864557988646e-06, "loss": 0.5138, "step": 28210 }, { "epoch": 0.8236547838019328, "grad_norm": 0.6341508388022108, "learning_rate": 9.797242497972425e-06, "loss": 0.6391, "step": 28211 }, { "epoch": 0.8236839800297802, "grad_norm": 0.4780831415261212, "learning_rate": 9.795620437956205e-06, "loss": 0.4912, "step": 28212 }, { "epoch": 0.8237131762576275, "grad_norm": 0.5731778822564755, "learning_rate": 9.793998377939985e-06, "loss": 0.603, "step": 28213 }, { "epoch": 0.8237423724854749, "grad_norm": 0.5358721503761698, "learning_rate": 9.792376317923765e-06, "loss": 0.582, "step": 28214 }, { "epoch": 0.8237715687133222, "grad_norm": 0.5354280276601472, "learning_rate": 9.790754257907543e-06, "loss": 0.5848, "step": 28215 }, { "epoch": 0.8238007649411696, "grad_norm": 0.5539848495464518, "learning_rate": 9.789132197891322e-06, "loss": 0.6202, "step": 28216 }, { "epoch": 0.823829961169017, "grad_norm": 0.6511165186238697, "learning_rate": 9.787510137875102e-06, "loss": 0.5257, "step": 28217 }, { "epoch": 0.8238591573968643, "grad_norm": 0.5192635108872907, "learning_rate": 9.785888077858882e-06, "loss": 0.5969, "step": 28218 }, { "epoch": 0.8238883536247117, "grad_norm": 0.537452313760864, "learning_rate": 9.784266017842662e-06, "loss": 0.5908, "step": 28219 }, { "epoch": 0.823917549852559, "grad_norm": 0.5331427582035263, "learning_rate": 9.78264395782644e-06, "loss": 0.5553, "step": 28220 }, { "epoch": 0.8239467460804064, "grad_norm": 0.514457352008066, "learning_rate": 9.781021897810219e-06, "loss": 0.5798, "step": 28221 }, { "epoch": 0.8239759423082538, "grad_norm": 0.4778680270773148, "learning_rate": 9.779399837793999e-06, "loss": 0.509, "step": 28222 }, { "epoch": 0.8240051385361011, "grad_norm": 0.5127653822893118, "learning_rate": 9.777777777777779e-06, "loss": 0.5431, "step": 28223 }, { "epoch": 0.8240343347639485, "grad_norm": 0.5004183113536498, "learning_rate": 9.776155717761557e-06, "loss": 0.5313, "step": 28224 }, { "epoch": 0.8240635309917959, "grad_norm": 0.5113465244025408, "learning_rate": 9.774533657745337e-06, "loss": 0.5808, "step": 28225 }, { "epoch": 0.8240927272196432, "grad_norm": 0.5167288790527266, "learning_rate": 9.772911597729115e-06, "loss": 0.5472, "step": 28226 }, { "epoch": 0.8241219234474906, "grad_norm": 0.55311390251724, "learning_rate": 9.771289537712896e-06, "loss": 0.6515, "step": 28227 }, { "epoch": 0.8241511196753379, "grad_norm": 0.5049194724277045, "learning_rate": 9.769667477696676e-06, "loss": 0.5761, "step": 28228 }, { "epoch": 0.8241803159031853, "grad_norm": 0.4958438391832344, "learning_rate": 9.768045417680454e-06, "loss": 0.5739, "step": 28229 }, { "epoch": 0.8242095121310327, "grad_norm": 0.5529648539044503, "learning_rate": 9.766423357664234e-06, "loss": 0.6485, "step": 28230 }, { "epoch": 0.82423870835888, "grad_norm": 0.5241427225464841, "learning_rate": 9.764801297648012e-06, "loss": 0.6127, "step": 28231 }, { "epoch": 0.8242679045867274, "grad_norm": 0.4969952055693524, "learning_rate": 9.763179237631794e-06, "loss": 0.5602, "step": 28232 }, { "epoch": 0.8242971008145747, "grad_norm": 0.5119230586256235, "learning_rate": 9.761557177615573e-06, "loss": 0.5845, "step": 28233 }, { "epoch": 0.8243262970424221, "grad_norm": 0.5350533375800833, "learning_rate": 9.759935117599351e-06, "loss": 0.5834, "step": 28234 }, { "epoch": 0.8243554932702695, "grad_norm": 0.5303323287298818, "learning_rate": 9.758313057583131e-06, "loss": 0.6104, "step": 28235 }, { "epoch": 0.8243846894981168, "grad_norm": 0.5394249548633087, "learning_rate": 9.75669099756691e-06, "loss": 0.5994, "step": 28236 }, { "epoch": 0.8244138857259642, "grad_norm": 0.481743324686111, "learning_rate": 9.755068937550691e-06, "loss": 0.5179, "step": 28237 }, { "epoch": 0.8244430819538116, "grad_norm": 0.5568743408603539, "learning_rate": 9.75344687753447e-06, "loss": 0.6147, "step": 28238 }, { "epoch": 0.8244722781816589, "grad_norm": 0.5230524187319939, "learning_rate": 9.751824817518248e-06, "loss": 0.5861, "step": 28239 }, { "epoch": 0.8245014744095063, "grad_norm": 0.5359580662941964, "learning_rate": 9.750202757502028e-06, "loss": 0.6411, "step": 28240 }, { "epoch": 0.8245306706373536, "grad_norm": 0.5123269564130727, "learning_rate": 9.748580697485806e-06, "loss": 0.5381, "step": 28241 }, { "epoch": 0.824559866865201, "grad_norm": 0.520799215095442, "learning_rate": 9.746958637469588e-06, "loss": 0.5849, "step": 28242 }, { "epoch": 0.8245890630930484, "grad_norm": 0.5055939388367356, "learning_rate": 9.745336577453366e-06, "loss": 0.5006, "step": 28243 }, { "epoch": 0.8246182593208957, "grad_norm": 0.4919985293273001, "learning_rate": 9.743714517437145e-06, "loss": 0.5357, "step": 28244 }, { "epoch": 0.8246474555487431, "grad_norm": 0.5462502018625855, "learning_rate": 9.742092457420925e-06, "loss": 0.5971, "step": 28245 }, { "epoch": 0.8246766517765904, "grad_norm": 0.5370817900513529, "learning_rate": 9.740470397404705e-06, "loss": 0.6488, "step": 28246 }, { "epoch": 0.8247058480044378, "grad_norm": 0.5143050131307307, "learning_rate": 9.738848337388485e-06, "loss": 0.506, "step": 28247 }, { "epoch": 0.8247350442322852, "grad_norm": 0.5178636023168848, "learning_rate": 9.737226277372263e-06, "loss": 0.5494, "step": 28248 }, { "epoch": 0.8247642404601325, "grad_norm": 0.5012151529796961, "learning_rate": 9.735604217356042e-06, "loss": 0.5332, "step": 28249 }, { "epoch": 0.8247934366879799, "grad_norm": 0.5107528907025792, "learning_rate": 9.733982157339822e-06, "loss": 0.5371, "step": 28250 }, { "epoch": 0.8248226329158272, "grad_norm": 0.5093462643452565, "learning_rate": 9.732360097323602e-06, "loss": 0.5183, "step": 28251 }, { "epoch": 0.8248518291436746, "grad_norm": 0.527638989983464, "learning_rate": 9.730738037307382e-06, "loss": 0.5911, "step": 28252 }, { "epoch": 0.824881025371522, "grad_norm": 0.49880687358861286, "learning_rate": 9.72911597729116e-06, "loss": 0.5452, "step": 28253 }, { "epoch": 0.8249102215993693, "grad_norm": 0.47791862877093777, "learning_rate": 9.727493917274939e-06, "loss": 0.5075, "step": 28254 }, { "epoch": 0.8249394178272167, "grad_norm": 0.5108223268365206, "learning_rate": 9.725871857258719e-06, "loss": 0.5822, "step": 28255 }, { "epoch": 0.824968614055064, "grad_norm": 0.5237528884892406, "learning_rate": 9.724249797242499e-06, "loss": 0.5651, "step": 28256 }, { "epoch": 0.8249978102829114, "grad_norm": 0.514792447848301, "learning_rate": 9.722627737226277e-06, "loss": 0.5406, "step": 28257 }, { "epoch": 0.8250270065107588, "grad_norm": 0.5469206561219376, "learning_rate": 9.721005677210057e-06, "loss": 0.643, "step": 28258 }, { "epoch": 0.8250562027386061, "grad_norm": 0.5313243341147578, "learning_rate": 9.719383617193836e-06, "loss": 0.55, "step": 28259 }, { "epoch": 0.8250853989664535, "grad_norm": 0.5824836962625403, "learning_rate": 9.717761557177616e-06, "loss": 0.7219, "step": 28260 }, { "epoch": 0.8251145951943009, "grad_norm": 0.5595411660686586, "learning_rate": 9.716139497161396e-06, "loss": 0.6293, "step": 28261 }, { "epoch": 0.8251437914221482, "grad_norm": 0.5930455110395846, "learning_rate": 9.714517437145174e-06, "loss": 0.6693, "step": 28262 }, { "epoch": 0.8251729876499956, "grad_norm": 0.5769678721091703, "learning_rate": 9.712895377128954e-06, "loss": 0.689, "step": 28263 }, { "epoch": 0.8252021838778429, "grad_norm": 0.5149959955635802, "learning_rate": 9.711273317112733e-06, "loss": 0.5961, "step": 28264 }, { "epoch": 0.8252313801056903, "grad_norm": 0.533893376949332, "learning_rate": 9.709651257096514e-06, "loss": 0.5803, "step": 28265 }, { "epoch": 0.8252605763335377, "grad_norm": 0.5470557001249948, "learning_rate": 9.708029197080293e-06, "loss": 0.6509, "step": 28266 }, { "epoch": 0.825289772561385, "grad_norm": 0.5206247362509959, "learning_rate": 9.706407137064071e-06, "loss": 0.6098, "step": 28267 }, { "epoch": 0.8253189687892324, "grad_norm": 0.5356522431541835, "learning_rate": 9.704785077047851e-06, "loss": 0.5924, "step": 28268 }, { "epoch": 0.8253481650170797, "grad_norm": 0.5549214120808938, "learning_rate": 9.70316301703163e-06, "loss": 0.6107, "step": 28269 }, { "epoch": 0.8253773612449271, "grad_norm": 0.5464174959773168, "learning_rate": 9.701540957015411e-06, "loss": 0.623, "step": 28270 }, { "epoch": 0.8254065574727745, "grad_norm": 0.5223010464191573, "learning_rate": 9.69991889699919e-06, "loss": 0.5905, "step": 28271 }, { "epoch": 0.8254357537006218, "grad_norm": 0.52708115176205, "learning_rate": 9.698296836982968e-06, "loss": 0.5842, "step": 28272 }, { "epoch": 0.8254649499284692, "grad_norm": 0.508510652548486, "learning_rate": 9.696674776966748e-06, "loss": 0.5277, "step": 28273 }, { "epoch": 0.8254941461563166, "grad_norm": 0.5164536980985989, "learning_rate": 9.695052716950527e-06, "loss": 0.5945, "step": 28274 }, { "epoch": 0.8255233423841639, "grad_norm": 0.5316968116334987, "learning_rate": 9.693430656934308e-06, "loss": 0.5616, "step": 28275 }, { "epoch": 0.8255525386120113, "grad_norm": 0.5311143061595528, "learning_rate": 9.691808596918087e-06, "loss": 0.6218, "step": 28276 }, { "epoch": 0.8255817348398586, "grad_norm": 0.6320949962226214, "learning_rate": 9.690186536901865e-06, "loss": 0.6469, "step": 28277 }, { "epoch": 0.825610931067706, "grad_norm": 0.5425654898567305, "learning_rate": 9.688564476885645e-06, "loss": 0.6165, "step": 28278 }, { "epoch": 0.8256401272955534, "grad_norm": 0.5229442221961645, "learning_rate": 9.686942416869423e-06, "loss": 0.5876, "step": 28279 }, { "epoch": 0.8256693235234007, "grad_norm": 0.5094954137511168, "learning_rate": 9.685320356853205e-06, "loss": 0.5578, "step": 28280 }, { "epoch": 0.8256985197512481, "grad_norm": 0.5148120105468129, "learning_rate": 9.683698296836984e-06, "loss": 0.5831, "step": 28281 }, { "epoch": 0.8257277159790956, "grad_norm": 0.5051156237289487, "learning_rate": 9.682076236820762e-06, "loss": 0.5566, "step": 28282 }, { "epoch": 0.8257569122069429, "grad_norm": 0.512503471438061, "learning_rate": 9.680454176804542e-06, "loss": 0.5695, "step": 28283 }, { "epoch": 0.8257861084347903, "grad_norm": 0.48902723510639884, "learning_rate": 9.678832116788322e-06, "loss": 0.5226, "step": 28284 }, { "epoch": 0.8258153046626376, "grad_norm": 0.5421408326882682, "learning_rate": 9.6772100567721e-06, "loss": 0.6338, "step": 28285 }, { "epoch": 0.825844500890485, "grad_norm": 0.5412577024307667, "learning_rate": 9.67558799675588e-06, "loss": 0.5863, "step": 28286 }, { "epoch": 0.8258736971183324, "grad_norm": 0.5088083841685486, "learning_rate": 9.673965936739659e-06, "loss": 0.5875, "step": 28287 }, { "epoch": 0.8259028933461797, "grad_norm": 0.5803920466600567, "learning_rate": 9.672343876723439e-06, "loss": 0.6948, "step": 28288 }, { "epoch": 0.8259320895740271, "grad_norm": 0.6094827379513497, "learning_rate": 9.670721816707219e-06, "loss": 0.5642, "step": 28289 }, { "epoch": 0.8259612858018744, "grad_norm": 0.5516806846666259, "learning_rate": 9.669099756690997e-06, "loss": 0.6247, "step": 28290 }, { "epoch": 0.8259904820297218, "grad_norm": 0.544892946067015, "learning_rate": 9.667477696674778e-06, "loss": 0.6402, "step": 28291 }, { "epoch": 0.8260196782575692, "grad_norm": 0.5233014972687745, "learning_rate": 9.665855636658556e-06, "loss": 0.5789, "step": 28292 }, { "epoch": 0.8260488744854165, "grad_norm": 0.5312934173122629, "learning_rate": 9.664233576642336e-06, "loss": 0.6368, "step": 28293 }, { "epoch": 0.8260780707132639, "grad_norm": 0.5017569292419057, "learning_rate": 9.662611516626116e-06, "loss": 0.5446, "step": 28294 }, { "epoch": 0.8261072669411113, "grad_norm": 0.5118769271070678, "learning_rate": 9.660989456609894e-06, "loss": 0.5533, "step": 28295 }, { "epoch": 0.8261364631689586, "grad_norm": 0.5391184190452194, "learning_rate": 9.659367396593674e-06, "loss": 0.6396, "step": 28296 }, { "epoch": 0.826165659396806, "grad_norm": 0.5305250693852551, "learning_rate": 9.657745336577453e-06, "loss": 0.5844, "step": 28297 }, { "epoch": 0.8261948556246533, "grad_norm": 0.5228191359531121, "learning_rate": 9.656123276561235e-06, "loss": 0.6034, "step": 28298 }, { "epoch": 0.8262240518525007, "grad_norm": 0.496993967205632, "learning_rate": 9.654501216545013e-06, "loss": 0.5466, "step": 28299 }, { "epoch": 0.8262532480803481, "grad_norm": 0.5784169712024693, "learning_rate": 9.652879156528791e-06, "loss": 0.5888, "step": 28300 }, { "epoch": 0.8262824443081954, "grad_norm": 0.5452161121651563, "learning_rate": 9.651257096512571e-06, "loss": 0.6149, "step": 28301 }, { "epoch": 0.8263116405360428, "grad_norm": 0.5413534329319263, "learning_rate": 9.64963503649635e-06, "loss": 0.5991, "step": 28302 }, { "epoch": 0.8263408367638901, "grad_norm": 0.5140149288677189, "learning_rate": 9.648012976480132e-06, "loss": 0.5403, "step": 28303 }, { "epoch": 0.8263700329917375, "grad_norm": 0.5129139980869072, "learning_rate": 9.64639091646391e-06, "loss": 0.5616, "step": 28304 }, { "epoch": 0.8263992292195849, "grad_norm": 0.46881703807347525, "learning_rate": 9.644768856447688e-06, "loss": 0.4662, "step": 28305 }, { "epoch": 0.8264284254474322, "grad_norm": 0.5154185627939465, "learning_rate": 9.643146796431468e-06, "loss": 0.5818, "step": 28306 }, { "epoch": 0.8264576216752796, "grad_norm": 0.520033927342132, "learning_rate": 9.641524736415247e-06, "loss": 0.5557, "step": 28307 }, { "epoch": 0.826486817903127, "grad_norm": 0.5241006685347158, "learning_rate": 9.639902676399029e-06, "loss": 0.5836, "step": 28308 }, { "epoch": 0.8265160141309743, "grad_norm": 0.4686482084383647, "learning_rate": 9.638280616382807e-06, "loss": 0.516, "step": 28309 }, { "epoch": 0.8265452103588217, "grad_norm": 0.5321378546353674, "learning_rate": 9.636658556366585e-06, "loss": 0.5776, "step": 28310 }, { "epoch": 0.826574406586669, "grad_norm": 0.5107102201698487, "learning_rate": 9.635036496350365e-06, "loss": 0.5442, "step": 28311 }, { "epoch": 0.8266036028145164, "grad_norm": 0.5022231162146551, "learning_rate": 9.633414436334144e-06, "loss": 0.5343, "step": 28312 }, { "epoch": 0.8266327990423638, "grad_norm": 0.5216895943371649, "learning_rate": 9.631792376317925e-06, "loss": 0.5285, "step": 28313 }, { "epoch": 0.8266619952702111, "grad_norm": 0.5369185169406604, "learning_rate": 9.630170316301704e-06, "loss": 0.6407, "step": 28314 }, { "epoch": 0.8266911914980585, "grad_norm": 0.48779204765378564, "learning_rate": 9.628548256285482e-06, "loss": 0.507, "step": 28315 }, { "epoch": 0.8267203877259058, "grad_norm": 0.5342417458437664, "learning_rate": 9.626926196269262e-06, "loss": 0.6086, "step": 28316 }, { "epoch": 0.8267495839537532, "grad_norm": 0.5262477823403925, "learning_rate": 9.625304136253042e-06, "loss": 0.5745, "step": 28317 }, { "epoch": 0.8267787801816006, "grad_norm": 0.5300315092445733, "learning_rate": 9.62368207623682e-06, "loss": 0.5302, "step": 28318 }, { "epoch": 0.8268079764094479, "grad_norm": 0.5167602832981687, "learning_rate": 9.6220600162206e-06, "loss": 0.5134, "step": 28319 }, { "epoch": 0.8268371726372953, "grad_norm": 0.5327192327124626, "learning_rate": 9.62043795620438e-06, "loss": 0.5933, "step": 28320 }, { "epoch": 0.8268663688651426, "grad_norm": 0.5124769967300898, "learning_rate": 9.61881589618816e-06, "loss": 0.5334, "step": 28321 }, { "epoch": 0.82689556509299, "grad_norm": 0.5437393813698775, "learning_rate": 9.61719383617194e-06, "loss": 0.6054, "step": 28322 }, { "epoch": 0.8269247613208374, "grad_norm": 0.5535498381762362, "learning_rate": 9.615571776155718e-06, "loss": 0.6313, "step": 28323 }, { "epoch": 0.8269539575486847, "grad_norm": 0.6981004430542053, "learning_rate": 9.613949716139498e-06, "loss": 0.6776, "step": 28324 }, { "epoch": 0.8269831537765321, "grad_norm": 0.5107432620479021, "learning_rate": 9.612327656123276e-06, "loss": 0.5379, "step": 28325 }, { "epoch": 0.8270123500043794, "grad_norm": 0.5761291316107359, "learning_rate": 9.610705596107056e-06, "loss": 0.6619, "step": 28326 }, { "epoch": 0.8270415462322268, "grad_norm": 0.4998156665726659, "learning_rate": 9.609083536090836e-06, "loss": 0.522, "step": 28327 }, { "epoch": 0.8270707424600742, "grad_norm": 0.5198997434465669, "learning_rate": 9.607461476074615e-06, "loss": 0.5507, "step": 28328 }, { "epoch": 0.8270999386879215, "grad_norm": 0.515910130239785, "learning_rate": 9.605839416058395e-06, "loss": 0.5724, "step": 28329 }, { "epoch": 0.8271291349157689, "grad_norm": 0.49932931957751675, "learning_rate": 9.604217356042173e-06, "loss": 0.5183, "step": 28330 }, { "epoch": 0.8271583311436163, "grad_norm": 0.6260324549763567, "learning_rate": 9.602595296025955e-06, "loss": 0.6594, "step": 28331 }, { "epoch": 0.8271875273714636, "grad_norm": 0.5819855307512554, "learning_rate": 9.600973236009733e-06, "loss": 0.6089, "step": 28332 }, { "epoch": 0.827216723599311, "grad_norm": 0.5706269786053523, "learning_rate": 9.599351175993512e-06, "loss": 0.6238, "step": 28333 }, { "epoch": 0.8272459198271583, "grad_norm": 0.5270906057492225, "learning_rate": 9.597729115977292e-06, "loss": 0.5691, "step": 28334 }, { "epoch": 0.8272751160550057, "grad_norm": 0.5313292414942357, "learning_rate": 9.59610705596107e-06, "loss": 0.6273, "step": 28335 }, { "epoch": 0.8273043122828531, "grad_norm": 0.5638331195788667, "learning_rate": 9.594484995944852e-06, "loss": 0.6759, "step": 28336 }, { "epoch": 0.8273335085107004, "grad_norm": 0.5402390975948832, "learning_rate": 9.59286293592863e-06, "loss": 0.6541, "step": 28337 }, { "epoch": 0.8273627047385478, "grad_norm": 0.49686681294639223, "learning_rate": 9.591240875912409e-06, "loss": 0.5281, "step": 28338 }, { "epoch": 0.8273919009663951, "grad_norm": 0.5298197525398332, "learning_rate": 9.589618815896189e-06, "loss": 0.6137, "step": 28339 }, { "epoch": 0.8274210971942425, "grad_norm": 0.5404465542100697, "learning_rate": 9.587996755879967e-06, "loss": 0.5924, "step": 28340 }, { "epoch": 0.8274502934220899, "grad_norm": 0.4872035224126575, "learning_rate": 9.586374695863749e-06, "loss": 0.5311, "step": 28341 }, { "epoch": 0.8274794896499372, "grad_norm": 0.5246437662516409, "learning_rate": 9.584752635847527e-06, "loss": 0.5862, "step": 28342 }, { "epoch": 0.8275086858777846, "grad_norm": 0.5360116875149181, "learning_rate": 9.583130575831305e-06, "loss": 0.6011, "step": 28343 }, { "epoch": 0.827537882105632, "grad_norm": 0.5106617575189951, "learning_rate": 9.581508515815086e-06, "loss": 0.5933, "step": 28344 }, { "epoch": 0.8275670783334793, "grad_norm": 0.49488074869531506, "learning_rate": 9.579886455798864e-06, "loss": 0.5323, "step": 28345 }, { "epoch": 0.8275962745613267, "grad_norm": 0.5202658818747455, "learning_rate": 9.578264395782644e-06, "loss": 0.5617, "step": 28346 }, { "epoch": 0.827625470789174, "grad_norm": 0.5150633256805622, "learning_rate": 9.576642335766424e-06, "loss": 0.5769, "step": 28347 }, { "epoch": 0.8276546670170214, "grad_norm": 0.49116743774837285, "learning_rate": 9.575020275750202e-06, "loss": 0.5501, "step": 28348 }, { "epoch": 0.8276838632448688, "grad_norm": 0.5107438582644248, "learning_rate": 9.573398215733983e-06, "loss": 0.5906, "step": 28349 }, { "epoch": 0.8277130594727161, "grad_norm": 0.514662116754449, "learning_rate": 9.571776155717763e-06, "loss": 0.5599, "step": 28350 }, { "epoch": 0.8277422557005635, "grad_norm": 0.5111201122553858, "learning_rate": 9.570154095701541e-06, "loss": 0.5617, "step": 28351 }, { "epoch": 0.8277714519284108, "grad_norm": 0.5324512220596519, "learning_rate": 9.568532035685321e-06, "loss": 0.5611, "step": 28352 }, { "epoch": 0.8278006481562582, "grad_norm": 0.6231080903460497, "learning_rate": 9.5669099756691e-06, "loss": 0.651, "step": 28353 }, { "epoch": 0.8278298443841056, "grad_norm": 0.5334993965073985, "learning_rate": 9.56528791565288e-06, "loss": 0.6299, "step": 28354 }, { "epoch": 0.8278590406119529, "grad_norm": 0.540435945592843, "learning_rate": 9.56366585563666e-06, "loss": 0.5636, "step": 28355 }, { "epoch": 0.8278882368398003, "grad_norm": 0.5511732361283029, "learning_rate": 9.562043795620438e-06, "loss": 0.557, "step": 28356 }, { "epoch": 0.8279174330676476, "grad_norm": 0.5883006947700754, "learning_rate": 9.560421735604218e-06, "loss": 0.6879, "step": 28357 }, { "epoch": 0.827946629295495, "grad_norm": 0.5475449728828938, "learning_rate": 9.558799675587996e-06, "loss": 0.6089, "step": 28358 }, { "epoch": 0.8279758255233424, "grad_norm": 0.4852826081437451, "learning_rate": 9.557177615571776e-06, "loss": 0.5103, "step": 28359 }, { "epoch": 0.8280050217511897, "grad_norm": 0.516694979895738, "learning_rate": 9.555555555555556e-06, "loss": 0.5768, "step": 28360 }, { "epoch": 0.8280342179790371, "grad_norm": 0.5222598070015452, "learning_rate": 9.553933495539335e-06, "loss": 0.5508, "step": 28361 }, { "epoch": 0.8280634142068845, "grad_norm": 0.5619381448204477, "learning_rate": 9.552311435523115e-06, "loss": 0.6592, "step": 28362 }, { "epoch": 0.8280926104347318, "grad_norm": 0.5341929520383296, "learning_rate": 9.550689375506893e-06, "loss": 0.5727, "step": 28363 }, { "epoch": 0.8281218066625792, "grad_norm": 0.5192911430693002, "learning_rate": 9.549067315490675e-06, "loss": 0.5456, "step": 28364 }, { "epoch": 0.8281510028904265, "grad_norm": 0.5161619216825355, "learning_rate": 9.547445255474453e-06, "loss": 0.5622, "step": 28365 }, { "epoch": 0.8281801991182739, "grad_norm": 0.5055761539816203, "learning_rate": 9.545823195458232e-06, "loss": 0.5429, "step": 28366 }, { "epoch": 0.8282093953461213, "grad_norm": 0.5341665153290487, "learning_rate": 9.544201135442012e-06, "loss": 0.6108, "step": 28367 }, { "epoch": 0.8282385915739686, "grad_norm": 0.4921761590210449, "learning_rate": 9.54257907542579e-06, "loss": 0.5236, "step": 28368 }, { "epoch": 0.828267787801816, "grad_norm": 0.526596315432335, "learning_rate": 9.540957015409572e-06, "loss": 0.5668, "step": 28369 }, { "epoch": 0.8282969840296633, "grad_norm": 0.5111759932432197, "learning_rate": 9.53933495539335e-06, "loss": 0.5773, "step": 28370 }, { "epoch": 0.8283261802575107, "grad_norm": 0.5224964201969514, "learning_rate": 9.537712895377129e-06, "loss": 0.5652, "step": 28371 }, { "epoch": 0.8283553764853581, "grad_norm": 0.5212713846872662, "learning_rate": 9.536090835360909e-06, "loss": 0.591, "step": 28372 }, { "epoch": 0.8283845727132054, "grad_norm": 0.5515522250522876, "learning_rate": 9.534468775344687e-06, "loss": 0.6245, "step": 28373 }, { "epoch": 0.8284137689410528, "grad_norm": 0.5303313631569462, "learning_rate": 9.532846715328469e-06, "loss": 0.6371, "step": 28374 }, { "epoch": 0.8284429651689001, "grad_norm": 0.5544339087950029, "learning_rate": 9.531224655312247e-06, "loss": 0.6802, "step": 28375 }, { "epoch": 0.8284721613967475, "grad_norm": 0.5548575735941783, "learning_rate": 9.529602595296026e-06, "loss": 0.6294, "step": 28376 }, { "epoch": 0.8285013576245949, "grad_norm": 0.5990254122584624, "learning_rate": 9.527980535279806e-06, "loss": 0.665, "step": 28377 }, { "epoch": 0.8285305538524422, "grad_norm": 0.509975991136818, "learning_rate": 9.526358475263584e-06, "loss": 0.5477, "step": 28378 }, { "epoch": 0.8285597500802896, "grad_norm": 0.5322086695491012, "learning_rate": 9.524736415247364e-06, "loss": 0.6044, "step": 28379 }, { "epoch": 0.828588946308137, "grad_norm": 0.5435688759791014, "learning_rate": 9.523114355231144e-06, "loss": 0.6649, "step": 28380 }, { "epoch": 0.8286181425359843, "grad_norm": 0.6132004744211389, "learning_rate": 9.521492295214923e-06, "loss": 0.6851, "step": 28381 }, { "epoch": 0.8286473387638317, "grad_norm": 0.5148411321467505, "learning_rate": 9.519870235198703e-06, "loss": 0.5255, "step": 28382 }, { "epoch": 0.828676534991679, "grad_norm": 0.5414720918945016, "learning_rate": 9.518248175182483e-06, "loss": 0.6167, "step": 28383 }, { "epoch": 0.8287057312195264, "grad_norm": 0.5546356763103023, "learning_rate": 9.516626115166261e-06, "loss": 0.6346, "step": 28384 }, { "epoch": 0.8287349274473738, "grad_norm": 0.544117725209902, "learning_rate": 9.515004055150041e-06, "loss": 0.6041, "step": 28385 }, { "epoch": 0.8287641236752211, "grad_norm": 0.8790669973825199, "learning_rate": 9.51338199513382e-06, "loss": 0.6268, "step": 28386 }, { "epoch": 0.8287933199030685, "grad_norm": 0.5297392645719443, "learning_rate": 9.5117599351176e-06, "loss": 0.5898, "step": 28387 }, { "epoch": 0.8288225161309158, "grad_norm": 0.5391691172278895, "learning_rate": 9.51013787510138e-06, "loss": 0.6329, "step": 28388 }, { "epoch": 0.8288517123587632, "grad_norm": 0.5732472141243876, "learning_rate": 9.508515815085158e-06, "loss": 0.6551, "step": 28389 }, { "epoch": 0.8288809085866106, "grad_norm": 0.5103999302345275, "learning_rate": 9.506893755068938e-06, "loss": 0.5424, "step": 28390 }, { "epoch": 0.8289101048144579, "grad_norm": 0.5303233523218892, "learning_rate": 9.505271695052717e-06, "loss": 0.6198, "step": 28391 }, { "epoch": 0.8289393010423053, "grad_norm": 0.4902179507083937, "learning_rate": 9.503649635036497e-06, "loss": 0.5229, "step": 28392 }, { "epoch": 0.8289684972701526, "grad_norm": 0.536623894121794, "learning_rate": 9.502027575020277e-06, "loss": 0.5843, "step": 28393 }, { "epoch": 0.828997693498, "grad_norm": 0.5461160736105941, "learning_rate": 9.500405515004055e-06, "loss": 0.6156, "step": 28394 }, { "epoch": 0.8290268897258474, "grad_norm": 0.5050521765921174, "learning_rate": 9.498783454987835e-06, "loss": 0.5498, "step": 28395 }, { "epoch": 0.8290560859536947, "grad_norm": 0.5166468629731161, "learning_rate": 9.497161394971614e-06, "loss": 0.5956, "step": 28396 }, { "epoch": 0.8290852821815421, "grad_norm": 0.5557178018970398, "learning_rate": 9.495539334955395e-06, "loss": 0.6183, "step": 28397 }, { "epoch": 0.8291144784093895, "grad_norm": 0.5073035543385865, "learning_rate": 9.493917274939174e-06, "loss": 0.5624, "step": 28398 }, { "epoch": 0.8291436746372368, "grad_norm": 0.47603755254671704, "learning_rate": 9.492295214922952e-06, "loss": 0.4849, "step": 28399 }, { "epoch": 0.8291728708650842, "grad_norm": 0.5195491071245566, "learning_rate": 9.490673154906732e-06, "loss": 0.5622, "step": 28400 }, { "epoch": 0.8292020670929315, "grad_norm": 0.5276813801040235, "learning_rate": 9.48905109489051e-06, "loss": 0.6146, "step": 28401 }, { "epoch": 0.829231263320779, "grad_norm": 0.5313456006053986, "learning_rate": 9.487429034874292e-06, "loss": 0.602, "step": 28402 }, { "epoch": 0.8292604595486264, "grad_norm": 0.4982527221710253, "learning_rate": 9.48580697485807e-06, "loss": 0.5446, "step": 28403 }, { "epoch": 0.8292896557764737, "grad_norm": 0.55845527613133, "learning_rate": 9.484184914841849e-06, "loss": 0.6547, "step": 28404 }, { "epoch": 0.8293188520043211, "grad_norm": 0.5217010423531474, "learning_rate": 9.482562854825629e-06, "loss": 0.5494, "step": 28405 }, { "epoch": 0.8293480482321685, "grad_norm": 0.5448583503811931, "learning_rate": 9.480940794809407e-06, "loss": 0.5901, "step": 28406 }, { "epoch": 0.8293772444600158, "grad_norm": 0.5071224315025123, "learning_rate": 9.479318734793187e-06, "loss": 0.5223, "step": 28407 }, { "epoch": 0.8294064406878632, "grad_norm": 0.557909873815824, "learning_rate": 9.477696674776968e-06, "loss": 0.6277, "step": 28408 }, { "epoch": 0.8294356369157105, "grad_norm": 0.5291468995972052, "learning_rate": 9.476074614760746e-06, "loss": 0.5882, "step": 28409 }, { "epoch": 0.8294648331435579, "grad_norm": 0.5333740990849656, "learning_rate": 9.474452554744526e-06, "loss": 0.6031, "step": 28410 }, { "epoch": 0.8294940293714053, "grad_norm": 0.5158691290819609, "learning_rate": 9.472830494728304e-06, "loss": 0.5799, "step": 28411 }, { "epoch": 0.8295232255992526, "grad_norm": 0.5099391106575621, "learning_rate": 9.471208434712084e-06, "loss": 0.5405, "step": 28412 }, { "epoch": 0.8295524218271, "grad_norm": 0.4856236388298891, "learning_rate": 9.469586374695865e-06, "loss": 0.506, "step": 28413 }, { "epoch": 0.8295816180549473, "grad_norm": 0.5259292912318507, "learning_rate": 9.467964314679643e-06, "loss": 0.5808, "step": 28414 }, { "epoch": 0.8296108142827947, "grad_norm": 0.5247086364042576, "learning_rate": 9.466342254663423e-06, "loss": 0.575, "step": 28415 }, { "epoch": 0.8296400105106421, "grad_norm": 0.5608209499497435, "learning_rate": 9.464720194647203e-06, "loss": 0.6654, "step": 28416 }, { "epoch": 0.8296692067384894, "grad_norm": 0.5405584956267779, "learning_rate": 9.463098134630981e-06, "loss": 0.6176, "step": 28417 }, { "epoch": 0.8296984029663368, "grad_norm": 0.5371540049011903, "learning_rate": 9.461476074614761e-06, "loss": 0.6057, "step": 28418 }, { "epoch": 0.8297275991941842, "grad_norm": 0.5233688544759205, "learning_rate": 9.45985401459854e-06, "loss": 0.5719, "step": 28419 }, { "epoch": 0.8297567954220315, "grad_norm": 0.5344330821467458, "learning_rate": 9.45823195458232e-06, "loss": 0.5961, "step": 28420 }, { "epoch": 0.8297859916498789, "grad_norm": 0.5103549042830753, "learning_rate": 9.4566098945661e-06, "loss": 0.5484, "step": 28421 }, { "epoch": 0.8298151878777262, "grad_norm": 0.547212593587668, "learning_rate": 9.454987834549878e-06, "loss": 0.6609, "step": 28422 }, { "epoch": 0.8298443841055736, "grad_norm": 0.47724281831379806, "learning_rate": 9.453365774533658e-06, "loss": 0.4992, "step": 28423 }, { "epoch": 0.829873580333421, "grad_norm": 0.49140567455491735, "learning_rate": 9.451743714517437e-06, "loss": 0.5202, "step": 28424 }, { "epoch": 0.8299027765612683, "grad_norm": 0.4792293705190889, "learning_rate": 9.450121654501217e-06, "loss": 0.4981, "step": 28425 }, { "epoch": 0.8299319727891157, "grad_norm": 0.5268756383975709, "learning_rate": 9.448499594484997e-06, "loss": 0.6112, "step": 28426 }, { "epoch": 0.829961169016963, "grad_norm": 0.516412737308125, "learning_rate": 9.446877534468775e-06, "loss": 0.5545, "step": 28427 }, { "epoch": 0.8299903652448104, "grad_norm": 0.5736532625155649, "learning_rate": 9.445255474452555e-06, "loss": 0.6342, "step": 28428 }, { "epoch": 0.8300195614726578, "grad_norm": 0.5919000228661756, "learning_rate": 9.443633414436334e-06, "loss": 0.665, "step": 28429 }, { "epoch": 0.8300487577005051, "grad_norm": 0.5231436138074886, "learning_rate": 9.442011354420114e-06, "loss": 0.5598, "step": 28430 }, { "epoch": 0.8300779539283525, "grad_norm": 0.525582314747119, "learning_rate": 9.440389294403894e-06, "loss": 0.6076, "step": 28431 }, { "epoch": 0.8301071501561998, "grad_norm": 0.5302377521175025, "learning_rate": 9.438767234387672e-06, "loss": 0.4829, "step": 28432 }, { "epoch": 0.8301363463840472, "grad_norm": 0.5177421171527133, "learning_rate": 9.437145174371452e-06, "loss": 0.5527, "step": 28433 }, { "epoch": 0.8301655426118946, "grad_norm": 0.558070907967045, "learning_rate": 9.43552311435523e-06, "loss": 0.6753, "step": 28434 }, { "epoch": 0.8301947388397419, "grad_norm": 0.557295388044131, "learning_rate": 9.433901054339012e-06, "loss": 0.6801, "step": 28435 }, { "epoch": 0.8302239350675893, "grad_norm": 0.536097178097617, "learning_rate": 9.43227899432279e-06, "loss": 0.5615, "step": 28436 }, { "epoch": 0.8302531312954367, "grad_norm": 0.5139844897095689, "learning_rate": 9.43065693430657e-06, "loss": 0.6125, "step": 28437 }, { "epoch": 0.830282327523284, "grad_norm": 0.4974723124463852, "learning_rate": 9.42903487429035e-06, "loss": 0.5163, "step": 28438 }, { "epoch": 0.8303115237511314, "grad_norm": 0.5162381713652485, "learning_rate": 9.427412814274128e-06, "loss": 0.5383, "step": 28439 }, { "epoch": 0.8303407199789787, "grad_norm": 0.49991851677573845, "learning_rate": 9.425790754257908e-06, "loss": 0.5292, "step": 28440 }, { "epoch": 0.8303699162068261, "grad_norm": 0.4629949775707807, "learning_rate": 9.424168694241688e-06, "loss": 0.4796, "step": 28441 }, { "epoch": 0.8303991124346735, "grad_norm": 0.5354202284764235, "learning_rate": 9.422546634225466e-06, "loss": 0.6075, "step": 28442 }, { "epoch": 0.8304283086625208, "grad_norm": 0.5327595545946033, "learning_rate": 9.420924574209246e-06, "loss": 0.5995, "step": 28443 }, { "epoch": 0.8304575048903682, "grad_norm": 0.5249512356167104, "learning_rate": 9.419302514193025e-06, "loss": 0.599, "step": 28444 }, { "epoch": 0.8304867011182155, "grad_norm": 0.5566231010851971, "learning_rate": 9.417680454176805e-06, "loss": 0.6628, "step": 28445 }, { "epoch": 0.8305158973460629, "grad_norm": 0.5484779070503283, "learning_rate": 9.416058394160585e-06, "loss": 0.5728, "step": 28446 }, { "epoch": 0.8305450935739103, "grad_norm": 0.5480269205369975, "learning_rate": 9.414436334144363e-06, "loss": 0.61, "step": 28447 }, { "epoch": 0.8305742898017576, "grad_norm": 0.5054658906832403, "learning_rate": 9.412814274128143e-06, "loss": 0.5345, "step": 28448 }, { "epoch": 0.830603486029605, "grad_norm": 0.5418457160805324, "learning_rate": 9.411192214111923e-06, "loss": 0.6027, "step": 28449 }, { "epoch": 0.8306326822574523, "grad_norm": 0.5283973053991139, "learning_rate": 9.409570154095702e-06, "loss": 0.632, "step": 28450 }, { "epoch": 0.8306618784852997, "grad_norm": 0.5065256628132647, "learning_rate": 9.407948094079482e-06, "loss": 0.5337, "step": 28451 }, { "epoch": 0.8306910747131471, "grad_norm": 0.5202381047631258, "learning_rate": 9.40632603406326e-06, "loss": 0.5638, "step": 28452 }, { "epoch": 0.8307202709409944, "grad_norm": 0.5475113349428351, "learning_rate": 9.40470397404704e-06, "loss": 0.6526, "step": 28453 }, { "epoch": 0.8307494671688418, "grad_norm": 0.5177935202377186, "learning_rate": 9.40308191403082e-06, "loss": 0.5815, "step": 28454 }, { "epoch": 0.8307786633966892, "grad_norm": 0.5264239295993001, "learning_rate": 9.401459854014599e-06, "loss": 0.558, "step": 28455 }, { "epoch": 0.8308078596245365, "grad_norm": 0.49219705527266955, "learning_rate": 9.399837793998379e-06, "loss": 0.5025, "step": 28456 }, { "epoch": 0.8308370558523839, "grad_norm": 0.6057604597012419, "learning_rate": 9.398215733982157e-06, "loss": 0.6873, "step": 28457 }, { "epoch": 0.8308662520802312, "grad_norm": 0.5050374156406033, "learning_rate": 9.396593673965937e-06, "loss": 0.5617, "step": 28458 }, { "epoch": 0.8308954483080786, "grad_norm": 0.541274954271401, "learning_rate": 9.394971613949717e-06, "loss": 0.6417, "step": 28459 }, { "epoch": 0.830924644535926, "grad_norm": 0.5100837273805109, "learning_rate": 9.393349553933496e-06, "loss": 0.5717, "step": 28460 }, { "epoch": 0.8309538407637733, "grad_norm": 0.5090085776374745, "learning_rate": 9.391727493917276e-06, "loss": 0.5619, "step": 28461 }, { "epoch": 0.8309830369916207, "grad_norm": 0.508553726346784, "learning_rate": 9.390105433901054e-06, "loss": 0.5172, "step": 28462 }, { "epoch": 0.831012233219468, "grad_norm": 0.5133759460471877, "learning_rate": 9.388483373884834e-06, "loss": 0.5715, "step": 28463 }, { "epoch": 0.8310414294473154, "grad_norm": 0.49884489672493837, "learning_rate": 9.386861313868614e-06, "loss": 0.5322, "step": 28464 }, { "epoch": 0.8310706256751628, "grad_norm": 0.5023080598347374, "learning_rate": 9.385239253852392e-06, "loss": 0.5126, "step": 28465 }, { "epoch": 0.8310998219030101, "grad_norm": 0.5350957309742974, "learning_rate": 9.383617193836173e-06, "loss": 0.5751, "step": 28466 }, { "epoch": 0.8311290181308575, "grad_norm": 0.4970987682117491, "learning_rate": 9.381995133819951e-06, "loss": 0.5387, "step": 28467 }, { "epoch": 0.8311582143587048, "grad_norm": 0.520909962897779, "learning_rate": 9.380373073803731e-06, "loss": 0.5615, "step": 28468 }, { "epoch": 0.8311874105865522, "grad_norm": 0.5183670052067365, "learning_rate": 9.378751013787511e-06, "loss": 0.5454, "step": 28469 }, { "epoch": 0.8312166068143996, "grad_norm": 0.5066489066691643, "learning_rate": 9.37712895377129e-06, "loss": 0.5718, "step": 28470 }, { "epoch": 0.8312458030422469, "grad_norm": 0.5119908680444669, "learning_rate": 9.37550689375507e-06, "loss": 0.5742, "step": 28471 }, { "epoch": 0.8312749992700943, "grad_norm": 0.46839926660265435, "learning_rate": 9.373884833738848e-06, "loss": 0.4389, "step": 28472 }, { "epoch": 0.8313041954979417, "grad_norm": 0.5475479078429067, "learning_rate": 9.372262773722628e-06, "loss": 0.6468, "step": 28473 }, { "epoch": 0.831333391725789, "grad_norm": 0.5356469826199387, "learning_rate": 9.370640713706408e-06, "loss": 0.603, "step": 28474 }, { "epoch": 0.8313625879536364, "grad_norm": 0.5513441098968924, "learning_rate": 9.369018653690186e-06, "loss": 0.6393, "step": 28475 }, { "epoch": 0.8313917841814837, "grad_norm": 0.5269597894722493, "learning_rate": 9.367396593673966e-06, "loss": 0.595, "step": 28476 }, { "epoch": 0.8314209804093311, "grad_norm": 0.4984943510432079, "learning_rate": 9.365774533657745e-06, "loss": 0.5096, "step": 28477 }, { "epoch": 0.8314501766371785, "grad_norm": 0.5539477087271168, "learning_rate": 9.364152473641525e-06, "loss": 0.6031, "step": 28478 }, { "epoch": 0.8314793728650258, "grad_norm": 0.533739650314727, "learning_rate": 9.362530413625305e-06, "loss": 0.6004, "step": 28479 }, { "epoch": 0.8315085690928732, "grad_norm": 0.5513610618792003, "learning_rate": 9.360908353609083e-06, "loss": 0.6139, "step": 28480 }, { "epoch": 0.8315377653207205, "grad_norm": 0.5171441712839655, "learning_rate": 9.359286293592863e-06, "loss": 0.5604, "step": 28481 }, { "epoch": 0.8315669615485679, "grad_norm": 0.5229090157422037, "learning_rate": 9.357664233576643e-06, "loss": 0.5406, "step": 28482 }, { "epoch": 0.8315961577764153, "grad_norm": 0.4968811620919065, "learning_rate": 9.356042173560422e-06, "loss": 0.5189, "step": 28483 }, { "epoch": 0.8316253540042626, "grad_norm": 0.5713219903494103, "learning_rate": 9.354420113544202e-06, "loss": 0.6176, "step": 28484 }, { "epoch": 0.83165455023211, "grad_norm": 0.49127847339658165, "learning_rate": 9.35279805352798e-06, "loss": 0.5163, "step": 28485 }, { "epoch": 0.8316837464599574, "grad_norm": 0.5452507007374688, "learning_rate": 9.35117599351176e-06, "loss": 0.6213, "step": 28486 }, { "epoch": 0.8317129426878047, "grad_norm": 0.5307107625144546, "learning_rate": 9.34955393349554e-06, "loss": 0.6153, "step": 28487 }, { "epoch": 0.8317421389156521, "grad_norm": 0.5388398113014828, "learning_rate": 9.347931873479319e-06, "loss": 0.656, "step": 28488 }, { "epoch": 0.8317713351434994, "grad_norm": 0.560588913559701, "learning_rate": 9.346309813463099e-06, "loss": 0.5639, "step": 28489 }, { "epoch": 0.8318005313713468, "grad_norm": 0.5231506707967502, "learning_rate": 9.344687753446877e-06, "loss": 0.5624, "step": 28490 }, { "epoch": 0.8318297275991942, "grad_norm": 0.5225501928021293, "learning_rate": 9.343065693430657e-06, "loss": 0.5806, "step": 28491 }, { "epoch": 0.8318589238270415, "grad_norm": 0.5027745009322255, "learning_rate": 9.341443633414437e-06, "loss": 0.5573, "step": 28492 }, { "epoch": 0.8318881200548889, "grad_norm": 0.5792070442573137, "learning_rate": 9.339821573398216e-06, "loss": 0.6733, "step": 28493 }, { "epoch": 0.8319173162827362, "grad_norm": 0.5267895108230614, "learning_rate": 9.338199513381996e-06, "loss": 0.5695, "step": 28494 }, { "epoch": 0.8319465125105836, "grad_norm": 0.5291491880642075, "learning_rate": 9.336577453365774e-06, "loss": 0.6134, "step": 28495 }, { "epoch": 0.831975708738431, "grad_norm": 0.46162909638738564, "learning_rate": 9.334955393349554e-06, "loss": 0.4612, "step": 28496 }, { "epoch": 0.8320049049662783, "grad_norm": 0.5352348806804441, "learning_rate": 9.333333333333334e-06, "loss": 0.6289, "step": 28497 }, { "epoch": 0.8320341011941257, "grad_norm": 0.5310483097727673, "learning_rate": 9.331711273317113e-06, "loss": 0.6093, "step": 28498 }, { "epoch": 0.832063297421973, "grad_norm": 0.5490799496291127, "learning_rate": 9.330089213300893e-06, "loss": 0.6734, "step": 28499 }, { "epoch": 0.8320924936498204, "grad_norm": 0.5659628765666761, "learning_rate": 9.328467153284671e-06, "loss": 0.6019, "step": 28500 }, { "epoch": 0.8321216898776678, "grad_norm": 0.523915309880613, "learning_rate": 9.326845093268451e-06, "loss": 0.5949, "step": 28501 }, { "epoch": 0.8321508861055151, "grad_norm": 0.5600661169928184, "learning_rate": 9.325223033252231e-06, "loss": 0.6523, "step": 28502 }, { "epoch": 0.8321800823333625, "grad_norm": 0.5355913140275312, "learning_rate": 9.32360097323601e-06, "loss": 0.6161, "step": 28503 }, { "epoch": 0.8322092785612099, "grad_norm": 0.5215213211951168, "learning_rate": 9.32197891321979e-06, "loss": 0.5668, "step": 28504 }, { "epoch": 0.8322384747890572, "grad_norm": 0.5653185888474037, "learning_rate": 9.320356853203568e-06, "loss": 0.6365, "step": 28505 }, { "epoch": 0.8322676710169046, "grad_norm": 0.5489144433679178, "learning_rate": 9.318734793187348e-06, "loss": 0.6586, "step": 28506 }, { "epoch": 0.8322968672447519, "grad_norm": 0.5183219557752419, "learning_rate": 9.317112733171128e-06, "loss": 0.5386, "step": 28507 }, { "epoch": 0.8323260634725993, "grad_norm": 0.5375238817487673, "learning_rate": 9.315490673154907e-06, "loss": 0.5929, "step": 28508 }, { "epoch": 0.8323552597004467, "grad_norm": 0.561376521182735, "learning_rate": 9.313868613138687e-06, "loss": 0.5908, "step": 28509 }, { "epoch": 0.832384455928294, "grad_norm": 0.5272599130401864, "learning_rate": 9.312246553122465e-06, "loss": 0.5869, "step": 28510 }, { "epoch": 0.8324136521561414, "grad_norm": 0.5330887114295654, "learning_rate": 9.310624493106245e-06, "loss": 0.6094, "step": 28511 }, { "epoch": 0.8324428483839887, "grad_norm": 0.5391221747968206, "learning_rate": 9.309002433090025e-06, "loss": 0.5829, "step": 28512 }, { "epoch": 0.8324720446118361, "grad_norm": 0.5041873527278578, "learning_rate": 9.307380373073804e-06, "loss": 0.5294, "step": 28513 }, { "epoch": 0.8325012408396835, "grad_norm": 0.501462421307791, "learning_rate": 9.305758313057584e-06, "loss": 0.5391, "step": 28514 }, { "epoch": 0.8325304370675308, "grad_norm": 0.5387677785615815, "learning_rate": 9.304136253041364e-06, "loss": 0.5956, "step": 28515 }, { "epoch": 0.8325596332953782, "grad_norm": 0.5298536503892186, "learning_rate": 9.302514193025142e-06, "loss": 0.6214, "step": 28516 }, { "epoch": 0.8325888295232255, "grad_norm": 0.5089913573020228, "learning_rate": 9.300892133008922e-06, "loss": 0.4949, "step": 28517 }, { "epoch": 0.8326180257510729, "grad_norm": 0.5375505404397452, "learning_rate": 9.2992700729927e-06, "loss": 0.6073, "step": 28518 }, { "epoch": 0.8326472219789203, "grad_norm": 0.49180942858781, "learning_rate": 9.29764801297648e-06, "loss": 0.5357, "step": 28519 }, { "epoch": 0.8326764182067676, "grad_norm": 0.5110634526064958, "learning_rate": 9.29602595296026e-06, "loss": 0.5653, "step": 28520 }, { "epoch": 0.832705614434615, "grad_norm": 0.5088816604403659, "learning_rate": 9.294403892944039e-06, "loss": 0.5863, "step": 28521 }, { "epoch": 0.8327348106624624, "grad_norm": 0.5676035510269326, "learning_rate": 9.292781832927819e-06, "loss": 0.6616, "step": 28522 }, { "epoch": 0.8327640068903098, "grad_norm": 0.5177278199869416, "learning_rate": 9.291159772911597e-06, "loss": 0.545, "step": 28523 }, { "epoch": 0.8327932031181572, "grad_norm": 0.5598782190018279, "learning_rate": 9.289537712895378e-06, "loss": 0.656, "step": 28524 }, { "epoch": 0.8328223993460045, "grad_norm": 0.5453595822534507, "learning_rate": 9.287915652879158e-06, "loss": 0.63, "step": 28525 }, { "epoch": 0.8328515955738519, "grad_norm": 0.5403312266212995, "learning_rate": 9.286293592862936e-06, "loss": 0.6052, "step": 28526 }, { "epoch": 0.8328807918016993, "grad_norm": 0.5098059268707359, "learning_rate": 9.284671532846716e-06, "loss": 0.5795, "step": 28527 }, { "epoch": 0.8329099880295466, "grad_norm": 0.5124094655349911, "learning_rate": 9.283049472830494e-06, "loss": 0.5784, "step": 28528 }, { "epoch": 0.832939184257394, "grad_norm": 0.557008264659499, "learning_rate": 9.281427412814274e-06, "loss": 0.6576, "step": 28529 }, { "epoch": 0.8329683804852414, "grad_norm": 0.5105500138089187, "learning_rate": 9.279805352798055e-06, "loss": 0.582, "step": 28530 }, { "epoch": 0.8329975767130887, "grad_norm": 0.5387269757591023, "learning_rate": 9.278183292781833e-06, "loss": 0.6186, "step": 28531 }, { "epoch": 0.8330267729409361, "grad_norm": 0.5358989890028962, "learning_rate": 9.276561232765613e-06, "loss": 0.6146, "step": 28532 }, { "epoch": 0.8330559691687834, "grad_norm": 0.5365359570167163, "learning_rate": 9.274939172749391e-06, "loss": 0.543, "step": 28533 }, { "epoch": 0.8330851653966308, "grad_norm": 0.5728143673233305, "learning_rate": 9.273317112733171e-06, "loss": 0.6614, "step": 28534 }, { "epoch": 0.8331143616244782, "grad_norm": 0.5474001673774188, "learning_rate": 9.271695052716951e-06, "loss": 0.572, "step": 28535 }, { "epoch": 0.8331435578523255, "grad_norm": 0.588497356126765, "learning_rate": 9.27007299270073e-06, "loss": 0.7014, "step": 28536 }, { "epoch": 0.8331727540801729, "grad_norm": 0.5154846947368995, "learning_rate": 9.26845093268451e-06, "loss": 0.591, "step": 28537 }, { "epoch": 0.8332019503080202, "grad_norm": 0.5003284388109497, "learning_rate": 9.266828872668288e-06, "loss": 0.5715, "step": 28538 }, { "epoch": 0.8332311465358676, "grad_norm": 0.5559513351888973, "learning_rate": 9.265206812652068e-06, "loss": 0.6254, "step": 28539 }, { "epoch": 0.833260342763715, "grad_norm": 0.4881563125062271, "learning_rate": 9.263584752635848e-06, "loss": 0.4997, "step": 28540 }, { "epoch": 0.8332895389915623, "grad_norm": 0.510139139487977, "learning_rate": 9.261962692619627e-06, "loss": 0.5701, "step": 28541 }, { "epoch": 0.8333187352194097, "grad_norm": 0.5452395521645106, "learning_rate": 9.260340632603407e-06, "loss": 0.6069, "step": 28542 }, { "epoch": 0.833347931447257, "grad_norm": 0.47293776475682053, "learning_rate": 9.258718572587185e-06, "loss": 0.485, "step": 28543 }, { "epoch": 0.8333771276751044, "grad_norm": 0.5083323174773062, "learning_rate": 9.257096512570965e-06, "loss": 0.5681, "step": 28544 }, { "epoch": 0.8334063239029518, "grad_norm": 0.5112082377203104, "learning_rate": 9.255474452554745e-06, "loss": 0.5507, "step": 28545 }, { "epoch": 0.8334355201307991, "grad_norm": 0.5052175967400133, "learning_rate": 9.253852392538524e-06, "loss": 0.537, "step": 28546 }, { "epoch": 0.8334647163586465, "grad_norm": 0.5491166872733522, "learning_rate": 9.252230332522304e-06, "loss": 0.6141, "step": 28547 }, { "epoch": 0.8334939125864939, "grad_norm": 0.5511312173789764, "learning_rate": 9.250608272506084e-06, "loss": 0.6354, "step": 28548 }, { "epoch": 0.8335231088143412, "grad_norm": 0.5047124570334327, "learning_rate": 9.248986212489862e-06, "loss": 0.5563, "step": 28549 }, { "epoch": 0.8335523050421886, "grad_norm": 0.507170037979481, "learning_rate": 9.247364152473642e-06, "loss": 0.5472, "step": 28550 }, { "epoch": 0.8335815012700359, "grad_norm": 0.5244054496193185, "learning_rate": 9.24574209245742e-06, "loss": 0.5473, "step": 28551 }, { "epoch": 0.8336106974978833, "grad_norm": 0.5670392756596679, "learning_rate": 9.2441200324412e-06, "loss": 0.6239, "step": 28552 }, { "epoch": 0.8336398937257307, "grad_norm": 0.5296429043912133, "learning_rate": 9.242497972424981e-06, "loss": 0.5602, "step": 28553 }, { "epoch": 0.833669089953578, "grad_norm": 0.5606525779804296, "learning_rate": 9.24087591240876e-06, "loss": 0.6681, "step": 28554 }, { "epoch": 0.8336982861814254, "grad_norm": 0.531921516162938, "learning_rate": 9.23925385239254e-06, "loss": 0.5795, "step": 28555 }, { "epoch": 0.8337274824092727, "grad_norm": 0.5368542991507071, "learning_rate": 9.237631792376318e-06, "loss": 0.576, "step": 28556 }, { "epoch": 0.8337566786371201, "grad_norm": 0.550489196320755, "learning_rate": 9.236009732360098e-06, "loss": 0.6412, "step": 28557 }, { "epoch": 0.8337858748649675, "grad_norm": 0.5384180384173426, "learning_rate": 9.234387672343878e-06, "loss": 0.5642, "step": 28558 }, { "epoch": 0.8338150710928148, "grad_norm": 0.5334387040262011, "learning_rate": 9.232765612327656e-06, "loss": 0.558, "step": 28559 }, { "epoch": 0.8338442673206622, "grad_norm": 0.537943862765128, "learning_rate": 9.231143552311436e-06, "loss": 0.6095, "step": 28560 }, { "epoch": 0.8338734635485096, "grad_norm": 0.5120540232582075, "learning_rate": 9.229521492295215e-06, "loss": 0.5731, "step": 28561 }, { "epoch": 0.8339026597763569, "grad_norm": 0.4811850607869788, "learning_rate": 9.227899432278995e-06, "loss": 0.5242, "step": 28562 }, { "epoch": 0.8339318560042043, "grad_norm": 0.5081570885433343, "learning_rate": 9.226277372262775e-06, "loss": 0.5703, "step": 28563 }, { "epoch": 0.8339610522320516, "grad_norm": 0.5168221371519541, "learning_rate": 9.224655312246553e-06, "loss": 0.5618, "step": 28564 }, { "epoch": 0.833990248459899, "grad_norm": 0.5088055298287462, "learning_rate": 9.223033252230333e-06, "loss": 0.5897, "step": 28565 }, { "epoch": 0.8340194446877464, "grad_norm": 0.5227293116638219, "learning_rate": 9.221411192214112e-06, "loss": 0.5115, "step": 28566 }, { "epoch": 0.8340486409155937, "grad_norm": 0.639827952447668, "learning_rate": 9.219789132197892e-06, "loss": 0.6325, "step": 28567 }, { "epoch": 0.8340778371434411, "grad_norm": 0.544033336244542, "learning_rate": 9.218167072181672e-06, "loss": 0.6107, "step": 28568 }, { "epoch": 0.8341070333712884, "grad_norm": 0.5176399396036443, "learning_rate": 9.21654501216545e-06, "loss": 0.5594, "step": 28569 }, { "epoch": 0.8341362295991358, "grad_norm": 0.5248940180930814, "learning_rate": 9.21492295214923e-06, "loss": 0.5505, "step": 28570 }, { "epoch": 0.8341654258269832, "grad_norm": 0.5571317490813519, "learning_rate": 9.213300892133009e-06, "loss": 0.6202, "step": 28571 }, { "epoch": 0.8341946220548305, "grad_norm": 0.5228523834682998, "learning_rate": 9.211678832116789e-06, "loss": 0.5967, "step": 28572 }, { "epoch": 0.8342238182826779, "grad_norm": 0.5314780075530061, "learning_rate": 9.210056772100569e-06, "loss": 0.6193, "step": 28573 }, { "epoch": 0.8342530145105252, "grad_norm": 0.5069054424240828, "learning_rate": 9.208434712084347e-06, "loss": 0.5555, "step": 28574 }, { "epoch": 0.8342822107383726, "grad_norm": 0.5610876049714469, "learning_rate": 9.206812652068127e-06, "loss": 0.6199, "step": 28575 }, { "epoch": 0.83431140696622, "grad_norm": 0.5292416632257654, "learning_rate": 9.205190592051905e-06, "loss": 0.5492, "step": 28576 }, { "epoch": 0.8343406031940673, "grad_norm": 0.5043538638709909, "learning_rate": 9.203568532035686e-06, "loss": 0.5809, "step": 28577 }, { "epoch": 0.8343697994219147, "grad_norm": 0.5246560646514854, "learning_rate": 9.201946472019466e-06, "loss": 0.5902, "step": 28578 }, { "epoch": 0.834398995649762, "grad_norm": 0.5433864173122785, "learning_rate": 9.200324412003244e-06, "loss": 0.6332, "step": 28579 }, { "epoch": 0.8344281918776094, "grad_norm": 0.5413680582249794, "learning_rate": 9.198702351987024e-06, "loss": 0.5641, "step": 28580 }, { "epoch": 0.8344573881054568, "grad_norm": 0.5332309997774787, "learning_rate": 9.197080291970802e-06, "loss": 0.6312, "step": 28581 }, { "epoch": 0.8344865843333041, "grad_norm": 0.5222255889235741, "learning_rate": 9.195458231954583e-06, "loss": 0.5566, "step": 28582 }, { "epoch": 0.8345157805611515, "grad_norm": 0.5346698563078047, "learning_rate": 9.193836171938363e-06, "loss": 0.6262, "step": 28583 }, { "epoch": 0.8345449767889989, "grad_norm": 0.5239575640773005, "learning_rate": 9.192214111922141e-06, "loss": 0.5673, "step": 28584 }, { "epoch": 0.8345741730168462, "grad_norm": 0.46691737482375995, "learning_rate": 9.190592051905921e-06, "loss": 0.4184, "step": 28585 }, { "epoch": 0.8346033692446936, "grad_norm": 0.5417182479493462, "learning_rate": 9.188969991889701e-06, "loss": 0.6091, "step": 28586 }, { "epoch": 0.8346325654725409, "grad_norm": 0.5020426854118623, "learning_rate": 9.18734793187348e-06, "loss": 0.5263, "step": 28587 }, { "epoch": 0.8346617617003883, "grad_norm": 0.5089413355731094, "learning_rate": 9.18572587185726e-06, "loss": 0.5568, "step": 28588 }, { "epoch": 0.8346909579282357, "grad_norm": 0.557387874530483, "learning_rate": 9.184103811841038e-06, "loss": 0.6589, "step": 28589 }, { "epoch": 0.834720154156083, "grad_norm": 0.5509214870194997, "learning_rate": 9.182481751824818e-06, "loss": 0.6557, "step": 28590 }, { "epoch": 0.8347493503839304, "grad_norm": 0.5763375818558752, "learning_rate": 9.180859691808598e-06, "loss": 0.561, "step": 28591 }, { "epoch": 0.8347785466117777, "grad_norm": 0.520371319958181, "learning_rate": 9.179237631792376e-06, "loss": 0.5993, "step": 28592 }, { "epoch": 0.8348077428396251, "grad_norm": 0.49517644677568023, "learning_rate": 9.177615571776156e-06, "loss": 0.5622, "step": 28593 }, { "epoch": 0.8348369390674725, "grad_norm": 0.5424270675188745, "learning_rate": 9.175993511759935e-06, "loss": 0.6281, "step": 28594 }, { "epoch": 0.8348661352953198, "grad_norm": 0.5298100790774875, "learning_rate": 9.174371451743715e-06, "loss": 0.5913, "step": 28595 }, { "epoch": 0.8348953315231672, "grad_norm": 0.5760605248549132, "learning_rate": 9.172749391727495e-06, "loss": 0.6425, "step": 28596 }, { "epoch": 0.8349245277510146, "grad_norm": 0.5044886693465988, "learning_rate": 9.171127331711273e-06, "loss": 0.6044, "step": 28597 }, { "epoch": 0.8349537239788619, "grad_norm": 0.5359064365580034, "learning_rate": 9.169505271695053e-06, "loss": 0.6405, "step": 28598 }, { "epoch": 0.8349829202067093, "grad_norm": 0.5102266155947806, "learning_rate": 9.167883211678832e-06, "loss": 0.5522, "step": 28599 }, { "epoch": 0.8350121164345566, "grad_norm": 0.5198122744539476, "learning_rate": 9.166261151662612e-06, "loss": 0.5566, "step": 28600 }, { "epoch": 0.835041312662404, "grad_norm": 0.5217843664463662, "learning_rate": 9.164639091646392e-06, "loss": 0.5249, "step": 28601 }, { "epoch": 0.8350705088902514, "grad_norm": 0.5173776235872579, "learning_rate": 9.16301703163017e-06, "loss": 0.5748, "step": 28602 }, { "epoch": 0.8350997051180987, "grad_norm": 0.5388904708670125, "learning_rate": 9.16139497161395e-06, "loss": 0.5584, "step": 28603 }, { "epoch": 0.8351289013459461, "grad_norm": 0.4899445858570142, "learning_rate": 9.159772911597729e-06, "loss": 0.504, "step": 28604 }, { "epoch": 0.8351580975737934, "grad_norm": 0.5122334861834406, "learning_rate": 9.158150851581509e-06, "loss": 0.5497, "step": 28605 }, { "epoch": 0.8351872938016408, "grad_norm": 0.5486213198017039, "learning_rate": 9.156528791565289e-06, "loss": 0.5857, "step": 28606 }, { "epoch": 0.8352164900294882, "grad_norm": 0.5665945043536272, "learning_rate": 9.154906731549067e-06, "loss": 0.6344, "step": 28607 }, { "epoch": 0.8352456862573355, "grad_norm": 0.4723461859402006, "learning_rate": 9.153284671532847e-06, "loss": 0.4903, "step": 28608 }, { "epoch": 0.8352748824851829, "grad_norm": 0.5199669819406945, "learning_rate": 9.151662611516626e-06, "loss": 0.5599, "step": 28609 }, { "epoch": 0.8353040787130303, "grad_norm": 0.5640104890050991, "learning_rate": 9.150040551500406e-06, "loss": 0.6595, "step": 28610 }, { "epoch": 0.8353332749408776, "grad_norm": 0.5271246343397623, "learning_rate": 9.148418491484186e-06, "loss": 0.5943, "step": 28611 }, { "epoch": 0.835362471168725, "grad_norm": 0.5249093091479733, "learning_rate": 9.146796431467964e-06, "loss": 0.5823, "step": 28612 }, { "epoch": 0.8353916673965723, "grad_norm": 0.5739542414777257, "learning_rate": 9.145174371451744e-06, "loss": 0.6131, "step": 28613 }, { "epoch": 0.8354208636244197, "grad_norm": 0.5145710360030927, "learning_rate": 9.143552311435523e-06, "loss": 0.5682, "step": 28614 }, { "epoch": 0.8354500598522671, "grad_norm": 0.5319744645441646, "learning_rate": 9.141930251419303e-06, "loss": 0.5672, "step": 28615 }, { "epoch": 0.8354792560801144, "grad_norm": 0.5286793289959938, "learning_rate": 9.140308191403083e-06, "loss": 0.6072, "step": 28616 }, { "epoch": 0.8355084523079618, "grad_norm": 0.5233311843734528, "learning_rate": 9.138686131386861e-06, "loss": 0.5655, "step": 28617 }, { "epoch": 0.8355376485358091, "grad_norm": 0.5571728040229077, "learning_rate": 9.137064071370641e-06, "loss": 0.6548, "step": 28618 }, { "epoch": 0.8355668447636565, "grad_norm": 0.5280951776545689, "learning_rate": 9.135442011354421e-06, "loss": 0.5537, "step": 28619 }, { "epoch": 0.8355960409915039, "grad_norm": 0.5092785201965777, "learning_rate": 9.1338199513382e-06, "loss": 0.5362, "step": 28620 }, { "epoch": 0.8356252372193512, "grad_norm": 0.49989498877007676, "learning_rate": 9.13219789132198e-06, "loss": 0.5814, "step": 28621 }, { "epoch": 0.8356544334471986, "grad_norm": 0.5350792673741069, "learning_rate": 9.130575831305758e-06, "loss": 0.593, "step": 28622 }, { "epoch": 0.835683629675046, "grad_norm": 0.5153324357827537, "learning_rate": 9.128953771289538e-06, "loss": 0.524, "step": 28623 }, { "epoch": 0.8357128259028933, "grad_norm": 0.5682227309334954, "learning_rate": 9.127331711273318e-06, "loss": 0.5819, "step": 28624 }, { "epoch": 0.8357420221307407, "grad_norm": 0.5208565793537091, "learning_rate": 9.125709651257097e-06, "loss": 0.5911, "step": 28625 }, { "epoch": 0.835771218358588, "grad_norm": 0.5152305343702771, "learning_rate": 9.124087591240877e-06, "loss": 0.5467, "step": 28626 }, { "epoch": 0.8358004145864354, "grad_norm": 0.5560359758694674, "learning_rate": 9.122465531224655e-06, "loss": 0.6707, "step": 28627 }, { "epoch": 0.8358296108142828, "grad_norm": 0.5385064076609952, "learning_rate": 9.120843471208435e-06, "loss": 0.6104, "step": 28628 }, { "epoch": 0.8358588070421301, "grad_norm": 0.5124939011262301, "learning_rate": 9.119221411192215e-06, "loss": 0.5788, "step": 28629 }, { "epoch": 0.8358880032699775, "grad_norm": 0.49313830653899926, "learning_rate": 9.117599351175994e-06, "loss": 0.5154, "step": 28630 }, { "epoch": 0.8359171994978248, "grad_norm": 0.5278832475025858, "learning_rate": 9.115977291159774e-06, "loss": 0.559, "step": 28631 }, { "epoch": 0.8359463957256722, "grad_norm": 0.5425374097230423, "learning_rate": 9.114355231143552e-06, "loss": 0.5832, "step": 28632 }, { "epoch": 0.8359755919535196, "grad_norm": 0.5465844627340436, "learning_rate": 9.112733171127332e-06, "loss": 0.617, "step": 28633 }, { "epoch": 0.8360047881813669, "grad_norm": 0.5332630182662909, "learning_rate": 9.111111111111112e-06, "loss": 0.6046, "step": 28634 }, { "epoch": 0.8360339844092143, "grad_norm": 0.5285757720730941, "learning_rate": 9.10948905109489e-06, "loss": 0.6096, "step": 28635 }, { "epoch": 0.8360631806370616, "grad_norm": 0.5096611811923006, "learning_rate": 9.10786699107867e-06, "loss": 0.5721, "step": 28636 }, { "epoch": 0.836092376864909, "grad_norm": 0.5000702233601791, "learning_rate": 9.106244931062449e-06, "loss": 0.5409, "step": 28637 }, { "epoch": 0.8361215730927564, "grad_norm": 0.5174236551398164, "learning_rate": 9.104622871046229e-06, "loss": 0.5929, "step": 28638 }, { "epoch": 0.8361507693206037, "grad_norm": 0.4971290127317702, "learning_rate": 9.103000811030009e-06, "loss": 0.496, "step": 28639 }, { "epoch": 0.8361799655484511, "grad_norm": 0.5176606792802575, "learning_rate": 9.101378751013787e-06, "loss": 0.5647, "step": 28640 }, { "epoch": 0.8362091617762984, "grad_norm": 0.5192948044035448, "learning_rate": 9.099756690997568e-06, "loss": 0.5515, "step": 28641 }, { "epoch": 0.8362383580041458, "grad_norm": 0.5155550261437634, "learning_rate": 9.098134630981346e-06, "loss": 0.5937, "step": 28642 }, { "epoch": 0.8362675542319932, "grad_norm": 0.5163029360058634, "learning_rate": 9.096512570965126e-06, "loss": 0.5837, "step": 28643 }, { "epoch": 0.8362967504598406, "grad_norm": 0.5296141810044017, "learning_rate": 9.094890510948906e-06, "loss": 0.5889, "step": 28644 }, { "epoch": 0.836325946687688, "grad_norm": 0.520946441202278, "learning_rate": 9.093268450932684e-06, "loss": 0.595, "step": 28645 }, { "epoch": 0.8363551429155354, "grad_norm": 0.5362213969493977, "learning_rate": 9.091646390916465e-06, "loss": 0.5799, "step": 28646 }, { "epoch": 0.8363843391433827, "grad_norm": 0.5390587469554059, "learning_rate": 9.090024330900243e-06, "loss": 0.5534, "step": 28647 }, { "epoch": 0.8364135353712301, "grad_norm": 0.4957361470134571, "learning_rate": 9.088402270884023e-06, "loss": 0.5435, "step": 28648 }, { "epoch": 0.8364427315990774, "grad_norm": 0.5195148936256581, "learning_rate": 9.086780210867803e-06, "loss": 0.5565, "step": 28649 }, { "epoch": 0.8364719278269248, "grad_norm": 0.5293611584112943, "learning_rate": 9.085158150851581e-06, "loss": 0.5394, "step": 28650 }, { "epoch": 0.8365011240547722, "grad_norm": 0.5353447383376322, "learning_rate": 9.083536090835361e-06, "loss": 0.6101, "step": 28651 }, { "epoch": 0.8365303202826195, "grad_norm": 0.5366414436288363, "learning_rate": 9.081914030819142e-06, "loss": 0.5795, "step": 28652 }, { "epoch": 0.8365595165104669, "grad_norm": 0.5415051829826559, "learning_rate": 9.08029197080292e-06, "loss": 0.635, "step": 28653 }, { "epoch": 0.8365887127383143, "grad_norm": 0.5440251769472234, "learning_rate": 9.0786699107867e-06, "loss": 0.6056, "step": 28654 }, { "epoch": 0.8366179089661616, "grad_norm": 0.5069846742947023, "learning_rate": 9.077047850770478e-06, "loss": 0.5242, "step": 28655 }, { "epoch": 0.836647105194009, "grad_norm": 0.4988193240225277, "learning_rate": 9.075425790754258e-06, "loss": 0.5282, "step": 28656 }, { "epoch": 0.8366763014218563, "grad_norm": 0.5532872920684752, "learning_rate": 9.073803730738038e-06, "loss": 0.577, "step": 28657 }, { "epoch": 0.8367054976497037, "grad_norm": 0.5268926985753912, "learning_rate": 9.072181670721817e-06, "loss": 0.5802, "step": 28658 }, { "epoch": 0.8367346938775511, "grad_norm": 0.5015513952427435, "learning_rate": 9.070559610705597e-06, "loss": 0.5311, "step": 28659 }, { "epoch": 0.8367638901053984, "grad_norm": 0.5368630523668337, "learning_rate": 9.068937550689375e-06, "loss": 0.5989, "step": 28660 }, { "epoch": 0.8367930863332458, "grad_norm": 0.5106600692828058, "learning_rate": 9.067315490673155e-06, "loss": 0.5672, "step": 28661 }, { "epoch": 0.8368222825610931, "grad_norm": 0.5302728839748106, "learning_rate": 9.065693430656935e-06, "loss": 0.5747, "step": 28662 }, { "epoch": 0.8368514787889405, "grad_norm": 0.49552033944390916, "learning_rate": 9.064071370640714e-06, "loss": 0.4906, "step": 28663 }, { "epoch": 0.8368806750167879, "grad_norm": 0.5096435395115599, "learning_rate": 9.062449310624494e-06, "loss": 0.5223, "step": 28664 }, { "epoch": 0.8369098712446352, "grad_norm": 0.5962251236953255, "learning_rate": 9.060827250608272e-06, "loss": 0.769, "step": 28665 }, { "epoch": 0.8369390674724826, "grad_norm": 0.5216637596847217, "learning_rate": 9.059205190592052e-06, "loss": 0.5877, "step": 28666 }, { "epoch": 0.83696826370033, "grad_norm": 0.48322652262969057, "learning_rate": 9.057583130575832e-06, "loss": 0.5016, "step": 28667 }, { "epoch": 0.8369974599281773, "grad_norm": 0.5244458345053206, "learning_rate": 9.05596107055961e-06, "loss": 0.5642, "step": 28668 }, { "epoch": 0.8370266561560247, "grad_norm": 0.5423472974566974, "learning_rate": 9.05433901054339e-06, "loss": 0.5903, "step": 28669 }, { "epoch": 0.837055852383872, "grad_norm": 0.556529543665175, "learning_rate": 9.05271695052717e-06, "loss": 0.6338, "step": 28670 }, { "epoch": 0.8370850486117194, "grad_norm": 0.5197168448755545, "learning_rate": 9.05109489051095e-06, "loss": 0.5557, "step": 28671 }, { "epoch": 0.8371142448395668, "grad_norm": 0.5487013800275922, "learning_rate": 9.04947283049473e-06, "loss": 0.6431, "step": 28672 }, { "epoch": 0.8371434410674141, "grad_norm": 0.4951840477896347, "learning_rate": 9.047850770478508e-06, "loss": 0.4949, "step": 28673 }, { "epoch": 0.8371726372952615, "grad_norm": 0.5148496627515261, "learning_rate": 9.046228710462288e-06, "loss": 0.5356, "step": 28674 }, { "epoch": 0.8372018335231088, "grad_norm": 0.5108540855879565, "learning_rate": 9.044606650446066e-06, "loss": 0.5724, "step": 28675 }, { "epoch": 0.8372310297509562, "grad_norm": 0.4861292247309391, "learning_rate": 9.042984590429846e-06, "loss": 0.5335, "step": 28676 }, { "epoch": 0.8372602259788036, "grad_norm": 0.587141014102532, "learning_rate": 9.041362530413626e-06, "loss": 0.6562, "step": 28677 }, { "epoch": 0.8372894222066509, "grad_norm": 0.5544998352356529, "learning_rate": 9.039740470397405e-06, "loss": 0.631, "step": 28678 }, { "epoch": 0.8373186184344983, "grad_norm": 0.5156040178919687, "learning_rate": 9.038118410381185e-06, "loss": 0.5274, "step": 28679 }, { "epoch": 0.8373478146623456, "grad_norm": 0.4922153918521367, "learning_rate": 9.036496350364963e-06, "loss": 0.5233, "step": 28680 }, { "epoch": 0.837377010890193, "grad_norm": 0.5158754036988579, "learning_rate": 9.034874290348743e-06, "loss": 0.5995, "step": 28681 }, { "epoch": 0.8374062071180404, "grad_norm": 0.5174032653810416, "learning_rate": 9.033252230332523e-06, "loss": 0.5877, "step": 28682 }, { "epoch": 0.8374354033458877, "grad_norm": 0.5469960377628299, "learning_rate": 9.031630170316302e-06, "loss": 0.6161, "step": 28683 }, { "epoch": 0.8374645995737351, "grad_norm": 0.5203160871392735, "learning_rate": 9.030008110300082e-06, "loss": 0.5739, "step": 28684 }, { "epoch": 0.8374937958015825, "grad_norm": 0.4973255249818024, "learning_rate": 9.028386050283862e-06, "loss": 0.5336, "step": 28685 }, { "epoch": 0.8375229920294298, "grad_norm": 0.526964558553179, "learning_rate": 9.02676399026764e-06, "loss": 0.5778, "step": 28686 }, { "epoch": 0.8375521882572772, "grad_norm": 0.5017844778496326, "learning_rate": 9.02514193025142e-06, "loss": 0.5467, "step": 28687 }, { "epoch": 0.8375813844851245, "grad_norm": 0.5308558814798018, "learning_rate": 9.023519870235199e-06, "loss": 0.5922, "step": 28688 }, { "epoch": 0.8376105807129719, "grad_norm": 0.5709503625514162, "learning_rate": 9.021897810218979e-06, "loss": 0.6409, "step": 28689 }, { "epoch": 0.8376397769408193, "grad_norm": 0.5759200151892805, "learning_rate": 9.020275750202759e-06, "loss": 0.6994, "step": 28690 }, { "epoch": 0.8376689731686666, "grad_norm": 0.5223230962025726, "learning_rate": 9.018653690186537e-06, "loss": 0.5603, "step": 28691 }, { "epoch": 0.837698169396514, "grad_norm": 0.4893596027930062, "learning_rate": 9.017031630170317e-06, "loss": 0.5345, "step": 28692 }, { "epoch": 0.8377273656243613, "grad_norm": 0.5468426709923841, "learning_rate": 9.015409570154096e-06, "loss": 0.5941, "step": 28693 }, { "epoch": 0.8377565618522087, "grad_norm": 0.5124251102115982, "learning_rate": 9.013787510137876e-06, "loss": 0.5391, "step": 28694 }, { "epoch": 0.8377857580800561, "grad_norm": 0.5097875179735635, "learning_rate": 9.012165450121656e-06, "loss": 0.5625, "step": 28695 }, { "epoch": 0.8378149543079034, "grad_norm": 0.5070787512895821, "learning_rate": 9.010543390105434e-06, "loss": 0.5682, "step": 28696 }, { "epoch": 0.8378441505357508, "grad_norm": 0.5271054542840171, "learning_rate": 9.008921330089214e-06, "loss": 0.6008, "step": 28697 }, { "epoch": 0.8378733467635981, "grad_norm": 0.5233692207034137, "learning_rate": 9.007299270072992e-06, "loss": 0.5517, "step": 28698 }, { "epoch": 0.8379025429914455, "grad_norm": 0.5709000459383857, "learning_rate": 9.005677210056773e-06, "loss": 0.6452, "step": 28699 }, { "epoch": 0.8379317392192929, "grad_norm": 0.47057055155957245, "learning_rate": 9.004055150040553e-06, "loss": 0.4505, "step": 28700 }, { "epoch": 0.8379609354471402, "grad_norm": 0.5282464424058766, "learning_rate": 9.002433090024331e-06, "loss": 0.543, "step": 28701 }, { "epoch": 0.8379901316749876, "grad_norm": 0.4928800182151577, "learning_rate": 9.000811030008111e-06, "loss": 0.5307, "step": 28702 }, { "epoch": 0.838019327902835, "grad_norm": 0.5100596643135471, "learning_rate": 8.99918896999189e-06, "loss": 0.5852, "step": 28703 }, { "epoch": 0.8380485241306823, "grad_norm": 0.5464314151539613, "learning_rate": 8.99756690997567e-06, "loss": 0.5809, "step": 28704 }, { "epoch": 0.8380777203585297, "grad_norm": 0.5410973161327904, "learning_rate": 8.99594484995945e-06, "loss": 0.6104, "step": 28705 }, { "epoch": 0.838106916586377, "grad_norm": 0.5560006517864674, "learning_rate": 8.994322789943228e-06, "loss": 0.5995, "step": 28706 }, { "epoch": 0.8381361128142244, "grad_norm": 0.5141362774948941, "learning_rate": 8.992700729927008e-06, "loss": 0.557, "step": 28707 }, { "epoch": 0.8381653090420718, "grad_norm": 0.5052206926495172, "learning_rate": 8.991078669910786e-06, "loss": 0.525, "step": 28708 }, { "epoch": 0.8381945052699191, "grad_norm": 0.5509153293747637, "learning_rate": 8.989456609894566e-06, "loss": 0.6317, "step": 28709 }, { "epoch": 0.8382237014977665, "grad_norm": 0.5460797369607656, "learning_rate": 8.987834549878347e-06, "loss": 0.6116, "step": 28710 }, { "epoch": 0.8382528977256138, "grad_norm": 0.5382114145711797, "learning_rate": 8.986212489862125e-06, "loss": 0.5757, "step": 28711 }, { "epoch": 0.8382820939534612, "grad_norm": 0.4978506955961719, "learning_rate": 8.984590429845905e-06, "loss": 0.5464, "step": 28712 }, { "epoch": 0.8383112901813086, "grad_norm": 0.5359984172221234, "learning_rate": 8.982968369829683e-06, "loss": 0.595, "step": 28713 }, { "epoch": 0.8383404864091559, "grad_norm": 0.5484813745891126, "learning_rate": 8.981346309813463e-06, "loss": 0.5683, "step": 28714 }, { "epoch": 0.8383696826370033, "grad_norm": 0.5275564423807602, "learning_rate": 8.979724249797243e-06, "loss": 0.5513, "step": 28715 }, { "epoch": 0.8383988788648506, "grad_norm": 0.5112535319573396, "learning_rate": 8.978102189781022e-06, "loss": 0.5879, "step": 28716 }, { "epoch": 0.838428075092698, "grad_norm": 0.5435700235497712, "learning_rate": 8.976480129764802e-06, "loss": 0.6325, "step": 28717 }, { "epoch": 0.8384572713205454, "grad_norm": 0.5656402867264925, "learning_rate": 8.974858069748582e-06, "loss": 0.6246, "step": 28718 }, { "epoch": 0.8384864675483927, "grad_norm": 0.46513452732635224, "learning_rate": 8.97323600973236e-06, "loss": 0.486, "step": 28719 }, { "epoch": 0.8385156637762401, "grad_norm": 0.5219663368953648, "learning_rate": 8.97161394971614e-06, "loss": 0.5458, "step": 28720 }, { "epoch": 0.8385448600040875, "grad_norm": 0.5422761506137797, "learning_rate": 8.969991889699919e-06, "loss": 0.5789, "step": 28721 }, { "epoch": 0.8385740562319348, "grad_norm": 0.5542038742294786, "learning_rate": 8.968369829683699e-06, "loss": 0.6139, "step": 28722 }, { "epoch": 0.8386032524597822, "grad_norm": 0.5364570817546686, "learning_rate": 8.966747769667479e-06, "loss": 0.6095, "step": 28723 }, { "epoch": 0.8386324486876295, "grad_norm": 0.5137748301415243, "learning_rate": 8.965125709651257e-06, "loss": 0.5475, "step": 28724 }, { "epoch": 0.8386616449154769, "grad_norm": 0.5361169910092726, "learning_rate": 8.963503649635037e-06, "loss": 0.6139, "step": 28725 }, { "epoch": 0.8386908411433243, "grad_norm": 0.5541392390239563, "learning_rate": 8.961881589618816e-06, "loss": 0.6112, "step": 28726 }, { "epoch": 0.8387200373711716, "grad_norm": 0.5088140515173002, "learning_rate": 8.960259529602596e-06, "loss": 0.5537, "step": 28727 }, { "epoch": 0.838749233599019, "grad_norm": 0.5521850003603725, "learning_rate": 8.958637469586376e-06, "loss": 0.6398, "step": 28728 }, { "epoch": 0.8387784298268663, "grad_norm": 0.5710612786676779, "learning_rate": 8.957015409570154e-06, "loss": 0.6539, "step": 28729 }, { "epoch": 0.8388076260547137, "grad_norm": 0.522790974880055, "learning_rate": 8.955393349553934e-06, "loss": 0.5369, "step": 28730 }, { "epoch": 0.8388368222825611, "grad_norm": 0.4879187659281252, "learning_rate": 8.953771289537713e-06, "loss": 0.517, "step": 28731 }, { "epoch": 0.8388660185104084, "grad_norm": 0.5307601218968504, "learning_rate": 8.952149229521493e-06, "loss": 0.5724, "step": 28732 }, { "epoch": 0.8388952147382558, "grad_norm": 0.5126653493389527, "learning_rate": 8.950527169505273e-06, "loss": 0.5591, "step": 28733 }, { "epoch": 0.8389244109661032, "grad_norm": 0.5327960531903124, "learning_rate": 8.948905109489051e-06, "loss": 0.6081, "step": 28734 }, { "epoch": 0.8389536071939505, "grad_norm": 0.5332835716794644, "learning_rate": 8.947283049472831e-06, "loss": 0.6057, "step": 28735 }, { "epoch": 0.8389828034217979, "grad_norm": 0.5272451254351714, "learning_rate": 8.94566098945661e-06, "loss": 0.5958, "step": 28736 }, { "epoch": 0.8390119996496452, "grad_norm": 0.5087300948677952, "learning_rate": 8.94403892944039e-06, "loss": 0.554, "step": 28737 }, { "epoch": 0.8390411958774926, "grad_norm": 0.5125366166352745, "learning_rate": 8.94241686942417e-06, "loss": 0.6025, "step": 28738 }, { "epoch": 0.83907039210534, "grad_norm": 0.5152566620820506, "learning_rate": 8.940794809407948e-06, "loss": 0.5626, "step": 28739 }, { "epoch": 0.8390995883331873, "grad_norm": 0.5260206432099959, "learning_rate": 8.939172749391728e-06, "loss": 0.6257, "step": 28740 }, { "epoch": 0.8391287845610347, "grad_norm": 0.5360015174481549, "learning_rate": 8.937550689375507e-06, "loss": 0.5291, "step": 28741 }, { "epoch": 0.839157980788882, "grad_norm": 0.5138831900489385, "learning_rate": 8.935928629359287e-06, "loss": 0.545, "step": 28742 }, { "epoch": 0.8391871770167294, "grad_norm": 0.5325361932793933, "learning_rate": 8.934306569343067e-06, "loss": 0.6156, "step": 28743 }, { "epoch": 0.8392163732445768, "grad_norm": 0.5219352266204415, "learning_rate": 8.932684509326845e-06, "loss": 0.5793, "step": 28744 }, { "epoch": 0.8392455694724241, "grad_norm": 0.5316162863691684, "learning_rate": 8.931062449310625e-06, "loss": 0.6088, "step": 28745 }, { "epoch": 0.8392747657002715, "grad_norm": 0.5303716618020486, "learning_rate": 8.929440389294404e-06, "loss": 0.5498, "step": 28746 }, { "epoch": 0.8393039619281188, "grad_norm": 0.5359334238772049, "learning_rate": 8.927818329278184e-06, "loss": 0.628, "step": 28747 }, { "epoch": 0.8393331581559662, "grad_norm": 0.5068422495407268, "learning_rate": 8.926196269261964e-06, "loss": 0.5405, "step": 28748 }, { "epoch": 0.8393623543838136, "grad_norm": 0.49433190706714664, "learning_rate": 8.924574209245742e-06, "loss": 0.5539, "step": 28749 }, { "epoch": 0.8393915506116609, "grad_norm": 0.5411840022073999, "learning_rate": 8.922952149229522e-06, "loss": 0.6079, "step": 28750 }, { "epoch": 0.8394207468395083, "grad_norm": 0.5128980022127186, "learning_rate": 8.921330089213302e-06, "loss": 0.557, "step": 28751 }, { "epoch": 0.8394499430673557, "grad_norm": 0.5367925582759727, "learning_rate": 8.91970802919708e-06, "loss": 0.5789, "step": 28752 }, { "epoch": 0.839479139295203, "grad_norm": 0.5934019779028743, "learning_rate": 8.91808596918086e-06, "loss": 0.6018, "step": 28753 }, { "epoch": 0.8395083355230504, "grad_norm": 0.5089749662439195, "learning_rate": 8.916463909164639e-06, "loss": 0.5724, "step": 28754 }, { "epoch": 0.8395375317508977, "grad_norm": 0.5589918203114045, "learning_rate": 8.914841849148419e-06, "loss": 0.6031, "step": 28755 }, { "epoch": 0.8395667279787451, "grad_norm": 0.5332534328102969, "learning_rate": 8.913219789132199e-06, "loss": 0.5953, "step": 28756 }, { "epoch": 0.8395959242065925, "grad_norm": 0.5408069858053077, "learning_rate": 8.911597729115978e-06, "loss": 0.5945, "step": 28757 }, { "epoch": 0.8396251204344398, "grad_norm": 0.5610045656645061, "learning_rate": 8.909975669099758e-06, "loss": 0.6447, "step": 28758 }, { "epoch": 0.8396543166622872, "grad_norm": 0.5445171921074222, "learning_rate": 8.908353609083536e-06, "loss": 0.5874, "step": 28759 }, { "epoch": 0.8396835128901345, "grad_norm": 0.5104681782311428, "learning_rate": 8.906731549067316e-06, "loss": 0.5966, "step": 28760 }, { "epoch": 0.8397127091179819, "grad_norm": 0.5267272283200586, "learning_rate": 8.905109489051096e-06, "loss": 0.5923, "step": 28761 }, { "epoch": 0.8397419053458293, "grad_norm": 0.5348629131654605, "learning_rate": 8.903487429034874e-06, "loss": 0.6155, "step": 28762 }, { "epoch": 0.8397711015736766, "grad_norm": 0.5136592886162634, "learning_rate": 8.901865369018655e-06, "loss": 0.5865, "step": 28763 }, { "epoch": 0.8398002978015241, "grad_norm": 0.5331151489987325, "learning_rate": 8.900243309002433e-06, "loss": 0.5944, "step": 28764 }, { "epoch": 0.8398294940293715, "grad_norm": 0.5697597831630917, "learning_rate": 8.898621248986213e-06, "loss": 0.6514, "step": 28765 }, { "epoch": 0.8398586902572188, "grad_norm": 0.49705961641969726, "learning_rate": 8.896999188969993e-06, "loss": 0.5203, "step": 28766 }, { "epoch": 0.8398878864850662, "grad_norm": 0.535571989274486, "learning_rate": 8.895377128953771e-06, "loss": 0.5829, "step": 28767 }, { "epoch": 0.8399170827129135, "grad_norm": 0.5338333991629179, "learning_rate": 8.893755068937551e-06, "loss": 0.6187, "step": 28768 }, { "epoch": 0.8399462789407609, "grad_norm": 0.5889692377563829, "learning_rate": 8.89213300892133e-06, "loss": 0.6758, "step": 28769 }, { "epoch": 0.8399754751686083, "grad_norm": 0.5297825079852424, "learning_rate": 8.89051094890511e-06, "loss": 0.5428, "step": 28770 }, { "epoch": 0.8400046713964556, "grad_norm": 0.551464719571485, "learning_rate": 8.88888888888889e-06, "loss": 0.6145, "step": 28771 }, { "epoch": 0.840033867624303, "grad_norm": 0.5510450613756713, "learning_rate": 8.887266828872668e-06, "loss": 0.6448, "step": 28772 }, { "epoch": 0.8400630638521503, "grad_norm": 0.4975613186122962, "learning_rate": 8.885644768856448e-06, "loss": 0.4927, "step": 28773 }, { "epoch": 0.8400922600799977, "grad_norm": 0.5185256989259222, "learning_rate": 8.884022708840227e-06, "loss": 0.593, "step": 28774 }, { "epoch": 0.8401214563078451, "grad_norm": 0.5682534420225842, "learning_rate": 8.882400648824007e-06, "loss": 0.5563, "step": 28775 }, { "epoch": 0.8401506525356924, "grad_norm": 0.556185518212021, "learning_rate": 8.880778588807787e-06, "loss": 0.6873, "step": 28776 }, { "epoch": 0.8401798487635398, "grad_norm": 0.5280872002006792, "learning_rate": 8.879156528791565e-06, "loss": 0.5644, "step": 28777 }, { "epoch": 0.8402090449913872, "grad_norm": 0.5301436951460266, "learning_rate": 8.877534468775345e-06, "loss": 0.5963, "step": 28778 }, { "epoch": 0.8402382412192345, "grad_norm": 0.5452313062119246, "learning_rate": 8.875912408759124e-06, "loss": 0.6091, "step": 28779 }, { "epoch": 0.8402674374470819, "grad_norm": 0.5952993768619258, "learning_rate": 8.874290348742904e-06, "loss": 0.754, "step": 28780 }, { "epoch": 0.8402966336749292, "grad_norm": 0.5214824248883124, "learning_rate": 8.872668288726684e-06, "loss": 0.5504, "step": 28781 }, { "epoch": 0.8403258299027766, "grad_norm": 0.4961398185211435, "learning_rate": 8.871046228710462e-06, "loss": 0.5366, "step": 28782 }, { "epoch": 0.840355026130624, "grad_norm": 0.527910887082911, "learning_rate": 8.869424168694242e-06, "loss": 0.5809, "step": 28783 }, { "epoch": 0.8403842223584713, "grad_norm": 0.5194185134610217, "learning_rate": 8.867802108678022e-06, "loss": 0.5657, "step": 28784 }, { "epoch": 0.8404134185863187, "grad_norm": 0.49476905194936377, "learning_rate": 8.8661800486618e-06, "loss": 0.5062, "step": 28785 }, { "epoch": 0.840442614814166, "grad_norm": 0.5125703950847543, "learning_rate": 8.864557988645581e-06, "loss": 0.5352, "step": 28786 }, { "epoch": 0.8404718110420134, "grad_norm": 0.5037973767853973, "learning_rate": 8.86293592862936e-06, "loss": 0.5558, "step": 28787 }, { "epoch": 0.8405010072698608, "grad_norm": 0.5033569125353218, "learning_rate": 8.86131386861314e-06, "loss": 0.5492, "step": 28788 }, { "epoch": 0.8405302034977081, "grad_norm": 0.5635874896825551, "learning_rate": 8.85969180859692e-06, "loss": 0.6171, "step": 28789 }, { "epoch": 0.8405593997255555, "grad_norm": 0.513583934809134, "learning_rate": 8.858069748580698e-06, "loss": 0.5897, "step": 28790 }, { "epoch": 0.8405885959534029, "grad_norm": 0.5181762582679883, "learning_rate": 8.856447688564478e-06, "loss": 0.592, "step": 28791 }, { "epoch": 0.8406177921812502, "grad_norm": 0.5709490618810327, "learning_rate": 8.854825628548256e-06, "loss": 0.6004, "step": 28792 }, { "epoch": 0.8406469884090976, "grad_norm": 0.5112423275389884, "learning_rate": 8.853203568532036e-06, "loss": 0.5655, "step": 28793 }, { "epoch": 0.8406761846369449, "grad_norm": 0.6251072320290696, "learning_rate": 8.851581508515816e-06, "loss": 0.6992, "step": 28794 }, { "epoch": 0.8407053808647923, "grad_norm": 0.5219031074548443, "learning_rate": 8.849959448499595e-06, "loss": 0.6127, "step": 28795 }, { "epoch": 0.8407345770926397, "grad_norm": 0.5262046001378634, "learning_rate": 8.848337388483375e-06, "loss": 0.5843, "step": 28796 }, { "epoch": 0.840763773320487, "grad_norm": 0.48994219893105717, "learning_rate": 8.846715328467153e-06, "loss": 0.5306, "step": 28797 }, { "epoch": 0.8407929695483344, "grad_norm": 0.5371422414310475, "learning_rate": 8.845093268450933e-06, "loss": 0.5976, "step": 28798 }, { "epoch": 0.8408221657761817, "grad_norm": 0.5113250311834361, "learning_rate": 8.843471208434713e-06, "loss": 0.5358, "step": 28799 }, { "epoch": 0.8408513620040291, "grad_norm": 0.5817674274465272, "learning_rate": 8.841849148418492e-06, "loss": 0.6872, "step": 28800 }, { "epoch": 0.8408805582318765, "grad_norm": 0.4990049804248058, "learning_rate": 8.840227088402272e-06, "loss": 0.5604, "step": 28801 }, { "epoch": 0.8409097544597238, "grad_norm": 0.5201368867692346, "learning_rate": 8.83860502838605e-06, "loss": 0.5739, "step": 28802 }, { "epoch": 0.8409389506875712, "grad_norm": 0.5242917770132829, "learning_rate": 8.83698296836983e-06, "loss": 0.5794, "step": 28803 }, { "epoch": 0.8409681469154185, "grad_norm": 0.4844073351824226, "learning_rate": 8.83536090835361e-06, "loss": 0.4947, "step": 28804 }, { "epoch": 0.8409973431432659, "grad_norm": 0.5291981677198766, "learning_rate": 8.833738848337389e-06, "loss": 0.6123, "step": 28805 }, { "epoch": 0.8410265393711133, "grad_norm": 0.5341044854431464, "learning_rate": 8.832116788321169e-06, "loss": 0.572, "step": 28806 }, { "epoch": 0.8410557355989606, "grad_norm": 0.525662468461954, "learning_rate": 8.830494728304947e-06, "loss": 0.5829, "step": 28807 }, { "epoch": 0.841084931826808, "grad_norm": 0.500596614780388, "learning_rate": 8.828872668288727e-06, "loss": 0.5171, "step": 28808 }, { "epoch": 0.8411141280546554, "grad_norm": 0.5528186227976818, "learning_rate": 8.827250608272507e-06, "loss": 0.6107, "step": 28809 }, { "epoch": 0.8411433242825027, "grad_norm": 0.49434754898702116, "learning_rate": 8.825628548256286e-06, "loss": 0.5149, "step": 28810 }, { "epoch": 0.8411725205103501, "grad_norm": 0.5147132494287726, "learning_rate": 8.824006488240066e-06, "loss": 0.5624, "step": 28811 }, { "epoch": 0.8412017167381974, "grad_norm": 0.5287023626158088, "learning_rate": 8.822384428223844e-06, "loss": 0.6118, "step": 28812 }, { "epoch": 0.8412309129660448, "grad_norm": 0.5262503852127073, "learning_rate": 8.820762368207624e-06, "loss": 0.5837, "step": 28813 }, { "epoch": 0.8412601091938922, "grad_norm": 0.5516499140457151, "learning_rate": 8.819140308191404e-06, "loss": 0.6381, "step": 28814 }, { "epoch": 0.8412893054217395, "grad_norm": 0.5151121234455757, "learning_rate": 8.817518248175182e-06, "loss": 0.5577, "step": 28815 }, { "epoch": 0.8413185016495869, "grad_norm": 0.5272441842501756, "learning_rate": 8.815896188158963e-06, "loss": 0.5976, "step": 28816 }, { "epoch": 0.8413476978774342, "grad_norm": 0.5516773126203177, "learning_rate": 8.814274128142743e-06, "loss": 0.5666, "step": 28817 }, { "epoch": 0.8413768941052816, "grad_norm": 0.5215863222184205, "learning_rate": 8.812652068126521e-06, "loss": 0.582, "step": 28818 }, { "epoch": 0.841406090333129, "grad_norm": 0.4894663285498765, "learning_rate": 8.811030008110301e-06, "loss": 0.5156, "step": 28819 }, { "epoch": 0.8414352865609763, "grad_norm": 0.5494859601659479, "learning_rate": 8.80940794809408e-06, "loss": 0.637, "step": 28820 }, { "epoch": 0.8414644827888237, "grad_norm": 0.5315310886537908, "learning_rate": 8.80778588807786e-06, "loss": 0.5752, "step": 28821 }, { "epoch": 0.841493679016671, "grad_norm": 0.5027793118114317, "learning_rate": 8.80616382806164e-06, "loss": 0.5157, "step": 28822 }, { "epoch": 0.8415228752445184, "grad_norm": 0.5011524307459432, "learning_rate": 8.804541768045418e-06, "loss": 0.5096, "step": 28823 }, { "epoch": 0.8415520714723658, "grad_norm": 0.5199168239364779, "learning_rate": 8.802919708029198e-06, "loss": 0.5701, "step": 28824 }, { "epoch": 0.8415812677002131, "grad_norm": 0.5423510918513005, "learning_rate": 8.801297648012976e-06, "loss": 0.6079, "step": 28825 }, { "epoch": 0.8416104639280605, "grad_norm": 0.4970384420589598, "learning_rate": 8.799675587996756e-06, "loss": 0.5585, "step": 28826 }, { "epoch": 0.8416396601559079, "grad_norm": 0.5224688119367906, "learning_rate": 8.798053527980537e-06, "loss": 0.5557, "step": 28827 }, { "epoch": 0.8416688563837552, "grad_norm": 0.5303603256075295, "learning_rate": 8.796431467964315e-06, "loss": 0.5613, "step": 28828 }, { "epoch": 0.8416980526116026, "grad_norm": 0.5332057098105489, "learning_rate": 8.794809407948095e-06, "loss": 0.6487, "step": 28829 }, { "epoch": 0.8417272488394499, "grad_norm": 0.5059705815328478, "learning_rate": 8.793187347931873e-06, "loss": 0.5846, "step": 28830 }, { "epoch": 0.8417564450672973, "grad_norm": 0.5863202376883541, "learning_rate": 8.791565287915653e-06, "loss": 0.5746, "step": 28831 }, { "epoch": 0.8417856412951447, "grad_norm": 0.5159227857040755, "learning_rate": 8.789943227899433e-06, "loss": 0.5423, "step": 28832 }, { "epoch": 0.841814837522992, "grad_norm": 0.5639326404410363, "learning_rate": 8.788321167883212e-06, "loss": 0.6585, "step": 28833 }, { "epoch": 0.8418440337508394, "grad_norm": 0.578847661527273, "learning_rate": 8.786699107866992e-06, "loss": 0.6668, "step": 28834 }, { "epoch": 0.8418732299786867, "grad_norm": 0.513483497833911, "learning_rate": 8.78507704785077e-06, "loss": 0.5714, "step": 28835 }, { "epoch": 0.8419024262065341, "grad_norm": 0.5146394517189952, "learning_rate": 8.78345498783455e-06, "loss": 0.5694, "step": 28836 }, { "epoch": 0.8419316224343815, "grad_norm": 0.5596250576516767, "learning_rate": 8.78183292781833e-06, "loss": 0.6939, "step": 28837 }, { "epoch": 0.8419608186622288, "grad_norm": 0.51579398009903, "learning_rate": 8.780210867802109e-06, "loss": 0.577, "step": 28838 }, { "epoch": 0.8419900148900762, "grad_norm": 0.5080190460500539, "learning_rate": 8.778588807785889e-06, "loss": 0.5659, "step": 28839 }, { "epoch": 0.8420192111179235, "grad_norm": 0.5493753983542505, "learning_rate": 8.776966747769667e-06, "loss": 0.6166, "step": 28840 }, { "epoch": 0.8420484073457709, "grad_norm": 0.5209015738347731, "learning_rate": 8.775344687753447e-06, "loss": 0.5973, "step": 28841 }, { "epoch": 0.8420776035736183, "grad_norm": 0.5151841588787367, "learning_rate": 8.773722627737227e-06, "loss": 0.5426, "step": 28842 }, { "epoch": 0.8421067998014656, "grad_norm": 0.5197722854232542, "learning_rate": 8.772100567721006e-06, "loss": 0.5243, "step": 28843 }, { "epoch": 0.842135996029313, "grad_norm": 0.5111493955230022, "learning_rate": 8.770478507704786e-06, "loss": 0.5535, "step": 28844 }, { "epoch": 0.8421651922571604, "grad_norm": 0.5498343933371147, "learning_rate": 8.768856447688564e-06, "loss": 0.6499, "step": 28845 }, { "epoch": 0.8421943884850077, "grad_norm": 0.5721751047191971, "learning_rate": 8.767234387672344e-06, "loss": 0.6259, "step": 28846 }, { "epoch": 0.8422235847128551, "grad_norm": 0.5404935001222531, "learning_rate": 8.765612327656124e-06, "loss": 0.6053, "step": 28847 }, { "epoch": 0.8422527809407024, "grad_norm": 0.4991565555677477, "learning_rate": 8.763990267639903e-06, "loss": 0.5514, "step": 28848 }, { "epoch": 0.8422819771685498, "grad_norm": 0.5334722326661937, "learning_rate": 8.762368207623683e-06, "loss": 0.5659, "step": 28849 }, { "epoch": 0.8423111733963972, "grad_norm": 0.5126152316608518, "learning_rate": 8.760746147607461e-06, "loss": 0.5563, "step": 28850 }, { "epoch": 0.8423403696242445, "grad_norm": 0.5692381680011165, "learning_rate": 8.759124087591241e-06, "loss": 0.6951, "step": 28851 }, { "epoch": 0.8423695658520919, "grad_norm": 0.5441039127453063, "learning_rate": 8.757502027575021e-06, "loss": 0.6054, "step": 28852 }, { "epoch": 0.8423987620799392, "grad_norm": 0.5257467295702927, "learning_rate": 8.7558799675588e-06, "loss": 0.5995, "step": 28853 }, { "epoch": 0.8424279583077866, "grad_norm": 0.48707500224070804, "learning_rate": 8.75425790754258e-06, "loss": 0.5004, "step": 28854 }, { "epoch": 0.842457154535634, "grad_norm": 0.5496087031745611, "learning_rate": 8.75263584752636e-06, "loss": 0.6237, "step": 28855 }, { "epoch": 0.8424863507634813, "grad_norm": 0.5262324080831237, "learning_rate": 8.751013787510138e-06, "loss": 0.5951, "step": 28856 }, { "epoch": 0.8425155469913287, "grad_norm": 0.5630823893130656, "learning_rate": 8.749391727493918e-06, "loss": 0.6532, "step": 28857 }, { "epoch": 0.842544743219176, "grad_norm": 0.49412508423649426, "learning_rate": 8.747769667477697e-06, "loss": 0.5375, "step": 28858 }, { "epoch": 0.8425739394470234, "grad_norm": 0.5765979253529881, "learning_rate": 8.746147607461477e-06, "loss": 0.6246, "step": 28859 }, { "epoch": 0.8426031356748708, "grad_norm": 0.48188381280605036, "learning_rate": 8.744525547445257e-06, "loss": 0.4926, "step": 28860 }, { "epoch": 0.8426323319027181, "grad_norm": 0.5188401808250935, "learning_rate": 8.742903487429035e-06, "loss": 0.5533, "step": 28861 }, { "epoch": 0.8426615281305655, "grad_norm": 0.510945736676649, "learning_rate": 8.741281427412815e-06, "loss": 0.5927, "step": 28862 }, { "epoch": 0.8426907243584129, "grad_norm": 0.5389409086728014, "learning_rate": 8.739659367396594e-06, "loss": 0.5756, "step": 28863 }, { "epoch": 0.8427199205862602, "grad_norm": 0.5649978855073717, "learning_rate": 8.738037307380374e-06, "loss": 0.6407, "step": 28864 }, { "epoch": 0.8427491168141076, "grad_norm": 0.5362575833808465, "learning_rate": 8.736415247364154e-06, "loss": 0.5711, "step": 28865 }, { "epoch": 0.8427783130419549, "grad_norm": 0.5205329749826598, "learning_rate": 8.734793187347932e-06, "loss": 0.5524, "step": 28866 }, { "epoch": 0.8428075092698023, "grad_norm": 0.5424004783008312, "learning_rate": 8.733171127331712e-06, "loss": 0.6106, "step": 28867 }, { "epoch": 0.8428367054976497, "grad_norm": 0.5782051573801144, "learning_rate": 8.73154906731549e-06, "loss": 0.6634, "step": 28868 }, { "epoch": 0.842865901725497, "grad_norm": 0.49157651857380025, "learning_rate": 8.72992700729927e-06, "loss": 0.5121, "step": 28869 }, { "epoch": 0.8428950979533444, "grad_norm": 0.5115385879526592, "learning_rate": 8.72830494728305e-06, "loss": 0.5648, "step": 28870 }, { "epoch": 0.8429242941811917, "grad_norm": 0.5156789762045175, "learning_rate": 8.726682887266829e-06, "loss": 0.5824, "step": 28871 }, { "epoch": 0.8429534904090391, "grad_norm": 0.5053921602147078, "learning_rate": 8.725060827250609e-06, "loss": 0.5348, "step": 28872 }, { "epoch": 0.8429826866368865, "grad_norm": 0.5146953286883937, "learning_rate": 8.723438767234387e-06, "loss": 0.5599, "step": 28873 }, { "epoch": 0.8430118828647338, "grad_norm": 0.5224110477418675, "learning_rate": 8.721816707218168e-06, "loss": 0.5524, "step": 28874 }, { "epoch": 0.8430410790925812, "grad_norm": 0.5155831710875106, "learning_rate": 8.720194647201948e-06, "loss": 0.5466, "step": 28875 }, { "epoch": 0.8430702753204286, "grad_norm": 0.5099282432846693, "learning_rate": 8.718572587185726e-06, "loss": 0.5652, "step": 28876 }, { "epoch": 0.8430994715482759, "grad_norm": 0.5115550712902273, "learning_rate": 8.716950527169506e-06, "loss": 0.5309, "step": 28877 }, { "epoch": 0.8431286677761233, "grad_norm": 0.4775626786671054, "learning_rate": 8.715328467153284e-06, "loss": 0.4987, "step": 28878 }, { "epoch": 0.8431578640039706, "grad_norm": 0.5271120323650191, "learning_rate": 8.713706407137064e-06, "loss": 0.5858, "step": 28879 }, { "epoch": 0.843187060231818, "grad_norm": 0.5356727362548329, "learning_rate": 8.712084347120845e-06, "loss": 0.6226, "step": 28880 }, { "epoch": 0.8432162564596654, "grad_norm": 0.5081114790135755, "learning_rate": 8.710462287104623e-06, "loss": 0.5446, "step": 28881 }, { "epoch": 0.8432454526875127, "grad_norm": 0.5393764954883739, "learning_rate": 8.708840227088403e-06, "loss": 0.5754, "step": 28882 }, { "epoch": 0.8432746489153601, "grad_norm": 0.5427835124956939, "learning_rate": 8.707218167072181e-06, "loss": 0.6007, "step": 28883 }, { "epoch": 0.8433038451432074, "grad_norm": 0.5317621551715607, "learning_rate": 8.705596107055961e-06, "loss": 0.6211, "step": 28884 }, { "epoch": 0.8433330413710549, "grad_norm": 0.5288771483203498, "learning_rate": 8.703974047039742e-06, "loss": 0.5699, "step": 28885 }, { "epoch": 0.8433622375989023, "grad_norm": 0.5300489838128191, "learning_rate": 8.70235198702352e-06, "loss": 0.574, "step": 28886 }, { "epoch": 0.8433914338267496, "grad_norm": 0.5235922951160723, "learning_rate": 8.7007299270073e-06, "loss": 0.5963, "step": 28887 }, { "epoch": 0.843420630054597, "grad_norm": 0.5604469479603353, "learning_rate": 8.69910786699108e-06, "loss": 0.6274, "step": 28888 }, { "epoch": 0.8434498262824444, "grad_norm": 0.5381179065673746, "learning_rate": 8.697485806974858e-06, "loss": 0.622, "step": 28889 }, { "epoch": 0.8434790225102917, "grad_norm": 0.5302727962384429, "learning_rate": 8.695863746958638e-06, "loss": 0.6036, "step": 28890 }, { "epoch": 0.8435082187381391, "grad_norm": 0.5049908227567601, "learning_rate": 8.694241686942417e-06, "loss": 0.5454, "step": 28891 }, { "epoch": 0.8435374149659864, "grad_norm": 0.5184047278710765, "learning_rate": 8.692619626926197e-06, "loss": 0.6089, "step": 28892 }, { "epoch": 0.8435666111938338, "grad_norm": 0.5359636374804952, "learning_rate": 8.690997566909977e-06, "loss": 0.5925, "step": 28893 }, { "epoch": 0.8435958074216812, "grad_norm": 0.5167262199429459, "learning_rate": 8.689375506893755e-06, "loss": 0.5711, "step": 28894 }, { "epoch": 0.8436250036495285, "grad_norm": 0.5443863634347399, "learning_rate": 8.687753446877535e-06, "loss": 0.6127, "step": 28895 }, { "epoch": 0.8436541998773759, "grad_norm": 0.5307549914299156, "learning_rate": 8.686131386861314e-06, "loss": 0.6103, "step": 28896 }, { "epoch": 0.8436833961052232, "grad_norm": 0.5141553136808251, "learning_rate": 8.684509326845094e-06, "loss": 0.5558, "step": 28897 }, { "epoch": 0.8437125923330706, "grad_norm": 0.5108564328819944, "learning_rate": 8.682887266828874e-06, "loss": 0.5457, "step": 28898 }, { "epoch": 0.843741788560918, "grad_norm": 0.5058991550307026, "learning_rate": 8.681265206812652e-06, "loss": 0.5261, "step": 28899 }, { "epoch": 0.8437709847887653, "grad_norm": 0.5417973566628904, "learning_rate": 8.679643146796432e-06, "loss": 0.6553, "step": 28900 }, { "epoch": 0.8438001810166127, "grad_norm": 0.5132296506786092, "learning_rate": 8.67802108678021e-06, "loss": 0.5945, "step": 28901 }, { "epoch": 0.84382937724446, "grad_norm": 0.4940067233540309, "learning_rate": 8.67639902676399e-06, "loss": 0.5218, "step": 28902 }, { "epoch": 0.8438585734723074, "grad_norm": 0.525779530567418, "learning_rate": 8.674776966747771e-06, "loss": 0.5571, "step": 28903 }, { "epoch": 0.8438877697001548, "grad_norm": 0.5429360005786683, "learning_rate": 8.67315490673155e-06, "loss": 0.623, "step": 28904 }, { "epoch": 0.8439169659280021, "grad_norm": 0.4896378106808448, "learning_rate": 8.67153284671533e-06, "loss": 0.5261, "step": 28905 }, { "epoch": 0.8439461621558495, "grad_norm": 0.5042925579288421, "learning_rate": 8.669910786699108e-06, "loss": 0.5427, "step": 28906 }, { "epoch": 0.8439753583836969, "grad_norm": 0.5113301644774617, "learning_rate": 8.668288726682888e-06, "loss": 0.541, "step": 28907 }, { "epoch": 0.8440045546115442, "grad_norm": 0.49713698671986994, "learning_rate": 8.666666666666668e-06, "loss": 0.5549, "step": 28908 }, { "epoch": 0.8440337508393916, "grad_norm": 0.5674360212575645, "learning_rate": 8.665044606650446e-06, "loss": 0.6577, "step": 28909 }, { "epoch": 0.844062947067239, "grad_norm": 0.5035427796004569, "learning_rate": 8.663422546634226e-06, "loss": 0.5597, "step": 28910 }, { "epoch": 0.8440921432950863, "grad_norm": 0.5027491663103824, "learning_rate": 8.661800486618005e-06, "loss": 0.5339, "step": 28911 }, { "epoch": 0.8441213395229337, "grad_norm": 0.5598442727831932, "learning_rate": 8.660178426601785e-06, "loss": 0.628, "step": 28912 }, { "epoch": 0.844150535750781, "grad_norm": 0.545642934050522, "learning_rate": 8.658556366585565e-06, "loss": 0.6174, "step": 28913 }, { "epoch": 0.8441797319786284, "grad_norm": 0.509115580755401, "learning_rate": 8.656934306569343e-06, "loss": 0.5711, "step": 28914 }, { "epoch": 0.8442089282064758, "grad_norm": 0.5425943380308368, "learning_rate": 8.655312246553123e-06, "loss": 0.6019, "step": 28915 }, { "epoch": 0.8442381244343231, "grad_norm": 0.539360959282786, "learning_rate": 8.653690186536902e-06, "loss": 0.5895, "step": 28916 }, { "epoch": 0.8442673206621705, "grad_norm": 0.5362045600396229, "learning_rate": 8.652068126520682e-06, "loss": 0.5798, "step": 28917 }, { "epoch": 0.8442965168900178, "grad_norm": 0.528946211460507, "learning_rate": 8.650446066504462e-06, "loss": 0.5793, "step": 28918 }, { "epoch": 0.8443257131178652, "grad_norm": 0.5475024850337672, "learning_rate": 8.64882400648824e-06, "loss": 0.5973, "step": 28919 }, { "epoch": 0.8443549093457126, "grad_norm": 0.5216654265834828, "learning_rate": 8.64720194647202e-06, "loss": 0.6102, "step": 28920 }, { "epoch": 0.8443841055735599, "grad_norm": 0.5487958809203343, "learning_rate": 8.6455798864558e-06, "loss": 0.6347, "step": 28921 }, { "epoch": 0.8444133018014073, "grad_norm": 0.5704906121167518, "learning_rate": 8.643957826439579e-06, "loss": 0.6659, "step": 28922 }, { "epoch": 0.8444424980292546, "grad_norm": 0.5133450867744399, "learning_rate": 8.642335766423359e-06, "loss": 0.5498, "step": 28923 }, { "epoch": 0.844471694257102, "grad_norm": 0.5465841837074948, "learning_rate": 8.640713706407137e-06, "loss": 0.6591, "step": 28924 }, { "epoch": 0.8445008904849494, "grad_norm": 0.5082139550807484, "learning_rate": 8.639091646390917e-06, "loss": 0.5538, "step": 28925 }, { "epoch": 0.8445300867127967, "grad_norm": 0.5156448155528623, "learning_rate": 8.637469586374697e-06, "loss": 0.5582, "step": 28926 }, { "epoch": 0.8445592829406441, "grad_norm": 0.5198214230338292, "learning_rate": 8.635847526358476e-06, "loss": 0.5673, "step": 28927 }, { "epoch": 0.8445884791684914, "grad_norm": 0.5372049368279556, "learning_rate": 8.634225466342256e-06, "loss": 0.5942, "step": 28928 }, { "epoch": 0.8446176753963388, "grad_norm": 0.5144355163637943, "learning_rate": 8.632603406326034e-06, "loss": 0.5717, "step": 28929 }, { "epoch": 0.8446468716241862, "grad_norm": 0.5185792288635446, "learning_rate": 8.630981346309814e-06, "loss": 0.5657, "step": 28930 }, { "epoch": 0.8446760678520335, "grad_norm": 0.5139674454132438, "learning_rate": 8.629359286293594e-06, "loss": 0.5541, "step": 28931 }, { "epoch": 0.8447052640798809, "grad_norm": 0.5407729152290868, "learning_rate": 8.627737226277373e-06, "loss": 0.6272, "step": 28932 }, { "epoch": 0.8447344603077283, "grad_norm": 0.5227007535621565, "learning_rate": 8.626115166261153e-06, "loss": 0.5543, "step": 28933 }, { "epoch": 0.8447636565355756, "grad_norm": 0.5360253204830884, "learning_rate": 8.624493106244931e-06, "loss": 0.6383, "step": 28934 }, { "epoch": 0.844792852763423, "grad_norm": 0.5316021699116091, "learning_rate": 8.622871046228711e-06, "loss": 0.5574, "step": 28935 }, { "epoch": 0.8448220489912703, "grad_norm": 0.5383534422389198, "learning_rate": 8.621248986212491e-06, "loss": 0.6164, "step": 28936 }, { "epoch": 0.8448512452191177, "grad_norm": 0.5386822316864092, "learning_rate": 8.61962692619627e-06, "loss": 0.6316, "step": 28937 }, { "epoch": 0.8448804414469651, "grad_norm": 0.5303805463386602, "learning_rate": 8.61800486618005e-06, "loss": 0.576, "step": 28938 }, { "epoch": 0.8449096376748124, "grad_norm": 0.5282822553386458, "learning_rate": 8.616382806163828e-06, "loss": 0.5588, "step": 28939 }, { "epoch": 0.8449388339026598, "grad_norm": 0.5179209395225833, "learning_rate": 8.614760746147608e-06, "loss": 0.5786, "step": 28940 }, { "epoch": 0.8449680301305071, "grad_norm": 0.5182867556878636, "learning_rate": 8.613138686131388e-06, "loss": 0.5452, "step": 28941 }, { "epoch": 0.8449972263583545, "grad_norm": 0.5350145350329333, "learning_rate": 8.611516626115166e-06, "loss": 0.6027, "step": 28942 }, { "epoch": 0.8450264225862019, "grad_norm": 0.5443589428892178, "learning_rate": 8.609894566098946e-06, "loss": 0.5979, "step": 28943 }, { "epoch": 0.8450556188140492, "grad_norm": 0.5269273917148577, "learning_rate": 8.608272506082725e-06, "loss": 0.5725, "step": 28944 }, { "epoch": 0.8450848150418966, "grad_norm": 0.4850463203428554, "learning_rate": 8.606650446066505e-06, "loss": 0.5043, "step": 28945 }, { "epoch": 0.845114011269744, "grad_norm": 0.5550829859609396, "learning_rate": 8.605028386050285e-06, "loss": 0.6292, "step": 28946 }, { "epoch": 0.8451432074975913, "grad_norm": 0.5150475455182679, "learning_rate": 8.603406326034063e-06, "loss": 0.5664, "step": 28947 }, { "epoch": 0.8451724037254387, "grad_norm": 0.5570086507973299, "learning_rate": 8.601784266017843e-06, "loss": 0.6153, "step": 28948 }, { "epoch": 0.845201599953286, "grad_norm": 0.5335070195214433, "learning_rate": 8.600162206001622e-06, "loss": 0.5798, "step": 28949 }, { "epoch": 0.8452307961811334, "grad_norm": 0.5221577264354348, "learning_rate": 8.598540145985402e-06, "loss": 0.541, "step": 28950 }, { "epoch": 0.8452599924089808, "grad_norm": 0.5443005648765094, "learning_rate": 8.596918085969182e-06, "loss": 0.5918, "step": 28951 }, { "epoch": 0.8452891886368281, "grad_norm": 0.5548104182967752, "learning_rate": 8.59529602595296e-06, "loss": 0.6286, "step": 28952 }, { "epoch": 0.8453183848646755, "grad_norm": 0.568021774066626, "learning_rate": 8.59367396593674e-06, "loss": 0.6511, "step": 28953 }, { "epoch": 0.8453475810925228, "grad_norm": 0.5527713948566935, "learning_rate": 8.59205190592052e-06, "loss": 0.6053, "step": 28954 }, { "epoch": 0.8453767773203702, "grad_norm": 0.5047286803292788, "learning_rate": 8.590429845904299e-06, "loss": 0.5314, "step": 28955 }, { "epoch": 0.8454059735482176, "grad_norm": 0.5083197417444948, "learning_rate": 8.588807785888079e-06, "loss": 0.5806, "step": 28956 }, { "epoch": 0.8454351697760649, "grad_norm": 0.5415805702977271, "learning_rate": 8.587185725871857e-06, "loss": 0.6161, "step": 28957 }, { "epoch": 0.8454643660039123, "grad_norm": 0.5213070151459084, "learning_rate": 8.585563665855637e-06, "loss": 0.5707, "step": 28958 }, { "epoch": 0.8454935622317596, "grad_norm": 0.5461711882300362, "learning_rate": 8.583941605839417e-06, "loss": 0.6426, "step": 28959 }, { "epoch": 0.845522758459607, "grad_norm": 0.5047953299099204, "learning_rate": 8.582319545823196e-06, "loss": 0.5414, "step": 28960 }, { "epoch": 0.8455519546874544, "grad_norm": 0.5951613431379205, "learning_rate": 8.580697485806976e-06, "loss": 0.6246, "step": 28961 }, { "epoch": 0.8455811509153017, "grad_norm": 0.4817554803149423, "learning_rate": 8.579075425790754e-06, "loss": 0.5292, "step": 28962 }, { "epoch": 0.8456103471431491, "grad_norm": 0.5099286064268033, "learning_rate": 8.577453365774534e-06, "loss": 0.5158, "step": 28963 }, { "epoch": 0.8456395433709964, "grad_norm": 0.5138056765310707, "learning_rate": 8.575831305758314e-06, "loss": 0.5708, "step": 28964 }, { "epoch": 0.8456687395988438, "grad_norm": 0.5801050957747806, "learning_rate": 8.574209245742093e-06, "loss": 0.6755, "step": 28965 }, { "epoch": 0.8456979358266912, "grad_norm": 0.5089968856901274, "learning_rate": 8.572587185725873e-06, "loss": 0.5408, "step": 28966 }, { "epoch": 0.8457271320545385, "grad_norm": 0.4879534708579352, "learning_rate": 8.570965125709651e-06, "loss": 0.5162, "step": 28967 }, { "epoch": 0.8457563282823859, "grad_norm": 0.5569930883813433, "learning_rate": 8.569343065693431e-06, "loss": 0.633, "step": 28968 }, { "epoch": 0.8457855245102333, "grad_norm": 0.5004283968519222, "learning_rate": 8.567721005677211e-06, "loss": 0.5246, "step": 28969 }, { "epoch": 0.8458147207380806, "grad_norm": 0.539344592252948, "learning_rate": 8.56609894566099e-06, "loss": 0.6453, "step": 28970 }, { "epoch": 0.845843916965928, "grad_norm": 0.5311049058388289, "learning_rate": 8.56447688564477e-06, "loss": 0.6231, "step": 28971 }, { "epoch": 0.8458731131937753, "grad_norm": 0.524027937595334, "learning_rate": 8.562854825628548e-06, "loss": 0.6233, "step": 28972 }, { "epoch": 0.8459023094216227, "grad_norm": 0.4837876775885241, "learning_rate": 8.561232765612328e-06, "loss": 0.4879, "step": 28973 }, { "epoch": 0.8459315056494701, "grad_norm": 0.49380591612537433, "learning_rate": 8.559610705596108e-06, "loss": 0.5263, "step": 28974 }, { "epoch": 0.8459607018773174, "grad_norm": 0.5319145191289752, "learning_rate": 8.557988645579887e-06, "loss": 0.5978, "step": 28975 }, { "epoch": 0.8459898981051648, "grad_norm": 0.5000496247959868, "learning_rate": 8.556366585563667e-06, "loss": 0.546, "step": 28976 }, { "epoch": 0.8460190943330121, "grad_norm": 0.5350421006948153, "learning_rate": 8.554744525547445e-06, "loss": 0.5816, "step": 28977 }, { "epoch": 0.8460482905608595, "grad_norm": 0.5432585794162488, "learning_rate": 8.553122465531225e-06, "loss": 0.5736, "step": 28978 }, { "epoch": 0.8460774867887069, "grad_norm": 0.5488131401415691, "learning_rate": 8.551500405515005e-06, "loss": 0.6529, "step": 28979 }, { "epoch": 0.8461066830165542, "grad_norm": 0.5155792064380921, "learning_rate": 8.549878345498784e-06, "loss": 0.537, "step": 28980 }, { "epoch": 0.8461358792444016, "grad_norm": 0.5486704161181226, "learning_rate": 8.548256285482564e-06, "loss": 0.6116, "step": 28981 }, { "epoch": 0.846165075472249, "grad_norm": 0.5309613159948943, "learning_rate": 8.546634225466342e-06, "loss": 0.5818, "step": 28982 }, { "epoch": 0.8461942717000963, "grad_norm": 0.5533737246520507, "learning_rate": 8.545012165450122e-06, "loss": 0.606, "step": 28983 }, { "epoch": 0.8462234679279437, "grad_norm": 0.5677634995074022, "learning_rate": 8.543390105433902e-06, "loss": 0.6972, "step": 28984 }, { "epoch": 0.846252664155791, "grad_norm": 0.4804348151956316, "learning_rate": 8.54176804541768e-06, "loss": 0.4759, "step": 28985 }, { "epoch": 0.8462818603836384, "grad_norm": 0.5562148603189099, "learning_rate": 8.54014598540146e-06, "loss": 0.666, "step": 28986 }, { "epoch": 0.8463110566114858, "grad_norm": 0.546256793338012, "learning_rate": 8.53852392538524e-06, "loss": 0.6407, "step": 28987 }, { "epoch": 0.8463402528393331, "grad_norm": 0.542115597586147, "learning_rate": 8.536901865369019e-06, "loss": 0.6199, "step": 28988 }, { "epoch": 0.8463694490671805, "grad_norm": 0.5193546893504667, "learning_rate": 8.535279805352799e-06, "loss": 0.5857, "step": 28989 }, { "epoch": 0.8463986452950278, "grad_norm": 0.5582339549983142, "learning_rate": 8.533657745336577e-06, "loss": 0.6222, "step": 28990 }, { "epoch": 0.8464278415228752, "grad_norm": 0.5371605379267137, "learning_rate": 8.532035685320358e-06, "loss": 0.6015, "step": 28991 }, { "epoch": 0.8464570377507226, "grad_norm": 0.4823005929811827, "learning_rate": 8.530413625304138e-06, "loss": 0.4975, "step": 28992 }, { "epoch": 0.8464862339785699, "grad_norm": 0.5718135206242998, "learning_rate": 8.528791565287916e-06, "loss": 0.6818, "step": 28993 }, { "epoch": 0.8465154302064173, "grad_norm": 0.5435479981795054, "learning_rate": 8.527169505271696e-06, "loss": 0.6327, "step": 28994 }, { "epoch": 0.8465446264342646, "grad_norm": 0.5125298908258745, "learning_rate": 8.525547445255474e-06, "loss": 0.5565, "step": 28995 }, { "epoch": 0.846573822662112, "grad_norm": 0.4956176390357449, "learning_rate": 8.523925385239253e-06, "loss": 0.5151, "step": 28996 }, { "epoch": 0.8466030188899594, "grad_norm": 0.5075130816983483, "learning_rate": 8.522303325223035e-06, "loss": 0.5442, "step": 28997 }, { "epoch": 0.8466322151178067, "grad_norm": 0.5361451071067167, "learning_rate": 8.520681265206813e-06, "loss": 0.6171, "step": 28998 }, { "epoch": 0.8466614113456541, "grad_norm": 0.523806620475117, "learning_rate": 8.519059205190593e-06, "loss": 0.5565, "step": 28999 }, { "epoch": 0.8466906075735015, "grad_norm": 0.5104397399859041, "learning_rate": 8.517437145174371e-06, "loss": 0.5588, "step": 29000 }, { "epoch": 0.8467198038013488, "grad_norm": 0.4986769692250843, "learning_rate": 8.51581508515815e-06, "loss": 0.499, "step": 29001 }, { "epoch": 0.8467490000291962, "grad_norm": 0.5030016073397411, "learning_rate": 8.514193025141932e-06, "loss": 0.5723, "step": 29002 }, { "epoch": 0.8467781962570435, "grad_norm": 0.505690254290596, "learning_rate": 8.51257096512571e-06, "loss": 0.5714, "step": 29003 }, { "epoch": 0.8468073924848909, "grad_norm": 0.5577823721738117, "learning_rate": 8.51094890510949e-06, "loss": 0.6284, "step": 29004 }, { "epoch": 0.8468365887127384, "grad_norm": 0.5363462085606763, "learning_rate": 8.509326845093268e-06, "loss": 0.6264, "step": 29005 }, { "epoch": 0.8468657849405857, "grad_norm": 0.5498201055508705, "learning_rate": 8.507704785077048e-06, "loss": 0.6217, "step": 29006 }, { "epoch": 0.8468949811684331, "grad_norm": 0.5258141604590737, "learning_rate": 8.506082725060828e-06, "loss": 0.5536, "step": 29007 }, { "epoch": 0.8469241773962805, "grad_norm": 0.48484405403108255, "learning_rate": 8.504460665044607e-06, "loss": 0.5123, "step": 29008 }, { "epoch": 0.8469533736241278, "grad_norm": 0.5312268736075387, "learning_rate": 8.502838605028387e-06, "loss": 0.6007, "step": 29009 }, { "epoch": 0.8469825698519752, "grad_norm": 0.5329931011629787, "learning_rate": 8.501216545012165e-06, "loss": 0.5917, "step": 29010 }, { "epoch": 0.8470117660798225, "grad_norm": 0.5398331374895871, "learning_rate": 8.499594484995945e-06, "loss": 0.6027, "step": 29011 }, { "epoch": 0.8470409623076699, "grad_norm": 0.5301358779214745, "learning_rate": 8.497972424979725e-06, "loss": 0.5731, "step": 29012 }, { "epoch": 0.8470701585355173, "grad_norm": 0.5355181344264852, "learning_rate": 8.496350364963504e-06, "loss": 0.6197, "step": 29013 }, { "epoch": 0.8470993547633646, "grad_norm": 0.5226376219031694, "learning_rate": 8.494728304947284e-06, "loss": 0.5837, "step": 29014 }, { "epoch": 0.847128550991212, "grad_norm": 0.5430902538088587, "learning_rate": 8.493106244931062e-06, "loss": 0.5855, "step": 29015 }, { "epoch": 0.8471577472190593, "grad_norm": 0.48586311871787513, "learning_rate": 8.491484184914842e-06, "loss": 0.5041, "step": 29016 }, { "epoch": 0.8471869434469067, "grad_norm": 0.510792864239652, "learning_rate": 8.489862124898622e-06, "loss": 0.5197, "step": 29017 }, { "epoch": 0.8472161396747541, "grad_norm": 0.5391408003447282, "learning_rate": 8.4882400648824e-06, "loss": 0.6246, "step": 29018 }, { "epoch": 0.8472453359026014, "grad_norm": 0.5194969850971336, "learning_rate": 8.48661800486618e-06, "loss": 0.5876, "step": 29019 }, { "epoch": 0.8472745321304488, "grad_norm": 0.4941854861846985, "learning_rate": 8.484995944849961e-06, "loss": 0.5219, "step": 29020 }, { "epoch": 0.8473037283582961, "grad_norm": 0.507243708540379, "learning_rate": 8.48337388483374e-06, "loss": 0.6108, "step": 29021 }, { "epoch": 0.8473329245861435, "grad_norm": 0.5414374601046191, "learning_rate": 8.48175182481752e-06, "loss": 0.6438, "step": 29022 }, { "epoch": 0.8473621208139909, "grad_norm": 0.4916113737785474, "learning_rate": 8.480129764801298e-06, "loss": 0.5159, "step": 29023 }, { "epoch": 0.8473913170418382, "grad_norm": 0.510984526254183, "learning_rate": 8.478507704785078e-06, "loss": 0.5218, "step": 29024 }, { "epoch": 0.8474205132696856, "grad_norm": 0.5207207060216533, "learning_rate": 8.476885644768858e-06, "loss": 0.5577, "step": 29025 }, { "epoch": 0.847449709497533, "grad_norm": 0.5105288367723878, "learning_rate": 8.475263584752636e-06, "loss": 0.5265, "step": 29026 }, { "epoch": 0.8474789057253803, "grad_norm": 0.5304370619991512, "learning_rate": 8.473641524736416e-06, "loss": 0.6025, "step": 29027 }, { "epoch": 0.8475081019532277, "grad_norm": 0.5137205529345719, "learning_rate": 8.472019464720195e-06, "loss": 0.5623, "step": 29028 }, { "epoch": 0.847537298181075, "grad_norm": 0.5184580861747793, "learning_rate": 8.470397404703973e-06, "loss": 0.5501, "step": 29029 }, { "epoch": 0.8475664944089224, "grad_norm": 0.522045908676526, "learning_rate": 8.468775344687755e-06, "loss": 0.6176, "step": 29030 }, { "epoch": 0.8475956906367698, "grad_norm": 0.47660113252658254, "learning_rate": 8.467153284671533e-06, "loss": 0.4866, "step": 29031 }, { "epoch": 0.8476248868646171, "grad_norm": 0.5515140971528741, "learning_rate": 8.465531224655313e-06, "loss": 0.6215, "step": 29032 }, { "epoch": 0.8476540830924645, "grad_norm": 0.5198412666169776, "learning_rate": 8.463909164639092e-06, "loss": 0.5638, "step": 29033 }, { "epoch": 0.8476832793203118, "grad_norm": 0.5107371806033558, "learning_rate": 8.46228710462287e-06, "loss": 0.5626, "step": 29034 }, { "epoch": 0.8477124755481592, "grad_norm": 0.5217903724576399, "learning_rate": 8.460665044606652e-06, "loss": 0.5312, "step": 29035 }, { "epoch": 0.8477416717760066, "grad_norm": 0.5220170760151917, "learning_rate": 8.45904298459043e-06, "loss": 0.5834, "step": 29036 }, { "epoch": 0.8477708680038539, "grad_norm": 0.5716995142443133, "learning_rate": 8.45742092457421e-06, "loss": 0.6593, "step": 29037 }, { "epoch": 0.8478000642317013, "grad_norm": 0.52368512464658, "learning_rate": 8.455798864557989e-06, "loss": 0.592, "step": 29038 }, { "epoch": 0.8478292604595487, "grad_norm": 0.49791251136877857, "learning_rate": 8.454176804541769e-06, "loss": 0.5317, "step": 29039 }, { "epoch": 0.847858456687396, "grad_norm": 0.5321915808188306, "learning_rate": 8.452554744525549e-06, "loss": 0.6092, "step": 29040 }, { "epoch": 0.8478876529152434, "grad_norm": 0.5092682936994397, "learning_rate": 8.450932684509327e-06, "loss": 0.5128, "step": 29041 }, { "epoch": 0.8479168491430907, "grad_norm": 0.5487476498222651, "learning_rate": 8.449310624493107e-06, "loss": 0.6002, "step": 29042 }, { "epoch": 0.8479460453709381, "grad_norm": 0.5487966604348757, "learning_rate": 8.447688564476886e-06, "loss": 0.6596, "step": 29043 }, { "epoch": 0.8479752415987855, "grad_norm": 0.5042945498353634, "learning_rate": 8.446066504460666e-06, "loss": 0.5432, "step": 29044 }, { "epoch": 0.8480044378266328, "grad_norm": 0.5954075626798334, "learning_rate": 8.444444444444446e-06, "loss": 0.7641, "step": 29045 }, { "epoch": 0.8480336340544802, "grad_norm": 0.5272418261729144, "learning_rate": 8.442822384428224e-06, "loss": 0.5813, "step": 29046 }, { "epoch": 0.8480628302823275, "grad_norm": 0.5416190366228598, "learning_rate": 8.441200324412004e-06, "loss": 0.619, "step": 29047 }, { "epoch": 0.8480920265101749, "grad_norm": 0.4775776523871783, "learning_rate": 8.439578264395782e-06, "loss": 0.4764, "step": 29048 }, { "epoch": 0.8481212227380223, "grad_norm": 0.5195966090805635, "learning_rate": 8.437956204379563e-06, "loss": 0.5835, "step": 29049 }, { "epoch": 0.8481504189658696, "grad_norm": 0.5421632812278366, "learning_rate": 8.436334144363343e-06, "loss": 0.5772, "step": 29050 }, { "epoch": 0.848179615193717, "grad_norm": 0.5235749902556749, "learning_rate": 8.434712084347121e-06, "loss": 0.5171, "step": 29051 }, { "epoch": 0.8482088114215643, "grad_norm": 0.5223961268277573, "learning_rate": 8.433090024330901e-06, "loss": 0.5628, "step": 29052 }, { "epoch": 0.8482380076494117, "grad_norm": 0.5223781864769541, "learning_rate": 8.431467964314681e-06, "loss": 0.5836, "step": 29053 }, { "epoch": 0.8482672038772591, "grad_norm": 0.5257392754658927, "learning_rate": 8.42984590429846e-06, "loss": 0.5553, "step": 29054 }, { "epoch": 0.8482964001051064, "grad_norm": 0.5405454344840536, "learning_rate": 8.42822384428224e-06, "loss": 0.5871, "step": 29055 }, { "epoch": 0.8483255963329538, "grad_norm": 0.5208416172157244, "learning_rate": 8.426601784266018e-06, "loss": 0.5619, "step": 29056 }, { "epoch": 0.8483547925608012, "grad_norm": 0.4942835444604159, "learning_rate": 8.424979724249796e-06, "loss": 0.5213, "step": 29057 }, { "epoch": 0.8483839887886485, "grad_norm": 0.5008488255684983, "learning_rate": 8.423357664233578e-06, "loss": 0.5724, "step": 29058 }, { "epoch": 0.8484131850164959, "grad_norm": 0.5465093224110367, "learning_rate": 8.421735604217356e-06, "loss": 0.6135, "step": 29059 }, { "epoch": 0.8484423812443432, "grad_norm": 0.5173520368262253, "learning_rate": 8.420113544201137e-06, "loss": 0.5523, "step": 29060 }, { "epoch": 0.8484715774721906, "grad_norm": 0.5410226117582774, "learning_rate": 8.418491484184915e-06, "loss": 0.6139, "step": 29061 }, { "epoch": 0.848500773700038, "grad_norm": 0.5344789774319433, "learning_rate": 8.416869424168693e-06, "loss": 0.5862, "step": 29062 }, { "epoch": 0.8485299699278853, "grad_norm": 0.5474723451432486, "learning_rate": 8.415247364152475e-06, "loss": 0.6032, "step": 29063 }, { "epoch": 0.8485591661557327, "grad_norm": 0.5383517556081195, "learning_rate": 8.413625304136253e-06, "loss": 0.5885, "step": 29064 }, { "epoch": 0.84858836238358, "grad_norm": 0.508434410571704, "learning_rate": 8.412003244120033e-06, "loss": 0.5862, "step": 29065 }, { "epoch": 0.8486175586114274, "grad_norm": 0.529071139201235, "learning_rate": 8.410381184103812e-06, "loss": 0.5876, "step": 29066 }, { "epoch": 0.8486467548392748, "grad_norm": 0.5531251231389195, "learning_rate": 8.40875912408759e-06, "loss": 0.5817, "step": 29067 }, { "epoch": 0.8486759510671221, "grad_norm": 0.5155291879521885, "learning_rate": 8.407137064071372e-06, "loss": 0.5517, "step": 29068 }, { "epoch": 0.8487051472949695, "grad_norm": 0.49077763940277136, "learning_rate": 8.40551500405515e-06, "loss": 0.5273, "step": 29069 }, { "epoch": 0.8487343435228168, "grad_norm": 0.543301289993694, "learning_rate": 8.40389294403893e-06, "loss": 0.6354, "step": 29070 }, { "epoch": 0.8487635397506642, "grad_norm": 0.5038649127013108, "learning_rate": 8.402270884022709e-06, "loss": 0.5212, "step": 29071 }, { "epoch": 0.8487927359785116, "grad_norm": 0.5662036123989924, "learning_rate": 8.400648824006489e-06, "loss": 0.6536, "step": 29072 }, { "epoch": 0.8488219322063589, "grad_norm": 0.488506412761828, "learning_rate": 8.399026763990269e-06, "loss": 0.4706, "step": 29073 }, { "epoch": 0.8488511284342063, "grad_norm": 0.5307558517379645, "learning_rate": 8.397404703974047e-06, "loss": 0.583, "step": 29074 }, { "epoch": 0.8488803246620537, "grad_norm": 0.5291588551321242, "learning_rate": 8.395782643957827e-06, "loss": 0.6043, "step": 29075 }, { "epoch": 0.848909520889901, "grad_norm": 0.5523541925270085, "learning_rate": 8.394160583941606e-06, "loss": 0.6312, "step": 29076 }, { "epoch": 0.8489387171177484, "grad_norm": 0.6003080244084927, "learning_rate": 8.392538523925386e-06, "loss": 0.6356, "step": 29077 }, { "epoch": 0.8489679133455957, "grad_norm": 0.5356358870389991, "learning_rate": 8.390916463909166e-06, "loss": 0.5969, "step": 29078 }, { "epoch": 0.8489971095734431, "grad_norm": 0.5209595704255309, "learning_rate": 8.389294403892944e-06, "loss": 0.5795, "step": 29079 }, { "epoch": 0.8490263058012905, "grad_norm": 0.5553318011306071, "learning_rate": 8.387672343876724e-06, "loss": 0.6302, "step": 29080 }, { "epoch": 0.8490555020291378, "grad_norm": 0.5180423905662478, "learning_rate": 8.386050283860503e-06, "loss": 0.5513, "step": 29081 }, { "epoch": 0.8490846982569852, "grad_norm": 0.5973539560494957, "learning_rate": 8.384428223844283e-06, "loss": 0.5489, "step": 29082 }, { "epoch": 0.8491138944848325, "grad_norm": 0.5376698363380169, "learning_rate": 8.382806163828063e-06, "loss": 0.6179, "step": 29083 }, { "epoch": 0.8491430907126799, "grad_norm": 0.5214275717816418, "learning_rate": 8.381184103811841e-06, "loss": 0.5852, "step": 29084 }, { "epoch": 0.8491722869405273, "grad_norm": 0.5415654686007737, "learning_rate": 8.379562043795621e-06, "loss": 0.6021, "step": 29085 }, { "epoch": 0.8492014831683746, "grad_norm": 0.49756308129072974, "learning_rate": 8.377939983779401e-06, "loss": 0.5092, "step": 29086 }, { "epoch": 0.849230679396222, "grad_norm": 0.4730418948508353, "learning_rate": 8.37631792376318e-06, "loss": 0.4419, "step": 29087 }, { "epoch": 0.8492598756240693, "grad_norm": 0.5111512525695793, "learning_rate": 8.37469586374696e-06, "loss": 0.5487, "step": 29088 }, { "epoch": 0.8492890718519167, "grad_norm": 0.5394593570834456, "learning_rate": 8.373073803730738e-06, "loss": 0.635, "step": 29089 }, { "epoch": 0.8493182680797641, "grad_norm": 0.5267387362324616, "learning_rate": 8.371451743714517e-06, "loss": 0.5612, "step": 29090 }, { "epoch": 0.8493474643076114, "grad_norm": 0.5171790580643097, "learning_rate": 8.369829683698298e-06, "loss": 0.5819, "step": 29091 }, { "epoch": 0.8493766605354588, "grad_norm": 0.5206127229106863, "learning_rate": 8.368207623682077e-06, "loss": 0.5126, "step": 29092 }, { "epoch": 0.8494058567633062, "grad_norm": 0.5498345811473941, "learning_rate": 8.366585563665857e-06, "loss": 0.5908, "step": 29093 }, { "epoch": 0.8494350529911535, "grad_norm": 0.5659003999781387, "learning_rate": 8.364963503649635e-06, "loss": 0.6421, "step": 29094 }, { "epoch": 0.8494642492190009, "grad_norm": 0.5635255553153299, "learning_rate": 8.363341443633413e-06, "loss": 0.654, "step": 29095 }, { "epoch": 0.8494934454468482, "grad_norm": 0.5469874468962322, "learning_rate": 8.361719383617195e-06, "loss": 0.5922, "step": 29096 }, { "epoch": 0.8495226416746956, "grad_norm": 0.5955665139151792, "learning_rate": 8.360097323600974e-06, "loss": 0.7405, "step": 29097 }, { "epoch": 0.849551837902543, "grad_norm": 0.5465818642263811, "learning_rate": 8.358475263584754e-06, "loss": 0.609, "step": 29098 }, { "epoch": 0.8495810341303903, "grad_norm": 0.5331063426970052, "learning_rate": 8.356853203568532e-06, "loss": 0.5609, "step": 29099 }, { "epoch": 0.8496102303582377, "grad_norm": 0.5220746218612191, "learning_rate": 8.35523114355231e-06, "loss": 0.5558, "step": 29100 }, { "epoch": 0.849639426586085, "grad_norm": 0.5019660551668677, "learning_rate": 8.353609083536092e-06, "loss": 0.5452, "step": 29101 }, { "epoch": 0.8496686228139324, "grad_norm": 0.5193907010738549, "learning_rate": 8.35198702351987e-06, "loss": 0.5673, "step": 29102 }, { "epoch": 0.8496978190417798, "grad_norm": 0.5585392675687254, "learning_rate": 8.35036496350365e-06, "loss": 0.6485, "step": 29103 }, { "epoch": 0.8497270152696271, "grad_norm": 0.4859948215316212, "learning_rate": 8.348742903487429e-06, "loss": 0.494, "step": 29104 }, { "epoch": 0.8497562114974745, "grad_norm": 0.5629948976772432, "learning_rate": 8.347120843471209e-06, "loss": 0.6298, "step": 29105 }, { "epoch": 0.8497854077253219, "grad_norm": 0.5687187261213319, "learning_rate": 8.345498783454989e-06, "loss": 0.6778, "step": 29106 }, { "epoch": 0.8498146039531692, "grad_norm": 0.5433764778666672, "learning_rate": 8.343876723438768e-06, "loss": 0.6112, "step": 29107 }, { "epoch": 0.8498438001810166, "grad_norm": 0.5815889157225955, "learning_rate": 8.342254663422548e-06, "loss": 0.6505, "step": 29108 }, { "epoch": 0.8498729964088639, "grad_norm": 0.5621924164736779, "learning_rate": 8.340632603406326e-06, "loss": 0.6871, "step": 29109 }, { "epoch": 0.8499021926367113, "grad_norm": 0.5169297934242908, "learning_rate": 8.339010543390106e-06, "loss": 0.5432, "step": 29110 }, { "epoch": 0.8499313888645587, "grad_norm": 0.5366301089708329, "learning_rate": 8.337388483373886e-06, "loss": 0.5668, "step": 29111 }, { "epoch": 0.849960585092406, "grad_norm": 0.5134236456841981, "learning_rate": 8.335766423357664e-06, "loss": 0.608, "step": 29112 }, { "epoch": 0.8499897813202534, "grad_norm": 0.5595110693671344, "learning_rate": 8.334144363341445e-06, "loss": 0.6408, "step": 29113 }, { "epoch": 0.8500189775481007, "grad_norm": 0.4923036254841105, "learning_rate": 8.332522303325223e-06, "loss": 0.5309, "step": 29114 }, { "epoch": 0.8500481737759481, "grad_norm": 0.5441188178610868, "learning_rate": 8.330900243309003e-06, "loss": 0.6103, "step": 29115 }, { "epoch": 0.8500773700037955, "grad_norm": 0.5094300987725981, "learning_rate": 8.329278183292783e-06, "loss": 0.5669, "step": 29116 }, { "epoch": 0.8501065662316428, "grad_norm": 0.5218453843234656, "learning_rate": 8.327656123276561e-06, "loss": 0.6032, "step": 29117 }, { "epoch": 0.8501357624594902, "grad_norm": 0.5151099288053322, "learning_rate": 8.32603406326034e-06, "loss": 0.566, "step": 29118 }, { "epoch": 0.8501649586873375, "grad_norm": 0.5244307333543644, "learning_rate": 8.32441200324412e-06, "loss": 0.5584, "step": 29119 }, { "epoch": 0.8501941549151849, "grad_norm": 0.5577533786212929, "learning_rate": 8.3227899432279e-06, "loss": 0.6218, "step": 29120 }, { "epoch": 0.8502233511430323, "grad_norm": 0.49124559968818676, "learning_rate": 8.32116788321168e-06, "loss": 0.5019, "step": 29121 }, { "epoch": 0.8502525473708796, "grad_norm": 0.48703390612464836, "learning_rate": 8.319545823195458e-06, "loss": 0.5146, "step": 29122 }, { "epoch": 0.850281743598727, "grad_norm": 0.523045429342428, "learning_rate": 8.317923763179237e-06, "loss": 0.5658, "step": 29123 }, { "epoch": 0.8503109398265744, "grad_norm": 0.5385512498675404, "learning_rate": 8.316301703163019e-06, "loss": 0.5904, "step": 29124 }, { "epoch": 0.8503401360544217, "grad_norm": 0.5470838693122178, "learning_rate": 8.314679643146797e-06, "loss": 0.6184, "step": 29125 }, { "epoch": 0.8503693322822692, "grad_norm": 0.5584313022529216, "learning_rate": 8.313057583130577e-06, "loss": 0.6448, "step": 29126 }, { "epoch": 0.8503985285101165, "grad_norm": 0.5291912580658086, "learning_rate": 8.311435523114355e-06, "loss": 0.5978, "step": 29127 }, { "epoch": 0.8504277247379639, "grad_norm": 0.5147726788356015, "learning_rate": 8.309813463098134e-06, "loss": 0.5352, "step": 29128 }, { "epoch": 0.8504569209658113, "grad_norm": 0.5335083056307768, "learning_rate": 8.308191403081915e-06, "loss": 0.5933, "step": 29129 }, { "epoch": 0.8504861171936586, "grad_norm": 0.5636245526678918, "learning_rate": 8.306569343065694e-06, "loss": 0.6087, "step": 29130 }, { "epoch": 0.850515313421506, "grad_norm": 0.5346876827477544, "learning_rate": 8.304947283049474e-06, "loss": 0.5864, "step": 29131 }, { "epoch": 0.8505445096493534, "grad_norm": 0.49610938010653866, "learning_rate": 8.303325223033252e-06, "loss": 0.5129, "step": 29132 }, { "epoch": 0.8505737058772007, "grad_norm": 0.5486292125293313, "learning_rate": 8.30170316301703e-06, "loss": 0.6288, "step": 29133 }, { "epoch": 0.8506029021050481, "grad_norm": 0.5335587058847009, "learning_rate": 8.300081103000812e-06, "loss": 0.6213, "step": 29134 }, { "epoch": 0.8506320983328954, "grad_norm": 0.5439792292194952, "learning_rate": 8.29845904298459e-06, "loss": 0.602, "step": 29135 }, { "epoch": 0.8506612945607428, "grad_norm": 0.5001031988165713, "learning_rate": 8.296836982968371e-06, "loss": 0.5207, "step": 29136 }, { "epoch": 0.8506904907885902, "grad_norm": 0.5269939876606565, "learning_rate": 8.29521492295215e-06, "loss": 0.5834, "step": 29137 }, { "epoch": 0.8507196870164375, "grad_norm": 0.5452762335703526, "learning_rate": 8.29359286293593e-06, "loss": 0.6072, "step": 29138 }, { "epoch": 0.8507488832442849, "grad_norm": 0.5030820577478048, "learning_rate": 8.29197080291971e-06, "loss": 0.5225, "step": 29139 }, { "epoch": 0.8507780794721322, "grad_norm": 0.4765477528779391, "learning_rate": 8.290348742903488e-06, "loss": 0.5107, "step": 29140 }, { "epoch": 0.8508072756999796, "grad_norm": 0.5377992469810308, "learning_rate": 8.288726682887268e-06, "loss": 0.5894, "step": 29141 }, { "epoch": 0.850836471927827, "grad_norm": 0.5416395375757385, "learning_rate": 8.287104622871046e-06, "loss": 0.63, "step": 29142 }, { "epoch": 0.8508656681556743, "grad_norm": 0.48109007707391, "learning_rate": 8.285482562854826e-06, "loss": 0.5097, "step": 29143 }, { "epoch": 0.8508948643835217, "grad_norm": 0.5349241461457263, "learning_rate": 8.283860502838606e-06, "loss": 0.6157, "step": 29144 }, { "epoch": 0.850924060611369, "grad_norm": 0.48304407597580645, "learning_rate": 8.282238442822385e-06, "loss": 0.5081, "step": 29145 }, { "epoch": 0.8509532568392164, "grad_norm": 0.5226601315107111, "learning_rate": 8.280616382806163e-06, "loss": 0.6129, "step": 29146 }, { "epoch": 0.8509824530670638, "grad_norm": 0.5297132221807055, "learning_rate": 8.278994322789943e-06, "loss": 0.598, "step": 29147 }, { "epoch": 0.8510116492949111, "grad_norm": 0.5718974693149482, "learning_rate": 8.277372262773723e-06, "loss": 0.6826, "step": 29148 }, { "epoch": 0.8510408455227585, "grad_norm": 0.5426210359412897, "learning_rate": 8.275750202757503e-06, "loss": 0.5618, "step": 29149 }, { "epoch": 0.8510700417506059, "grad_norm": 0.5511511254596672, "learning_rate": 8.274128142741282e-06, "loss": 0.6687, "step": 29150 }, { "epoch": 0.8510992379784532, "grad_norm": 0.5579647244707083, "learning_rate": 8.27250608272506e-06, "loss": 0.5978, "step": 29151 }, { "epoch": 0.8511284342063006, "grad_norm": 0.5224151023734896, "learning_rate": 8.27088402270884e-06, "loss": 0.5873, "step": 29152 }, { "epoch": 0.8511576304341479, "grad_norm": 0.5244859732724465, "learning_rate": 8.26926196269262e-06, "loss": 0.6162, "step": 29153 }, { "epoch": 0.8511868266619953, "grad_norm": 0.5381213203659027, "learning_rate": 8.2676399026764e-06, "loss": 0.634, "step": 29154 }, { "epoch": 0.8512160228898427, "grad_norm": 0.5533238979335651, "learning_rate": 8.266017842660179e-06, "loss": 0.6102, "step": 29155 }, { "epoch": 0.85124521911769, "grad_norm": 0.4985524177289594, "learning_rate": 8.264395782643957e-06, "loss": 0.5401, "step": 29156 }, { "epoch": 0.8512744153455374, "grad_norm": 0.5312252783650977, "learning_rate": 8.262773722627739e-06, "loss": 0.5921, "step": 29157 }, { "epoch": 0.8513036115733847, "grad_norm": 0.5101541239441524, "learning_rate": 8.261151662611517e-06, "loss": 0.5499, "step": 29158 }, { "epoch": 0.8513328078012321, "grad_norm": 0.519295951167044, "learning_rate": 8.259529602595297e-06, "loss": 0.5796, "step": 29159 }, { "epoch": 0.8513620040290795, "grad_norm": 0.4908053307003871, "learning_rate": 8.257907542579076e-06, "loss": 0.5222, "step": 29160 }, { "epoch": 0.8513912002569268, "grad_norm": 0.5182096376057149, "learning_rate": 8.256285482562854e-06, "loss": 0.5319, "step": 29161 }, { "epoch": 0.8514203964847742, "grad_norm": 0.530985575860017, "learning_rate": 8.254663422546636e-06, "loss": 0.5959, "step": 29162 }, { "epoch": 0.8514495927126216, "grad_norm": 0.5488758932979849, "learning_rate": 8.253041362530414e-06, "loss": 0.6496, "step": 29163 }, { "epoch": 0.8514787889404689, "grad_norm": 0.5138641193510035, "learning_rate": 8.251419302514194e-06, "loss": 0.5457, "step": 29164 }, { "epoch": 0.8515079851683163, "grad_norm": 0.5258221667416956, "learning_rate": 8.249797242497973e-06, "loss": 0.5675, "step": 29165 }, { "epoch": 0.8515371813961636, "grad_norm": 0.5095994928164783, "learning_rate": 8.248175182481751e-06, "loss": 0.5673, "step": 29166 }, { "epoch": 0.851566377624011, "grad_norm": 0.5202032255648391, "learning_rate": 8.246553122465533e-06, "loss": 0.5432, "step": 29167 }, { "epoch": 0.8515955738518584, "grad_norm": 0.5629808074559286, "learning_rate": 8.244931062449311e-06, "loss": 0.6563, "step": 29168 }, { "epoch": 0.8516247700797057, "grad_norm": 0.49346163280063987, "learning_rate": 8.243309002433091e-06, "loss": 0.5175, "step": 29169 }, { "epoch": 0.8516539663075531, "grad_norm": 0.4941307909466497, "learning_rate": 8.24168694241687e-06, "loss": 0.532, "step": 29170 }, { "epoch": 0.8516831625354004, "grad_norm": 0.5004470685143776, "learning_rate": 8.24006488240065e-06, "loss": 0.5406, "step": 29171 }, { "epoch": 0.8517123587632478, "grad_norm": 0.5167535870589599, "learning_rate": 8.23844282238443e-06, "loss": 0.524, "step": 29172 }, { "epoch": 0.8517415549910952, "grad_norm": 0.5380257727732538, "learning_rate": 8.236820762368208e-06, "loss": 0.6057, "step": 29173 }, { "epoch": 0.8517707512189425, "grad_norm": 0.5405026624469975, "learning_rate": 8.235198702351988e-06, "loss": 0.6341, "step": 29174 }, { "epoch": 0.8517999474467899, "grad_norm": 0.4977953852197929, "learning_rate": 8.233576642335766e-06, "loss": 0.5364, "step": 29175 }, { "epoch": 0.8518291436746372, "grad_norm": 0.5374380472770046, "learning_rate": 8.231954582319546e-06, "loss": 0.6032, "step": 29176 }, { "epoch": 0.8518583399024846, "grad_norm": 0.531066183460382, "learning_rate": 8.230332522303327e-06, "loss": 0.5905, "step": 29177 }, { "epoch": 0.851887536130332, "grad_norm": 0.4990361783607252, "learning_rate": 8.228710462287105e-06, "loss": 0.5654, "step": 29178 }, { "epoch": 0.8519167323581793, "grad_norm": 0.5127264976312765, "learning_rate": 8.227088402270883e-06, "loss": 0.5416, "step": 29179 }, { "epoch": 0.8519459285860267, "grad_norm": 0.5353751510831831, "learning_rate": 8.225466342254663e-06, "loss": 0.5879, "step": 29180 }, { "epoch": 0.851975124813874, "grad_norm": 0.5183565718759456, "learning_rate": 8.223844282238443e-06, "loss": 0.5982, "step": 29181 }, { "epoch": 0.8520043210417214, "grad_norm": 0.5340364470175352, "learning_rate": 8.222222222222223e-06, "loss": 0.6185, "step": 29182 }, { "epoch": 0.8520335172695688, "grad_norm": 0.4912161069834116, "learning_rate": 8.220600162206002e-06, "loss": 0.4698, "step": 29183 }, { "epoch": 0.8520627134974161, "grad_norm": 0.5347087089469894, "learning_rate": 8.21897810218978e-06, "loss": 0.5938, "step": 29184 }, { "epoch": 0.8520919097252635, "grad_norm": 0.5064439573434059, "learning_rate": 8.21735604217356e-06, "loss": 0.5631, "step": 29185 }, { "epoch": 0.8521211059531109, "grad_norm": 0.5381828627973214, "learning_rate": 8.21573398215734e-06, "loss": 0.5669, "step": 29186 }, { "epoch": 0.8521503021809582, "grad_norm": 0.5296783577707006, "learning_rate": 8.21411192214112e-06, "loss": 0.555, "step": 29187 }, { "epoch": 0.8521794984088056, "grad_norm": 0.5069580159175668, "learning_rate": 8.212489862124899e-06, "loss": 0.5444, "step": 29188 }, { "epoch": 0.8522086946366529, "grad_norm": 0.5392609771191927, "learning_rate": 8.210867802108677e-06, "loss": 0.6163, "step": 29189 }, { "epoch": 0.8522378908645003, "grad_norm": 0.5222430605616585, "learning_rate": 8.209245742092459e-06, "loss": 0.5779, "step": 29190 }, { "epoch": 0.8522670870923477, "grad_norm": 0.5131501254429172, "learning_rate": 8.207623682076237e-06, "loss": 0.5496, "step": 29191 }, { "epoch": 0.852296283320195, "grad_norm": 0.5313442231454607, "learning_rate": 8.206001622060017e-06, "loss": 0.53, "step": 29192 }, { "epoch": 0.8523254795480424, "grad_norm": 0.5387966951802264, "learning_rate": 8.204379562043796e-06, "loss": 0.6241, "step": 29193 }, { "epoch": 0.8523546757758897, "grad_norm": 0.5581099424549489, "learning_rate": 8.202757502027574e-06, "loss": 0.6426, "step": 29194 }, { "epoch": 0.8523838720037371, "grad_norm": 0.5137632892807901, "learning_rate": 8.201135442011356e-06, "loss": 0.5136, "step": 29195 }, { "epoch": 0.8524130682315845, "grad_norm": 0.5479503870918379, "learning_rate": 8.199513381995134e-06, "loss": 0.6111, "step": 29196 }, { "epoch": 0.8524422644594318, "grad_norm": 0.5045563713596655, "learning_rate": 8.197891321978914e-06, "loss": 0.5697, "step": 29197 }, { "epoch": 0.8524714606872792, "grad_norm": 0.5456832153460627, "learning_rate": 8.196269261962693e-06, "loss": 0.6074, "step": 29198 }, { "epoch": 0.8525006569151266, "grad_norm": 0.5326592201879248, "learning_rate": 8.194647201946471e-06, "loss": 0.5224, "step": 29199 }, { "epoch": 0.8525298531429739, "grad_norm": 0.5025003081834404, "learning_rate": 8.193025141930253e-06, "loss": 0.5381, "step": 29200 }, { "epoch": 0.8525590493708213, "grad_norm": 0.5305478733529588, "learning_rate": 8.191403081914031e-06, "loss": 0.604, "step": 29201 }, { "epoch": 0.8525882455986686, "grad_norm": 0.5775813234613282, "learning_rate": 8.189781021897811e-06, "loss": 0.6494, "step": 29202 }, { "epoch": 0.852617441826516, "grad_norm": 0.5290359563122489, "learning_rate": 8.18815896188159e-06, "loss": 0.6152, "step": 29203 }, { "epoch": 0.8526466380543634, "grad_norm": 0.5259709704984156, "learning_rate": 8.18653690186537e-06, "loss": 0.5884, "step": 29204 }, { "epoch": 0.8526758342822107, "grad_norm": 0.5060422453736538, "learning_rate": 8.18491484184915e-06, "loss": 0.5418, "step": 29205 }, { "epoch": 0.8527050305100581, "grad_norm": 0.5487597167614274, "learning_rate": 8.183292781832928e-06, "loss": 0.6405, "step": 29206 }, { "epoch": 0.8527342267379054, "grad_norm": 0.5063641248039663, "learning_rate": 8.181670721816707e-06, "loss": 0.5337, "step": 29207 }, { "epoch": 0.8527634229657528, "grad_norm": 0.47126770437489013, "learning_rate": 8.180048661800487e-06, "loss": 0.5032, "step": 29208 }, { "epoch": 0.8527926191936002, "grad_norm": 0.5323253904807613, "learning_rate": 8.178426601784267e-06, "loss": 0.6181, "step": 29209 }, { "epoch": 0.8528218154214475, "grad_norm": 0.5219741223183167, "learning_rate": 8.176804541768047e-06, "loss": 0.5652, "step": 29210 }, { "epoch": 0.8528510116492949, "grad_norm": 0.49569036502653535, "learning_rate": 8.175182481751825e-06, "loss": 0.5111, "step": 29211 }, { "epoch": 0.8528802078771422, "grad_norm": 0.4766777295544378, "learning_rate": 8.173560421735604e-06, "loss": 0.5068, "step": 29212 }, { "epoch": 0.8529094041049896, "grad_norm": 0.4919041113251105, "learning_rate": 8.171938361719384e-06, "loss": 0.5252, "step": 29213 }, { "epoch": 0.852938600332837, "grad_norm": 0.4928366383921205, "learning_rate": 8.170316301703164e-06, "loss": 0.5112, "step": 29214 }, { "epoch": 0.8529677965606843, "grad_norm": 0.5187415067146252, "learning_rate": 8.168694241686944e-06, "loss": 0.5856, "step": 29215 }, { "epoch": 0.8529969927885317, "grad_norm": 0.5032453464426457, "learning_rate": 8.167072181670722e-06, "loss": 0.5659, "step": 29216 }, { "epoch": 0.853026189016379, "grad_norm": 0.6778065115574954, "learning_rate": 8.1654501216545e-06, "loss": 0.6245, "step": 29217 }, { "epoch": 0.8530553852442264, "grad_norm": 0.517520271163179, "learning_rate": 8.16382806163828e-06, "loss": 0.5199, "step": 29218 }, { "epoch": 0.8530845814720738, "grad_norm": 0.5442583445414233, "learning_rate": 8.16220600162206e-06, "loss": 0.6162, "step": 29219 }, { "epoch": 0.8531137776999211, "grad_norm": 0.5231721409564167, "learning_rate": 8.16058394160584e-06, "loss": 0.6, "step": 29220 }, { "epoch": 0.8531429739277685, "grad_norm": 0.5343314230585092, "learning_rate": 8.158961881589619e-06, "loss": 0.5656, "step": 29221 }, { "epoch": 0.8531721701556159, "grad_norm": 0.5041703798647528, "learning_rate": 8.157339821573397e-06, "loss": 0.5404, "step": 29222 }, { "epoch": 0.8532013663834632, "grad_norm": 0.532949072243118, "learning_rate": 8.15571776155718e-06, "loss": 0.6182, "step": 29223 }, { "epoch": 0.8532305626113106, "grad_norm": 0.5202791958806113, "learning_rate": 8.154095701540958e-06, "loss": 0.5526, "step": 29224 }, { "epoch": 0.853259758839158, "grad_norm": 0.5192059448239003, "learning_rate": 8.152473641524738e-06, "loss": 0.5547, "step": 29225 }, { "epoch": 0.8532889550670053, "grad_norm": 0.5226715197913508, "learning_rate": 8.150851581508516e-06, "loss": 0.5868, "step": 29226 }, { "epoch": 0.8533181512948527, "grad_norm": 0.5856377473890867, "learning_rate": 8.149229521492294e-06, "loss": 0.7069, "step": 29227 }, { "epoch": 0.8533473475227, "grad_norm": 0.535363973014285, "learning_rate": 8.147607461476076e-06, "loss": 0.5984, "step": 29228 }, { "epoch": 0.8533765437505474, "grad_norm": 0.5239206554365934, "learning_rate": 8.145985401459855e-06, "loss": 0.6039, "step": 29229 }, { "epoch": 0.8534057399783948, "grad_norm": 0.6186224468392809, "learning_rate": 8.144363341443635e-06, "loss": 0.5294, "step": 29230 }, { "epoch": 0.8534349362062421, "grad_norm": 0.5833683659157975, "learning_rate": 8.142741281427413e-06, "loss": 0.7331, "step": 29231 }, { "epoch": 0.8534641324340895, "grad_norm": 0.5249977839580764, "learning_rate": 8.141119221411191e-06, "loss": 0.5823, "step": 29232 }, { "epoch": 0.8534933286619368, "grad_norm": 0.6011725348515141, "learning_rate": 8.139497161394973e-06, "loss": 0.6656, "step": 29233 }, { "epoch": 0.8535225248897842, "grad_norm": 0.5034393028107212, "learning_rate": 8.137875101378751e-06, "loss": 0.5431, "step": 29234 }, { "epoch": 0.8535517211176316, "grad_norm": 0.5218336718165505, "learning_rate": 8.136253041362532e-06, "loss": 0.5882, "step": 29235 }, { "epoch": 0.8535809173454789, "grad_norm": 0.5147832783000885, "learning_rate": 8.13463098134631e-06, "loss": 0.5158, "step": 29236 }, { "epoch": 0.8536101135733263, "grad_norm": 0.5779011295225143, "learning_rate": 8.13300892133009e-06, "loss": 0.6418, "step": 29237 }, { "epoch": 0.8536393098011736, "grad_norm": 0.5383634531154984, "learning_rate": 8.13138686131387e-06, "loss": 0.5993, "step": 29238 }, { "epoch": 0.853668506029021, "grad_norm": 0.5723341530583597, "learning_rate": 8.129764801297648e-06, "loss": 0.6416, "step": 29239 }, { "epoch": 0.8536977022568684, "grad_norm": 0.5312884205663221, "learning_rate": 8.128142741281427e-06, "loss": 0.5981, "step": 29240 }, { "epoch": 0.8537268984847157, "grad_norm": 0.5013668802562137, "learning_rate": 8.126520681265207e-06, "loss": 0.4967, "step": 29241 }, { "epoch": 0.8537560947125631, "grad_norm": 0.5222765861904953, "learning_rate": 8.124898621248987e-06, "loss": 0.559, "step": 29242 }, { "epoch": 0.8537852909404104, "grad_norm": 0.5464175846200672, "learning_rate": 8.123276561232767e-06, "loss": 0.637, "step": 29243 }, { "epoch": 0.8538144871682578, "grad_norm": 0.5086541543379512, "learning_rate": 8.121654501216545e-06, "loss": 0.5789, "step": 29244 }, { "epoch": 0.8538436833961052, "grad_norm": 0.5213956915728768, "learning_rate": 8.120032441200324e-06, "loss": 0.5617, "step": 29245 }, { "epoch": 0.8538728796239525, "grad_norm": 0.5254428932761973, "learning_rate": 8.118410381184104e-06, "loss": 0.5792, "step": 29246 }, { "epoch": 0.8539020758518, "grad_norm": 0.5195206489496114, "learning_rate": 8.116788321167884e-06, "loss": 0.579, "step": 29247 }, { "epoch": 0.8539312720796474, "grad_norm": 0.5610937643184151, "learning_rate": 8.115166261151664e-06, "loss": 0.6251, "step": 29248 }, { "epoch": 0.8539604683074947, "grad_norm": 0.5271741906016261, "learning_rate": 8.113544201135442e-06, "loss": 0.6217, "step": 29249 }, { "epoch": 0.8539896645353421, "grad_norm": 0.5077732621547384, "learning_rate": 8.11192214111922e-06, "loss": 0.5228, "step": 29250 }, { "epoch": 0.8540188607631894, "grad_norm": 0.528250824283464, "learning_rate": 8.110300081103e-06, "loss": 0.5565, "step": 29251 }, { "epoch": 0.8540480569910368, "grad_norm": 0.5536949364269863, "learning_rate": 8.10867802108678e-06, "loss": 0.6362, "step": 29252 }, { "epoch": 0.8540772532188842, "grad_norm": 0.5455241305017147, "learning_rate": 8.107055961070561e-06, "loss": 0.6351, "step": 29253 }, { "epoch": 0.8541064494467315, "grad_norm": 0.5367886925736769, "learning_rate": 8.10543390105434e-06, "loss": 0.6033, "step": 29254 }, { "epoch": 0.8541356456745789, "grad_norm": 0.5609166198419239, "learning_rate": 8.103811841038118e-06, "loss": 0.6036, "step": 29255 }, { "epoch": 0.8541648419024263, "grad_norm": 0.5436110402663634, "learning_rate": 8.1021897810219e-06, "loss": 0.5902, "step": 29256 }, { "epoch": 0.8541940381302736, "grad_norm": 0.49072525878066464, "learning_rate": 8.100567721005678e-06, "loss": 0.4983, "step": 29257 }, { "epoch": 0.854223234358121, "grad_norm": 0.5432363934221819, "learning_rate": 8.098945660989458e-06, "loss": 0.6422, "step": 29258 }, { "epoch": 0.8542524305859683, "grad_norm": 0.510366451491779, "learning_rate": 8.097323600973236e-06, "loss": 0.5372, "step": 29259 }, { "epoch": 0.8542816268138157, "grad_norm": 0.5076425404575863, "learning_rate": 8.095701540957015e-06, "loss": 0.579, "step": 29260 }, { "epoch": 0.8543108230416631, "grad_norm": 0.5356520272793108, "learning_rate": 8.094079480940796e-06, "loss": 0.5679, "step": 29261 }, { "epoch": 0.8543400192695104, "grad_norm": 0.5036677285103885, "learning_rate": 8.092457420924575e-06, "loss": 0.5277, "step": 29262 }, { "epoch": 0.8543692154973578, "grad_norm": 0.5176927393399627, "learning_rate": 8.090835360908355e-06, "loss": 0.5711, "step": 29263 }, { "epoch": 0.8543984117252051, "grad_norm": 0.5074730657357936, "learning_rate": 8.089213300892133e-06, "loss": 0.5614, "step": 29264 }, { "epoch": 0.8544276079530525, "grad_norm": 0.5180554958762162, "learning_rate": 8.087591240875912e-06, "loss": 0.5768, "step": 29265 }, { "epoch": 0.8544568041808999, "grad_norm": 0.5022732761392603, "learning_rate": 8.085969180859693e-06, "loss": 0.5256, "step": 29266 }, { "epoch": 0.8544860004087472, "grad_norm": 0.517446711804, "learning_rate": 8.084347120843472e-06, "loss": 0.552, "step": 29267 }, { "epoch": 0.8545151966365946, "grad_norm": 0.5186724891865301, "learning_rate": 8.08272506082725e-06, "loss": 0.5689, "step": 29268 }, { "epoch": 0.854544392864442, "grad_norm": 0.5348286788264853, "learning_rate": 8.08110300081103e-06, "loss": 0.6222, "step": 29269 }, { "epoch": 0.8545735890922893, "grad_norm": 0.5344989419480325, "learning_rate": 8.079480940794808e-06, "loss": 0.5879, "step": 29270 }, { "epoch": 0.8546027853201367, "grad_norm": 0.5181464017508612, "learning_rate": 8.07785888077859e-06, "loss": 0.5262, "step": 29271 }, { "epoch": 0.854631981547984, "grad_norm": 0.5047696653645307, "learning_rate": 8.076236820762369e-06, "loss": 0.5536, "step": 29272 }, { "epoch": 0.8546611777758314, "grad_norm": 0.5064102154655408, "learning_rate": 8.074614760746147e-06, "loss": 0.5295, "step": 29273 }, { "epoch": 0.8546903740036788, "grad_norm": 0.5386655399487433, "learning_rate": 8.072992700729927e-06, "loss": 0.6055, "step": 29274 }, { "epoch": 0.8547195702315261, "grad_norm": 0.522146150912504, "learning_rate": 8.071370640713707e-06, "loss": 0.6062, "step": 29275 }, { "epoch": 0.8547487664593735, "grad_norm": 0.5174232068507624, "learning_rate": 8.069748580697487e-06, "loss": 0.6117, "step": 29276 }, { "epoch": 0.8547779626872208, "grad_norm": 0.5351525430371363, "learning_rate": 8.068126520681266e-06, "loss": 0.581, "step": 29277 }, { "epoch": 0.8548071589150682, "grad_norm": 0.5421583104628055, "learning_rate": 8.066504460665044e-06, "loss": 0.5836, "step": 29278 }, { "epoch": 0.8548363551429156, "grad_norm": 0.53526465754501, "learning_rate": 8.064882400648824e-06, "loss": 0.5918, "step": 29279 }, { "epoch": 0.8548655513707629, "grad_norm": 0.5707431446765144, "learning_rate": 8.063260340632604e-06, "loss": 0.5991, "step": 29280 }, { "epoch": 0.8548947475986103, "grad_norm": 0.4907860186116435, "learning_rate": 8.061638280616384e-06, "loss": 0.5161, "step": 29281 }, { "epoch": 0.8549239438264576, "grad_norm": 0.5145374185295545, "learning_rate": 8.060016220600163e-06, "loss": 0.5352, "step": 29282 }, { "epoch": 0.854953140054305, "grad_norm": 0.4862231821511248, "learning_rate": 8.058394160583941e-06, "loss": 0.528, "step": 29283 }, { "epoch": 0.8549823362821524, "grad_norm": 0.5239744532470504, "learning_rate": 8.056772100567721e-06, "loss": 0.5639, "step": 29284 }, { "epoch": 0.8550115325099997, "grad_norm": 0.5111515784115142, "learning_rate": 8.055150040551501e-06, "loss": 0.551, "step": 29285 }, { "epoch": 0.8550407287378471, "grad_norm": 0.6017910851827657, "learning_rate": 8.053527980535281e-06, "loss": 0.6668, "step": 29286 }, { "epoch": 0.8550699249656945, "grad_norm": 0.5460222939371955, "learning_rate": 8.05190592051906e-06, "loss": 0.6106, "step": 29287 }, { "epoch": 0.8550991211935418, "grad_norm": 0.555250748656165, "learning_rate": 8.050283860502838e-06, "loss": 0.6308, "step": 29288 }, { "epoch": 0.8551283174213892, "grad_norm": 0.5350523512073998, "learning_rate": 8.04866180048662e-06, "loss": 0.5764, "step": 29289 }, { "epoch": 0.8551575136492365, "grad_norm": 0.546001134190414, "learning_rate": 8.047039740470398e-06, "loss": 0.5869, "step": 29290 }, { "epoch": 0.8551867098770839, "grad_norm": 0.5353372107540075, "learning_rate": 8.045417680454178e-06, "loss": 0.6146, "step": 29291 }, { "epoch": 0.8552159061049313, "grad_norm": 0.5309356271538804, "learning_rate": 8.043795620437956e-06, "loss": 0.6266, "step": 29292 }, { "epoch": 0.8552451023327786, "grad_norm": 0.53034369023752, "learning_rate": 8.042173560421735e-06, "loss": 0.5569, "step": 29293 }, { "epoch": 0.855274298560626, "grad_norm": 0.5402964520147736, "learning_rate": 8.040551500405517e-06, "loss": 0.5737, "step": 29294 }, { "epoch": 0.8553034947884733, "grad_norm": 0.5267332529718659, "learning_rate": 8.038929440389295e-06, "loss": 0.5753, "step": 29295 }, { "epoch": 0.8553326910163207, "grad_norm": 0.5545815302292555, "learning_rate": 8.037307380373075e-06, "loss": 0.5831, "step": 29296 }, { "epoch": 0.8553618872441681, "grad_norm": 0.5213490877295583, "learning_rate": 8.035685320356853e-06, "loss": 0.5364, "step": 29297 }, { "epoch": 0.8553910834720154, "grad_norm": 0.5400631162164967, "learning_rate": 8.034063260340632e-06, "loss": 0.6398, "step": 29298 }, { "epoch": 0.8554202796998628, "grad_norm": 0.5318060623119156, "learning_rate": 8.032441200324414e-06, "loss": 0.6119, "step": 29299 }, { "epoch": 0.8554494759277101, "grad_norm": 0.5198508277024056, "learning_rate": 8.030819140308192e-06, "loss": 0.5762, "step": 29300 }, { "epoch": 0.8554786721555575, "grad_norm": 0.5167966910325505, "learning_rate": 8.02919708029197e-06, "loss": 0.53, "step": 29301 }, { "epoch": 0.8555078683834049, "grad_norm": 0.570813282029533, "learning_rate": 8.02757502027575e-06, "loss": 0.6545, "step": 29302 }, { "epoch": 0.8555370646112522, "grad_norm": 0.5759405543764491, "learning_rate": 8.025952960259529e-06, "loss": 0.6515, "step": 29303 }, { "epoch": 0.8555662608390996, "grad_norm": 0.5414681299075164, "learning_rate": 8.02433090024331e-06, "loss": 0.5669, "step": 29304 }, { "epoch": 0.855595457066947, "grad_norm": 0.5305517297931416, "learning_rate": 8.022708840227089e-06, "loss": 0.6089, "step": 29305 }, { "epoch": 0.8556246532947943, "grad_norm": 0.5290668274576344, "learning_rate": 8.021086780210867e-06, "loss": 0.6136, "step": 29306 }, { "epoch": 0.8556538495226417, "grad_norm": 0.5557097275934383, "learning_rate": 8.019464720194647e-06, "loss": 0.6371, "step": 29307 }, { "epoch": 0.855683045750489, "grad_norm": 0.5251325389320542, "learning_rate": 8.017842660178427e-06, "loss": 0.603, "step": 29308 }, { "epoch": 0.8557122419783364, "grad_norm": 0.5538830797644239, "learning_rate": 8.016220600162207e-06, "loss": 0.6146, "step": 29309 }, { "epoch": 0.8557414382061838, "grad_norm": 0.5274704879875823, "learning_rate": 8.014598540145986e-06, "loss": 0.5978, "step": 29310 }, { "epoch": 0.8557706344340311, "grad_norm": 0.5456513405694207, "learning_rate": 8.012976480129764e-06, "loss": 0.6024, "step": 29311 }, { "epoch": 0.8557998306618785, "grad_norm": 0.4906532436296938, "learning_rate": 8.011354420113544e-06, "loss": 0.5255, "step": 29312 }, { "epoch": 0.8558290268897258, "grad_norm": 0.5222546685594903, "learning_rate": 8.009732360097324e-06, "loss": 0.5682, "step": 29313 }, { "epoch": 0.8558582231175732, "grad_norm": 0.5889587864201326, "learning_rate": 8.008110300081104e-06, "loss": 0.699, "step": 29314 }, { "epoch": 0.8558874193454206, "grad_norm": 0.5742258085407691, "learning_rate": 8.006488240064883e-06, "loss": 0.6621, "step": 29315 }, { "epoch": 0.8559166155732679, "grad_norm": 0.5009144901481357, "learning_rate": 8.004866180048661e-06, "loss": 0.5376, "step": 29316 }, { "epoch": 0.8559458118011153, "grad_norm": 0.49428941964261724, "learning_rate": 8.003244120032441e-06, "loss": 0.545, "step": 29317 }, { "epoch": 0.8559750080289626, "grad_norm": 0.5458245046933516, "learning_rate": 8.001622060016221e-06, "loss": 0.6271, "step": 29318 }, { "epoch": 0.85600420425681, "grad_norm": 0.5837678332229507, "learning_rate": 8.000000000000001e-06, "loss": 0.7191, "step": 29319 }, { "epoch": 0.8560334004846574, "grad_norm": 0.49994406818649223, "learning_rate": 7.99837793998378e-06, "loss": 0.524, "step": 29320 }, { "epoch": 0.8560625967125047, "grad_norm": 0.5516879279982144, "learning_rate": 7.996755879967558e-06, "loss": 0.5765, "step": 29321 }, { "epoch": 0.8560917929403521, "grad_norm": 0.5415081197959438, "learning_rate": 7.99513381995134e-06, "loss": 0.6051, "step": 29322 }, { "epoch": 0.8561209891681995, "grad_norm": 0.5210761401963522, "learning_rate": 7.993511759935118e-06, "loss": 0.575, "step": 29323 }, { "epoch": 0.8561501853960468, "grad_norm": 0.5042206023957516, "learning_rate": 7.991889699918898e-06, "loss": 0.5401, "step": 29324 }, { "epoch": 0.8561793816238942, "grad_norm": 0.4782020766018087, "learning_rate": 7.990267639902677e-06, "loss": 0.4742, "step": 29325 }, { "epoch": 0.8562085778517415, "grad_norm": 0.5219775427194314, "learning_rate": 7.988645579886455e-06, "loss": 0.5679, "step": 29326 }, { "epoch": 0.8562377740795889, "grad_norm": 0.548970417445218, "learning_rate": 7.987023519870237e-06, "loss": 0.5958, "step": 29327 }, { "epoch": 0.8562669703074363, "grad_norm": 0.4789459897135803, "learning_rate": 7.985401459854015e-06, "loss": 0.4962, "step": 29328 }, { "epoch": 0.8562961665352836, "grad_norm": 0.5385457056685105, "learning_rate": 7.983779399837794e-06, "loss": 0.5926, "step": 29329 }, { "epoch": 0.856325362763131, "grad_norm": 0.5266756037109988, "learning_rate": 7.982157339821574e-06, "loss": 0.5669, "step": 29330 }, { "epoch": 0.8563545589909783, "grad_norm": 0.5566574825210326, "learning_rate": 7.980535279805352e-06, "loss": 0.6374, "step": 29331 }, { "epoch": 0.8563837552188257, "grad_norm": 0.5421910558702526, "learning_rate": 7.978913219789134e-06, "loss": 0.5804, "step": 29332 }, { "epoch": 0.8564129514466731, "grad_norm": 0.5475399721508784, "learning_rate": 7.977291159772912e-06, "loss": 0.6087, "step": 29333 }, { "epoch": 0.8564421476745204, "grad_norm": 0.5148121983576655, "learning_rate": 7.97566909975669e-06, "loss": 0.5697, "step": 29334 }, { "epoch": 0.8564713439023678, "grad_norm": 0.5791846125635123, "learning_rate": 7.97404703974047e-06, "loss": 0.5858, "step": 29335 }, { "epoch": 0.8565005401302151, "grad_norm": 0.5471395407358485, "learning_rate": 7.972424979724249e-06, "loss": 0.6294, "step": 29336 }, { "epoch": 0.8565297363580625, "grad_norm": 0.5351761607034529, "learning_rate": 7.97080291970803e-06, "loss": 0.6012, "step": 29337 }, { "epoch": 0.8565589325859099, "grad_norm": 0.5404045829999432, "learning_rate": 7.969180859691809e-06, "loss": 0.6339, "step": 29338 }, { "epoch": 0.8565881288137572, "grad_norm": 0.5420000630045847, "learning_rate": 7.967558799675587e-06, "loss": 0.6225, "step": 29339 }, { "epoch": 0.8566173250416046, "grad_norm": 0.5140990108346135, "learning_rate": 7.965936739659368e-06, "loss": 0.5442, "step": 29340 }, { "epoch": 0.856646521269452, "grad_norm": 0.5022881123460403, "learning_rate": 7.964314679643148e-06, "loss": 0.5559, "step": 29341 }, { "epoch": 0.8566757174972993, "grad_norm": 0.5611510332444407, "learning_rate": 7.962692619626928e-06, "loss": 0.6552, "step": 29342 }, { "epoch": 0.8567049137251467, "grad_norm": 0.5069014106529479, "learning_rate": 7.961070559610706e-06, "loss": 0.551, "step": 29343 }, { "epoch": 0.856734109952994, "grad_norm": 0.5360053529186771, "learning_rate": 7.959448499594484e-06, "loss": 0.6, "step": 29344 }, { "epoch": 0.8567633061808414, "grad_norm": 0.4990084469210031, "learning_rate": 7.957826439578264e-06, "loss": 0.5317, "step": 29345 }, { "epoch": 0.8567925024086888, "grad_norm": 0.5301459937127561, "learning_rate": 7.956204379562045e-06, "loss": 0.6019, "step": 29346 }, { "epoch": 0.8568216986365361, "grad_norm": 0.5248172628513449, "learning_rate": 7.954582319545825e-06, "loss": 0.5821, "step": 29347 }, { "epoch": 0.8568508948643835, "grad_norm": 0.5195046071120152, "learning_rate": 7.952960259529603e-06, "loss": 0.5412, "step": 29348 }, { "epoch": 0.8568800910922308, "grad_norm": 0.49561723472285407, "learning_rate": 7.951338199513381e-06, "loss": 0.507, "step": 29349 }, { "epoch": 0.8569092873200782, "grad_norm": 0.5728860057973548, "learning_rate": 7.949716139497161e-06, "loss": 0.7035, "step": 29350 }, { "epoch": 0.8569384835479256, "grad_norm": 0.5202889223209168, "learning_rate": 7.948094079480941e-06, "loss": 0.6009, "step": 29351 }, { "epoch": 0.8569676797757729, "grad_norm": 0.5132599309336284, "learning_rate": 7.946472019464722e-06, "loss": 0.56, "step": 29352 }, { "epoch": 0.8569968760036203, "grad_norm": 0.5168440591523658, "learning_rate": 7.9448499594485e-06, "loss": 0.5654, "step": 29353 }, { "epoch": 0.8570260722314677, "grad_norm": 0.529631377798186, "learning_rate": 7.943227899432278e-06, "loss": 0.5998, "step": 29354 }, { "epoch": 0.857055268459315, "grad_norm": 0.49859520152059517, "learning_rate": 7.94160583941606e-06, "loss": 0.508, "step": 29355 }, { "epoch": 0.8570844646871624, "grad_norm": 0.5136779154376323, "learning_rate": 7.939983779399838e-06, "loss": 0.5642, "step": 29356 }, { "epoch": 0.8571136609150097, "grad_norm": 0.47982023885305236, "learning_rate": 7.938361719383619e-06, "loss": 0.472, "step": 29357 }, { "epoch": 0.8571428571428571, "grad_norm": 0.5351332901568051, "learning_rate": 7.936739659367397e-06, "loss": 0.623, "step": 29358 }, { "epoch": 0.8571720533707045, "grad_norm": 0.5262093166522518, "learning_rate": 7.935117599351175e-06, "loss": 0.592, "step": 29359 }, { "epoch": 0.8572012495985518, "grad_norm": 0.5435810557065458, "learning_rate": 7.933495539334957e-06, "loss": 0.5878, "step": 29360 }, { "epoch": 0.8572304458263992, "grad_norm": 0.5806294512505661, "learning_rate": 7.931873479318735e-06, "loss": 0.6556, "step": 29361 }, { "epoch": 0.8572596420542465, "grad_norm": 0.4892039274153291, "learning_rate": 7.930251419302514e-06, "loss": 0.4897, "step": 29362 }, { "epoch": 0.8572888382820939, "grad_norm": 0.531865411408612, "learning_rate": 7.928629359286294e-06, "loss": 0.5862, "step": 29363 }, { "epoch": 0.8573180345099413, "grad_norm": 0.5272874495861704, "learning_rate": 7.927007299270072e-06, "loss": 0.5661, "step": 29364 }, { "epoch": 0.8573472307377886, "grad_norm": 0.4850031057939364, "learning_rate": 7.925385239253854e-06, "loss": 0.5296, "step": 29365 }, { "epoch": 0.857376426965636, "grad_norm": 0.5576204183886914, "learning_rate": 7.923763179237632e-06, "loss": 0.6504, "step": 29366 }, { "epoch": 0.8574056231934835, "grad_norm": 0.5273242845254859, "learning_rate": 7.92214111922141e-06, "loss": 0.5782, "step": 29367 }, { "epoch": 0.8574348194213308, "grad_norm": 0.5081736413976753, "learning_rate": 7.92051905920519e-06, "loss": 0.5366, "step": 29368 }, { "epoch": 0.8574640156491782, "grad_norm": 0.524925147759837, "learning_rate": 7.91889699918897e-06, "loss": 0.5694, "step": 29369 }, { "epoch": 0.8574932118770255, "grad_norm": 0.5316639221438966, "learning_rate": 7.917274939172751e-06, "loss": 0.5588, "step": 29370 }, { "epoch": 0.8575224081048729, "grad_norm": 0.49213445002025574, "learning_rate": 7.91565287915653e-06, "loss": 0.5055, "step": 29371 }, { "epoch": 0.8575516043327203, "grad_norm": 0.5347032651917086, "learning_rate": 7.914030819140308e-06, "loss": 0.5873, "step": 29372 }, { "epoch": 0.8575808005605676, "grad_norm": 0.5502122769704328, "learning_rate": 7.912408759124088e-06, "loss": 0.6285, "step": 29373 }, { "epoch": 0.857609996788415, "grad_norm": 0.5116979529059729, "learning_rate": 7.910786699107868e-06, "loss": 0.5461, "step": 29374 }, { "epoch": 0.8576391930162623, "grad_norm": 0.5728431592858827, "learning_rate": 7.909164639091648e-06, "loss": 0.7145, "step": 29375 }, { "epoch": 0.8576683892441097, "grad_norm": 0.5055710535731525, "learning_rate": 7.907542579075426e-06, "loss": 0.5167, "step": 29376 }, { "epoch": 0.8576975854719571, "grad_norm": 0.5233418520890608, "learning_rate": 7.905920519059205e-06, "loss": 0.5703, "step": 29377 }, { "epoch": 0.8577267816998044, "grad_norm": 0.5491631173883259, "learning_rate": 7.904298459042985e-06, "loss": 0.6563, "step": 29378 }, { "epoch": 0.8577559779276518, "grad_norm": 0.5400867514258975, "learning_rate": 7.902676399026765e-06, "loss": 0.5977, "step": 29379 }, { "epoch": 0.8577851741554992, "grad_norm": 0.5183211419978074, "learning_rate": 7.901054339010545e-06, "loss": 0.5728, "step": 29380 }, { "epoch": 0.8578143703833465, "grad_norm": 0.5509380226895886, "learning_rate": 7.899432278994323e-06, "loss": 0.6234, "step": 29381 }, { "epoch": 0.8578435666111939, "grad_norm": 0.5701819975605409, "learning_rate": 7.897810218978102e-06, "loss": 0.6469, "step": 29382 }, { "epoch": 0.8578727628390412, "grad_norm": 0.5121431207088155, "learning_rate": 7.896188158961882e-06, "loss": 0.5581, "step": 29383 }, { "epoch": 0.8579019590668886, "grad_norm": 0.5314517627940925, "learning_rate": 7.894566098945662e-06, "loss": 0.5723, "step": 29384 }, { "epoch": 0.857931155294736, "grad_norm": 0.5514921564635746, "learning_rate": 7.892944038929442e-06, "loss": 0.6284, "step": 29385 }, { "epoch": 0.8579603515225833, "grad_norm": 0.5082573936948773, "learning_rate": 7.89132197891322e-06, "loss": 0.5601, "step": 29386 }, { "epoch": 0.8579895477504307, "grad_norm": 0.5091694768718503, "learning_rate": 7.889699918896999e-06, "loss": 0.5678, "step": 29387 }, { "epoch": 0.858018743978278, "grad_norm": 0.4687804427564848, "learning_rate": 7.88807785888078e-06, "loss": 0.5107, "step": 29388 }, { "epoch": 0.8580479402061254, "grad_norm": 0.5067982750662424, "learning_rate": 7.886455798864559e-06, "loss": 0.5665, "step": 29389 }, { "epoch": 0.8580771364339728, "grad_norm": 0.5461311736890939, "learning_rate": 7.884833738848337e-06, "loss": 0.608, "step": 29390 }, { "epoch": 0.8581063326618201, "grad_norm": 0.5217620168656293, "learning_rate": 7.883211678832117e-06, "loss": 0.5853, "step": 29391 }, { "epoch": 0.8581355288896675, "grad_norm": 0.581702569238498, "learning_rate": 7.881589618815895e-06, "loss": 0.6732, "step": 29392 }, { "epoch": 0.8581647251175148, "grad_norm": 0.495461829526281, "learning_rate": 7.879967558799677e-06, "loss": 0.5373, "step": 29393 }, { "epoch": 0.8581939213453622, "grad_norm": 0.5496648711485174, "learning_rate": 7.878345498783456e-06, "loss": 0.6355, "step": 29394 }, { "epoch": 0.8582231175732096, "grad_norm": 0.5212337508751703, "learning_rate": 7.876723438767234e-06, "loss": 0.6075, "step": 29395 }, { "epoch": 0.8582523138010569, "grad_norm": 0.5255650822309925, "learning_rate": 7.875101378751014e-06, "loss": 0.5971, "step": 29396 }, { "epoch": 0.8582815100289043, "grad_norm": 0.513280006959218, "learning_rate": 7.873479318734792e-06, "loss": 0.5604, "step": 29397 }, { "epoch": 0.8583107062567517, "grad_norm": 0.5285167517380256, "learning_rate": 7.871857258718574e-06, "loss": 0.5708, "step": 29398 }, { "epoch": 0.858339902484599, "grad_norm": 0.5127767690489978, "learning_rate": 7.870235198702353e-06, "loss": 0.5593, "step": 29399 }, { "epoch": 0.8583690987124464, "grad_norm": 0.5352529840402581, "learning_rate": 7.868613138686131e-06, "loss": 0.6418, "step": 29400 }, { "epoch": 0.8583982949402937, "grad_norm": 0.5328087481160476, "learning_rate": 7.866991078669911e-06, "loss": 0.6176, "step": 29401 }, { "epoch": 0.8584274911681411, "grad_norm": 0.5226479724425994, "learning_rate": 7.86536901865369e-06, "loss": 0.6068, "step": 29402 }, { "epoch": 0.8584566873959885, "grad_norm": 0.5071816098173564, "learning_rate": 7.863746958637471e-06, "loss": 0.5502, "step": 29403 }, { "epoch": 0.8584858836238358, "grad_norm": 0.5463773039372589, "learning_rate": 7.86212489862125e-06, "loss": 0.6013, "step": 29404 }, { "epoch": 0.8585150798516832, "grad_norm": 0.5140392809611032, "learning_rate": 7.860502838605028e-06, "loss": 0.5605, "step": 29405 }, { "epoch": 0.8585442760795305, "grad_norm": 0.53677335790522, "learning_rate": 7.858880778588808e-06, "loss": 0.5923, "step": 29406 }, { "epoch": 0.8585734723073779, "grad_norm": 0.4936363547214118, "learning_rate": 7.857258718572588e-06, "loss": 0.5064, "step": 29407 }, { "epoch": 0.8586026685352253, "grad_norm": 0.5268961625386694, "learning_rate": 7.855636658556368e-06, "loss": 0.5233, "step": 29408 }, { "epoch": 0.8586318647630726, "grad_norm": 0.5613332933176308, "learning_rate": 7.854014598540146e-06, "loss": 0.6183, "step": 29409 }, { "epoch": 0.85866106099092, "grad_norm": 0.5478559684104448, "learning_rate": 7.852392538523925e-06, "loss": 0.6253, "step": 29410 }, { "epoch": 0.8586902572187673, "grad_norm": 0.5006515263391766, "learning_rate": 7.850770478507705e-06, "loss": 0.5064, "step": 29411 }, { "epoch": 0.8587194534466147, "grad_norm": 0.5555621150811201, "learning_rate": 7.849148418491485e-06, "loss": 0.5906, "step": 29412 }, { "epoch": 0.8587486496744621, "grad_norm": 0.4545717771567508, "learning_rate": 7.847526358475265e-06, "loss": 0.4454, "step": 29413 }, { "epoch": 0.8587778459023094, "grad_norm": 0.5462245203226379, "learning_rate": 7.845904298459043e-06, "loss": 0.6393, "step": 29414 }, { "epoch": 0.8588070421301568, "grad_norm": 0.5418293924477959, "learning_rate": 7.844282238442822e-06, "loss": 0.5887, "step": 29415 }, { "epoch": 0.8588362383580042, "grad_norm": 0.5103417587277619, "learning_rate": 7.842660178426602e-06, "loss": 0.565, "step": 29416 }, { "epoch": 0.8588654345858515, "grad_norm": 0.49332800913902985, "learning_rate": 7.841038118410382e-06, "loss": 0.54, "step": 29417 }, { "epoch": 0.8588946308136989, "grad_norm": 0.5606082115369174, "learning_rate": 7.839416058394162e-06, "loss": 0.6212, "step": 29418 }, { "epoch": 0.8589238270415462, "grad_norm": 0.4980127828597621, "learning_rate": 7.83779399837794e-06, "loss": 0.515, "step": 29419 }, { "epoch": 0.8589530232693936, "grad_norm": 0.5265808635399444, "learning_rate": 7.836171938361719e-06, "loss": 0.5677, "step": 29420 }, { "epoch": 0.858982219497241, "grad_norm": 0.5082650576987218, "learning_rate": 7.834549878345499e-06, "loss": 0.5605, "step": 29421 }, { "epoch": 0.8590114157250883, "grad_norm": 0.46734743976833326, "learning_rate": 7.832927818329279e-06, "loss": 0.4722, "step": 29422 }, { "epoch": 0.8590406119529357, "grad_norm": 0.5481930629432514, "learning_rate": 7.831305758313057e-06, "loss": 0.5965, "step": 29423 }, { "epoch": 0.859069808180783, "grad_norm": 0.5601652995682609, "learning_rate": 7.829683698296837e-06, "loss": 0.6525, "step": 29424 }, { "epoch": 0.8590990044086304, "grad_norm": 0.5113566879608898, "learning_rate": 7.828061638280616e-06, "loss": 0.5535, "step": 29425 }, { "epoch": 0.8591282006364778, "grad_norm": 0.49717498011151867, "learning_rate": 7.826439578264397e-06, "loss": 0.5274, "step": 29426 }, { "epoch": 0.8591573968643251, "grad_norm": 0.5119957513100933, "learning_rate": 7.824817518248176e-06, "loss": 0.5308, "step": 29427 }, { "epoch": 0.8591865930921725, "grad_norm": 0.5150449073821916, "learning_rate": 7.823195458231954e-06, "loss": 0.5197, "step": 29428 }, { "epoch": 0.8592157893200199, "grad_norm": 0.5379823702670232, "learning_rate": 7.821573398215734e-06, "loss": 0.5991, "step": 29429 }, { "epoch": 0.8592449855478672, "grad_norm": 0.5117869934884554, "learning_rate": 7.819951338199513e-06, "loss": 0.54, "step": 29430 }, { "epoch": 0.8592741817757146, "grad_norm": 0.526150837984722, "learning_rate": 7.818329278183294e-06, "loss": 0.5933, "step": 29431 }, { "epoch": 0.8593033780035619, "grad_norm": 0.4761918042438883, "learning_rate": 7.816707218167073e-06, "loss": 0.5053, "step": 29432 }, { "epoch": 0.8593325742314093, "grad_norm": 0.518096234955032, "learning_rate": 7.815085158150851e-06, "loss": 0.5776, "step": 29433 }, { "epoch": 0.8593617704592567, "grad_norm": 0.48415808007830796, "learning_rate": 7.813463098134631e-06, "loss": 0.4988, "step": 29434 }, { "epoch": 0.859390966687104, "grad_norm": 0.553193308605178, "learning_rate": 7.81184103811841e-06, "loss": 0.623, "step": 29435 }, { "epoch": 0.8594201629149514, "grad_norm": 0.5180266457515199, "learning_rate": 7.810218978102191e-06, "loss": 0.5981, "step": 29436 }, { "epoch": 0.8594493591427987, "grad_norm": 0.5194929245446297, "learning_rate": 7.80859691808597e-06, "loss": 0.5225, "step": 29437 }, { "epoch": 0.8594785553706461, "grad_norm": 0.49047667202057793, "learning_rate": 7.806974858069748e-06, "loss": 0.5257, "step": 29438 }, { "epoch": 0.8595077515984935, "grad_norm": 0.556835747233417, "learning_rate": 7.805352798053528e-06, "loss": 0.6582, "step": 29439 }, { "epoch": 0.8595369478263408, "grad_norm": 0.5360266978869603, "learning_rate": 7.803730738037308e-06, "loss": 0.6043, "step": 29440 }, { "epoch": 0.8595661440541882, "grad_norm": 0.49339328249502923, "learning_rate": 7.802108678021088e-06, "loss": 0.5277, "step": 29441 }, { "epoch": 0.8595953402820355, "grad_norm": 0.5229810281941757, "learning_rate": 7.800486618004867e-06, "loss": 0.5365, "step": 29442 }, { "epoch": 0.8596245365098829, "grad_norm": 0.5286644750410594, "learning_rate": 7.798864557988645e-06, "loss": 0.595, "step": 29443 }, { "epoch": 0.8596537327377303, "grad_norm": 0.4979315948957949, "learning_rate": 7.797242497972425e-06, "loss": 0.5405, "step": 29444 }, { "epoch": 0.8596829289655776, "grad_norm": 0.48740702699538513, "learning_rate": 7.795620437956205e-06, "loss": 0.4856, "step": 29445 }, { "epoch": 0.859712125193425, "grad_norm": 0.5435819762132557, "learning_rate": 7.793998377939985e-06, "loss": 0.5938, "step": 29446 }, { "epoch": 0.8597413214212724, "grad_norm": 0.5276766799500447, "learning_rate": 7.792376317923764e-06, "loss": 0.5806, "step": 29447 }, { "epoch": 0.8597705176491197, "grad_norm": 0.50217835298923, "learning_rate": 7.790754257907542e-06, "loss": 0.5188, "step": 29448 }, { "epoch": 0.8597997138769671, "grad_norm": 0.5784111060754222, "learning_rate": 7.789132197891322e-06, "loss": 0.6661, "step": 29449 }, { "epoch": 0.8598289101048144, "grad_norm": 0.5646461231023161, "learning_rate": 7.787510137875102e-06, "loss": 0.6442, "step": 29450 }, { "epoch": 0.8598581063326618, "grad_norm": 0.534606205581685, "learning_rate": 7.78588807785888e-06, "loss": 0.6466, "step": 29451 }, { "epoch": 0.8598873025605092, "grad_norm": 0.5788581637831552, "learning_rate": 7.78426601784266e-06, "loss": 0.6356, "step": 29452 }, { "epoch": 0.8599164987883565, "grad_norm": 0.5291610202974211, "learning_rate": 7.782643957826439e-06, "loss": 0.5893, "step": 29453 }, { "epoch": 0.8599456950162039, "grad_norm": 0.5496156986847466, "learning_rate": 7.781021897810219e-06, "loss": 0.6324, "step": 29454 }, { "epoch": 0.8599748912440512, "grad_norm": 0.5187843203521936, "learning_rate": 7.779399837793999e-06, "loss": 0.5471, "step": 29455 }, { "epoch": 0.8600040874718986, "grad_norm": 0.5061064602151557, "learning_rate": 7.777777777777777e-06, "loss": 0.538, "step": 29456 }, { "epoch": 0.860033283699746, "grad_norm": 0.5209409602852997, "learning_rate": 7.776155717761558e-06, "loss": 0.5871, "step": 29457 }, { "epoch": 0.8600624799275933, "grad_norm": 0.6031121513888993, "learning_rate": 7.774533657745336e-06, "loss": 0.6766, "step": 29458 }, { "epoch": 0.8600916761554407, "grad_norm": 0.5437978151470344, "learning_rate": 7.772911597729118e-06, "loss": 0.5998, "step": 29459 }, { "epoch": 0.860120872383288, "grad_norm": 0.5375006915794096, "learning_rate": 7.771289537712896e-06, "loss": 0.5752, "step": 29460 }, { "epoch": 0.8601500686111354, "grad_norm": 0.5183735421991045, "learning_rate": 7.769667477696674e-06, "loss": 0.5703, "step": 29461 }, { "epoch": 0.8601792648389828, "grad_norm": 0.4845649139113149, "learning_rate": 7.768045417680454e-06, "loss": 0.4913, "step": 29462 }, { "epoch": 0.8602084610668301, "grad_norm": 0.5365112909389377, "learning_rate": 7.766423357664233e-06, "loss": 0.5951, "step": 29463 }, { "epoch": 0.8602376572946775, "grad_norm": 0.5107147535362954, "learning_rate": 7.764801297648015e-06, "loss": 0.5566, "step": 29464 }, { "epoch": 0.8602668535225249, "grad_norm": 0.5459551137623563, "learning_rate": 7.763179237631793e-06, "loss": 0.602, "step": 29465 }, { "epoch": 0.8602960497503722, "grad_norm": 0.4994679880950613, "learning_rate": 7.761557177615571e-06, "loss": 0.5199, "step": 29466 }, { "epoch": 0.8603252459782196, "grad_norm": 0.5480449470177514, "learning_rate": 7.759935117599351e-06, "loss": 0.6378, "step": 29467 }, { "epoch": 0.8603544422060669, "grad_norm": 0.5222836895949753, "learning_rate": 7.75831305758313e-06, "loss": 0.5995, "step": 29468 }, { "epoch": 0.8603836384339143, "grad_norm": 0.503227290914963, "learning_rate": 7.756690997566912e-06, "loss": 0.5018, "step": 29469 }, { "epoch": 0.8604128346617617, "grad_norm": 0.5726015031571574, "learning_rate": 7.75506893755069e-06, "loss": 0.6386, "step": 29470 }, { "epoch": 0.860442030889609, "grad_norm": 0.5447759010830002, "learning_rate": 7.753446877534468e-06, "loss": 0.6238, "step": 29471 }, { "epoch": 0.8604712271174564, "grad_norm": 0.6222118522186408, "learning_rate": 7.751824817518248e-06, "loss": 0.805, "step": 29472 }, { "epoch": 0.8605004233453037, "grad_norm": 0.48253059806400816, "learning_rate": 7.750202757502028e-06, "loss": 0.5232, "step": 29473 }, { "epoch": 0.8605296195731511, "grad_norm": 0.47877975940508294, "learning_rate": 7.748580697485809e-06, "loss": 0.5247, "step": 29474 }, { "epoch": 0.8605588158009985, "grad_norm": 0.5805015821082258, "learning_rate": 7.746958637469587e-06, "loss": 0.6781, "step": 29475 }, { "epoch": 0.8605880120288458, "grad_norm": 0.5552471672757228, "learning_rate": 7.745336577453365e-06, "loss": 0.6016, "step": 29476 }, { "epoch": 0.8606172082566932, "grad_norm": 0.5446006764592007, "learning_rate": 7.743714517437145e-06, "loss": 0.5851, "step": 29477 }, { "epoch": 0.8606464044845406, "grad_norm": 0.5036987372363461, "learning_rate": 7.742092457420925e-06, "loss": 0.5085, "step": 29478 }, { "epoch": 0.8606756007123879, "grad_norm": 0.5221370105463636, "learning_rate": 7.740470397404705e-06, "loss": 0.5606, "step": 29479 }, { "epoch": 0.8607047969402353, "grad_norm": 0.5522773560108818, "learning_rate": 7.738848337388484e-06, "loss": 0.6367, "step": 29480 }, { "epoch": 0.8607339931680826, "grad_norm": 0.5553695868046763, "learning_rate": 7.737226277372262e-06, "loss": 0.6338, "step": 29481 }, { "epoch": 0.86076318939593, "grad_norm": 0.49625189526966434, "learning_rate": 7.735604217356042e-06, "loss": 0.4977, "step": 29482 }, { "epoch": 0.8607923856237774, "grad_norm": 0.5524367934026421, "learning_rate": 7.733982157339822e-06, "loss": 0.5639, "step": 29483 }, { "epoch": 0.8608215818516247, "grad_norm": 0.5794260899734558, "learning_rate": 7.7323600973236e-06, "loss": 0.6967, "step": 29484 }, { "epoch": 0.8608507780794721, "grad_norm": 0.5035655986417188, "learning_rate": 7.73073803730738e-06, "loss": 0.5393, "step": 29485 }, { "epoch": 0.8608799743073194, "grad_norm": 0.5558087356826297, "learning_rate": 7.72911597729116e-06, "loss": 0.6084, "step": 29486 }, { "epoch": 0.8609091705351668, "grad_norm": 0.5264486077876562, "learning_rate": 7.72749391727494e-06, "loss": 0.6192, "step": 29487 }, { "epoch": 0.8609383667630143, "grad_norm": 0.5089486205548086, "learning_rate": 7.72587185725872e-06, "loss": 0.5533, "step": 29488 }, { "epoch": 0.8609675629908616, "grad_norm": 0.5101121561752525, "learning_rate": 7.724249797242498e-06, "loss": 0.5534, "step": 29489 }, { "epoch": 0.860996759218709, "grad_norm": 0.5354714044045639, "learning_rate": 7.722627737226278e-06, "loss": 0.5773, "step": 29490 }, { "epoch": 0.8610259554465564, "grad_norm": 0.5292657235851015, "learning_rate": 7.721005677210056e-06, "loss": 0.587, "step": 29491 }, { "epoch": 0.8610551516744037, "grad_norm": 0.49736662835545864, "learning_rate": 7.719383617193838e-06, "loss": 0.5419, "step": 29492 }, { "epoch": 0.8610843479022511, "grad_norm": 0.5577674610184691, "learning_rate": 7.717761557177616e-06, "loss": 0.6721, "step": 29493 }, { "epoch": 0.8611135441300984, "grad_norm": 0.5474817845542096, "learning_rate": 7.716139497161395e-06, "loss": 0.6364, "step": 29494 }, { "epoch": 0.8611427403579458, "grad_norm": 0.6014596444884932, "learning_rate": 7.714517437145175e-06, "loss": 0.6539, "step": 29495 }, { "epoch": 0.8611719365857932, "grad_norm": 0.5107226544967287, "learning_rate": 7.712895377128953e-06, "loss": 0.5347, "step": 29496 }, { "epoch": 0.8612011328136405, "grad_norm": 0.5159553457345866, "learning_rate": 7.711273317112735e-06, "loss": 0.5331, "step": 29497 }, { "epoch": 0.8612303290414879, "grad_norm": 0.5250168378122365, "learning_rate": 7.709651257096513e-06, "loss": 0.5696, "step": 29498 }, { "epoch": 0.8612595252693352, "grad_norm": 0.5311921539563269, "learning_rate": 7.708029197080292e-06, "loss": 0.6204, "step": 29499 }, { "epoch": 0.8612887214971826, "grad_norm": 0.5397857847134144, "learning_rate": 7.706407137064072e-06, "loss": 0.5859, "step": 29500 }, { "epoch": 0.86131791772503, "grad_norm": 0.5608218117174663, "learning_rate": 7.70478507704785e-06, "loss": 0.6217, "step": 29501 }, { "epoch": 0.8613471139528773, "grad_norm": 0.5976532262858746, "learning_rate": 7.703163017031632e-06, "loss": 0.6566, "step": 29502 }, { "epoch": 0.8613763101807247, "grad_norm": 0.5501933308630058, "learning_rate": 7.70154095701541e-06, "loss": 0.603, "step": 29503 }, { "epoch": 0.861405506408572, "grad_norm": 0.5100344655161767, "learning_rate": 7.699918896999189e-06, "loss": 0.581, "step": 29504 }, { "epoch": 0.8614347026364194, "grad_norm": 0.5209205331348157, "learning_rate": 7.698296836982969e-06, "loss": 0.5691, "step": 29505 }, { "epoch": 0.8614638988642668, "grad_norm": 0.47337348663412504, "learning_rate": 7.696674776966749e-06, "loss": 0.4803, "step": 29506 }, { "epoch": 0.8614930950921141, "grad_norm": 0.5444635979497001, "learning_rate": 7.695052716950529e-06, "loss": 0.593, "step": 29507 }, { "epoch": 0.8615222913199615, "grad_norm": 0.5497179988078026, "learning_rate": 7.693430656934307e-06, "loss": 0.5867, "step": 29508 }, { "epoch": 0.8615514875478089, "grad_norm": 0.5686326355739427, "learning_rate": 7.691808596918086e-06, "loss": 0.6798, "step": 29509 }, { "epoch": 0.8615806837756562, "grad_norm": 0.5324387874349027, "learning_rate": 7.690186536901866e-06, "loss": 0.5782, "step": 29510 }, { "epoch": 0.8616098800035036, "grad_norm": 0.5291030655435381, "learning_rate": 7.688564476885646e-06, "loss": 0.6016, "step": 29511 }, { "epoch": 0.8616390762313509, "grad_norm": 0.5465176069758502, "learning_rate": 7.686942416869424e-06, "loss": 0.6552, "step": 29512 }, { "epoch": 0.8616682724591983, "grad_norm": 0.5622899446360959, "learning_rate": 7.685320356853204e-06, "loss": 0.6682, "step": 29513 }, { "epoch": 0.8616974686870457, "grad_norm": 0.5577546379872321, "learning_rate": 7.683698296836982e-06, "loss": 0.5926, "step": 29514 }, { "epoch": 0.861726664914893, "grad_norm": 0.5232549969777502, "learning_rate": 7.682076236820763e-06, "loss": 0.5665, "step": 29515 }, { "epoch": 0.8617558611427404, "grad_norm": 0.5835591280181516, "learning_rate": 7.680454176804543e-06, "loss": 0.6323, "step": 29516 }, { "epoch": 0.8617850573705877, "grad_norm": 0.5263163584089618, "learning_rate": 7.678832116788321e-06, "loss": 0.5919, "step": 29517 }, { "epoch": 0.8618142535984351, "grad_norm": 0.592950948695386, "learning_rate": 7.677210056772101e-06, "loss": 0.5968, "step": 29518 }, { "epoch": 0.8618434498262825, "grad_norm": 0.5503245561681749, "learning_rate": 7.67558799675588e-06, "loss": 0.6269, "step": 29519 }, { "epoch": 0.8618726460541298, "grad_norm": 0.5419026681201984, "learning_rate": 7.67396593673966e-06, "loss": 0.598, "step": 29520 }, { "epoch": 0.8619018422819772, "grad_norm": 0.528653707401802, "learning_rate": 7.67234387672344e-06, "loss": 0.6027, "step": 29521 }, { "epoch": 0.8619310385098246, "grad_norm": 0.5468829248263702, "learning_rate": 7.670721816707218e-06, "loss": 0.5892, "step": 29522 }, { "epoch": 0.8619602347376719, "grad_norm": 0.5066606124674002, "learning_rate": 7.669099756690998e-06, "loss": 0.5368, "step": 29523 }, { "epoch": 0.8619894309655193, "grad_norm": 0.5131357814664551, "learning_rate": 7.667477696674776e-06, "loss": 0.5289, "step": 29524 }, { "epoch": 0.8620186271933666, "grad_norm": 0.527712961712488, "learning_rate": 7.665855636658558e-06, "loss": 0.5813, "step": 29525 }, { "epoch": 0.862047823421214, "grad_norm": 0.5557996840668109, "learning_rate": 7.664233576642336e-06, "loss": 0.5861, "step": 29526 }, { "epoch": 0.8620770196490614, "grad_norm": 0.5416048776179776, "learning_rate": 7.662611516626115e-06, "loss": 0.6372, "step": 29527 }, { "epoch": 0.8621062158769087, "grad_norm": 0.5484938407296979, "learning_rate": 7.660989456609895e-06, "loss": 0.6491, "step": 29528 }, { "epoch": 0.8621354121047561, "grad_norm": 0.516624311150504, "learning_rate": 7.659367396593673e-06, "loss": 0.5698, "step": 29529 }, { "epoch": 0.8621646083326034, "grad_norm": 0.5036721869760442, "learning_rate": 7.657745336577455e-06, "loss": 0.537, "step": 29530 }, { "epoch": 0.8621938045604508, "grad_norm": 0.8674668414985174, "learning_rate": 7.656123276561233e-06, "loss": 0.6462, "step": 29531 }, { "epoch": 0.8622230007882982, "grad_norm": 0.4996316816889371, "learning_rate": 7.654501216545012e-06, "loss": 0.5336, "step": 29532 }, { "epoch": 0.8622521970161455, "grad_norm": 0.544601141225456, "learning_rate": 7.652879156528792e-06, "loss": 0.6489, "step": 29533 }, { "epoch": 0.8622813932439929, "grad_norm": 0.5035028543979918, "learning_rate": 7.65125709651257e-06, "loss": 0.5139, "step": 29534 }, { "epoch": 0.8623105894718402, "grad_norm": 0.5052434436979509, "learning_rate": 7.649635036496352e-06, "loss": 0.5201, "step": 29535 }, { "epoch": 0.8623397856996876, "grad_norm": 0.5997923072020352, "learning_rate": 7.64801297648013e-06, "loss": 0.6459, "step": 29536 }, { "epoch": 0.862368981927535, "grad_norm": 0.5634945013314786, "learning_rate": 7.646390916463909e-06, "loss": 0.6492, "step": 29537 }, { "epoch": 0.8623981781553823, "grad_norm": 0.5524836753537176, "learning_rate": 7.644768856447689e-06, "loss": 0.6082, "step": 29538 }, { "epoch": 0.8624273743832297, "grad_norm": 0.5056187835618382, "learning_rate": 7.643146796431467e-06, "loss": 0.5786, "step": 29539 }, { "epoch": 0.8624565706110771, "grad_norm": 0.5357269920819637, "learning_rate": 7.641524736415249e-06, "loss": 0.6063, "step": 29540 }, { "epoch": 0.8624857668389244, "grad_norm": 0.5531807515204495, "learning_rate": 7.639902676399027e-06, "loss": 0.6091, "step": 29541 }, { "epoch": 0.8625149630667718, "grad_norm": 0.551109644641137, "learning_rate": 7.638280616382806e-06, "loss": 0.5892, "step": 29542 }, { "epoch": 0.8625441592946191, "grad_norm": 0.4919782939545588, "learning_rate": 7.636658556366586e-06, "loss": 0.5007, "step": 29543 }, { "epoch": 0.8625733555224665, "grad_norm": 0.5323457214566654, "learning_rate": 7.635036496350366e-06, "loss": 0.5666, "step": 29544 }, { "epoch": 0.8626025517503139, "grad_norm": 0.5285172371229587, "learning_rate": 7.633414436334144e-06, "loss": 0.6069, "step": 29545 }, { "epoch": 0.8626317479781612, "grad_norm": 0.544909457411402, "learning_rate": 7.631792376317924e-06, "loss": 0.633, "step": 29546 }, { "epoch": 0.8626609442060086, "grad_norm": 0.4737883897153606, "learning_rate": 7.630170316301703e-06, "loss": 0.4827, "step": 29547 }, { "epoch": 0.862690140433856, "grad_norm": 0.5175723391377253, "learning_rate": 7.628548256285483e-06, "loss": 0.5732, "step": 29548 }, { "epoch": 0.8627193366617033, "grad_norm": 0.5021655897948487, "learning_rate": 7.626926196269263e-06, "loss": 0.554, "step": 29549 }, { "epoch": 0.8627485328895507, "grad_norm": 0.4936665882726704, "learning_rate": 7.625304136253042e-06, "loss": 0.5236, "step": 29550 }, { "epoch": 0.862777729117398, "grad_norm": 0.5199019850196908, "learning_rate": 7.623682076236821e-06, "loss": 0.6071, "step": 29551 }, { "epoch": 0.8628069253452454, "grad_norm": 0.5540537492898836, "learning_rate": 7.6220600162206005e-06, "loss": 0.5735, "step": 29552 }, { "epoch": 0.8628361215730928, "grad_norm": 0.5191720778686546, "learning_rate": 7.62043795620438e-06, "loss": 0.6096, "step": 29553 }, { "epoch": 0.8628653178009401, "grad_norm": 0.5466311791768798, "learning_rate": 7.61881589618816e-06, "loss": 0.5877, "step": 29554 }, { "epoch": 0.8628945140287875, "grad_norm": 0.5231671258921293, "learning_rate": 7.617193836171939e-06, "loss": 0.5985, "step": 29555 }, { "epoch": 0.8629237102566348, "grad_norm": 0.5280249284221135, "learning_rate": 7.615571776155718e-06, "loss": 0.5871, "step": 29556 }, { "epoch": 0.8629529064844822, "grad_norm": 0.553533773681291, "learning_rate": 7.6139497161394974e-06, "loss": 0.6053, "step": 29557 }, { "epoch": 0.8629821027123296, "grad_norm": 0.514667422228258, "learning_rate": 7.6123276561232775e-06, "loss": 0.5828, "step": 29558 }, { "epoch": 0.8630112989401769, "grad_norm": 0.4949181134515729, "learning_rate": 7.610705596107057e-06, "loss": 0.5428, "step": 29559 }, { "epoch": 0.8630404951680243, "grad_norm": 0.5598480880000761, "learning_rate": 7.609083536090836e-06, "loss": 0.6411, "step": 29560 }, { "epoch": 0.8630696913958716, "grad_norm": 0.5517999987366367, "learning_rate": 7.607461476074615e-06, "loss": 0.608, "step": 29561 }, { "epoch": 0.863098887623719, "grad_norm": 0.4996353004016023, "learning_rate": 7.605839416058394e-06, "loss": 0.4925, "step": 29562 }, { "epoch": 0.8631280838515664, "grad_norm": 0.5712109483003566, "learning_rate": 7.6042173560421744e-06, "loss": 0.6109, "step": 29563 }, { "epoch": 0.8631572800794137, "grad_norm": 0.46201532943039153, "learning_rate": 7.602595296025954e-06, "loss": 0.4879, "step": 29564 }, { "epoch": 0.8631864763072611, "grad_norm": 0.4968338870253638, "learning_rate": 7.600973236009733e-06, "loss": 0.5284, "step": 29565 }, { "epoch": 0.8632156725351084, "grad_norm": 0.531423109088615, "learning_rate": 7.599351175993512e-06, "loss": 0.5782, "step": 29566 }, { "epoch": 0.8632448687629558, "grad_norm": 0.49162764144934973, "learning_rate": 7.597729115977291e-06, "loss": 0.4806, "step": 29567 }, { "epoch": 0.8632740649908032, "grad_norm": 0.5552369987443514, "learning_rate": 7.596107055961071e-06, "loss": 0.6438, "step": 29568 }, { "epoch": 0.8633032612186505, "grad_norm": 0.5020180349761041, "learning_rate": 7.594484995944851e-06, "loss": 0.4782, "step": 29569 }, { "epoch": 0.8633324574464979, "grad_norm": 0.47727071129593174, "learning_rate": 7.59286293592863e-06, "loss": 0.4893, "step": 29570 }, { "epoch": 0.8633616536743453, "grad_norm": 0.5237951432770352, "learning_rate": 7.591240875912409e-06, "loss": 0.5028, "step": 29571 }, { "epoch": 0.8633908499021926, "grad_norm": 0.5042800690708314, "learning_rate": 7.5896188158961874e-06, "loss": 0.5568, "step": 29572 }, { "epoch": 0.86342004613004, "grad_norm": 0.48817552085948646, "learning_rate": 7.587996755879968e-06, "loss": 0.4835, "step": 29573 }, { "epoch": 0.8634492423578873, "grad_norm": 0.5286516118615726, "learning_rate": 7.5863746958637476e-06, "loss": 0.5247, "step": 29574 }, { "epoch": 0.8634784385857347, "grad_norm": 0.5796971695920424, "learning_rate": 7.584752635847527e-06, "loss": 0.6799, "step": 29575 }, { "epoch": 0.8635076348135821, "grad_norm": 0.5048207734261831, "learning_rate": 7.583130575831306e-06, "loss": 0.5284, "step": 29576 }, { "epoch": 0.8635368310414294, "grad_norm": 0.5232671698001489, "learning_rate": 7.581508515815086e-06, "loss": 0.5759, "step": 29577 }, { "epoch": 0.8635660272692768, "grad_norm": 0.48820184849609505, "learning_rate": 7.579886455798865e-06, "loss": 0.5337, "step": 29578 }, { "epoch": 0.8635952234971241, "grad_norm": 0.5735860870550185, "learning_rate": 7.5782643957826445e-06, "loss": 0.6249, "step": 29579 }, { "epoch": 0.8636244197249715, "grad_norm": 0.5453396142755612, "learning_rate": 7.576642335766424e-06, "loss": 0.6189, "step": 29580 }, { "epoch": 0.8636536159528189, "grad_norm": 0.565918087757673, "learning_rate": 7.575020275750203e-06, "loss": 0.5887, "step": 29581 }, { "epoch": 0.8636828121806662, "grad_norm": 0.5118423485924367, "learning_rate": 7.573398215733983e-06, "loss": 0.5575, "step": 29582 }, { "epoch": 0.8637120084085136, "grad_norm": 0.5044856424456025, "learning_rate": 7.571776155717762e-06, "loss": 0.5275, "step": 29583 }, { "epoch": 0.863741204636361, "grad_norm": 0.49918947974665273, "learning_rate": 7.5701540957015415e-06, "loss": 0.5169, "step": 29584 }, { "epoch": 0.8637704008642083, "grad_norm": 0.5143099122772848, "learning_rate": 7.568532035685321e-06, "loss": 0.5494, "step": 29585 }, { "epoch": 0.8637995970920557, "grad_norm": 0.5532652672906628, "learning_rate": 7.566909975669099e-06, "loss": 0.6131, "step": 29586 }, { "epoch": 0.863828793319903, "grad_norm": 0.49743083791113446, "learning_rate": 7.56528791565288e-06, "loss": 0.5622, "step": 29587 }, { "epoch": 0.8638579895477504, "grad_norm": 0.49723654577854093, "learning_rate": 7.563665855636659e-06, "loss": 0.5329, "step": 29588 }, { "epoch": 0.8638871857755978, "grad_norm": 0.5301665956421632, "learning_rate": 7.5620437956204384e-06, "loss": 0.5306, "step": 29589 }, { "epoch": 0.8639163820034451, "grad_norm": 0.5665027860975009, "learning_rate": 7.560421735604218e-06, "loss": 0.5839, "step": 29590 }, { "epoch": 0.8639455782312925, "grad_norm": 0.5445281421538872, "learning_rate": 7.558799675587998e-06, "loss": 0.5678, "step": 29591 }, { "epoch": 0.8639747744591398, "grad_norm": 0.4906730737270108, "learning_rate": 7.557177615571777e-06, "loss": 0.5046, "step": 29592 }, { "epoch": 0.8640039706869872, "grad_norm": 0.49963560702550563, "learning_rate": 7.555555555555556e-06, "loss": 0.5341, "step": 29593 }, { "epoch": 0.8640331669148346, "grad_norm": 0.5502491282984512, "learning_rate": 7.553933495539335e-06, "loss": 0.6013, "step": 29594 }, { "epoch": 0.8640623631426819, "grad_norm": 0.5484918653530461, "learning_rate": 7.552311435523115e-06, "loss": 0.596, "step": 29595 }, { "epoch": 0.8640915593705293, "grad_norm": 0.5658293366655196, "learning_rate": 7.550689375506895e-06, "loss": 0.6299, "step": 29596 }, { "epoch": 0.8641207555983766, "grad_norm": 0.49164965713908654, "learning_rate": 7.549067315490674e-06, "loss": 0.5175, "step": 29597 }, { "epoch": 0.864149951826224, "grad_norm": 0.5570976386208276, "learning_rate": 7.547445255474453e-06, "loss": 0.6182, "step": 29598 }, { "epoch": 0.8641791480540714, "grad_norm": 0.4900156271563223, "learning_rate": 7.545823195458232e-06, "loss": 0.4986, "step": 29599 }, { "epoch": 0.8642083442819187, "grad_norm": 0.5601539983486137, "learning_rate": 7.5442011354420115e-06, "loss": 0.6283, "step": 29600 }, { "epoch": 0.8642375405097661, "grad_norm": 0.5111116884323993, "learning_rate": 7.542579075425792e-06, "loss": 0.5709, "step": 29601 }, { "epoch": 0.8642667367376135, "grad_norm": 0.5273117682719072, "learning_rate": 7.540957015409571e-06, "loss": 0.5685, "step": 29602 }, { "epoch": 0.8642959329654608, "grad_norm": 0.52168204914807, "learning_rate": 7.53933495539335e-06, "loss": 0.5749, "step": 29603 }, { "epoch": 0.8643251291933082, "grad_norm": 0.5699719485768185, "learning_rate": 7.537712895377129e-06, "loss": 0.6627, "step": 29604 }, { "epoch": 0.8643543254211555, "grad_norm": 0.5099589431116196, "learning_rate": 7.536090835360908e-06, "loss": 0.533, "step": 29605 }, { "epoch": 0.8643835216490029, "grad_norm": 0.5041007146623687, "learning_rate": 7.5344687753446886e-06, "loss": 0.5015, "step": 29606 }, { "epoch": 0.8644127178768503, "grad_norm": 0.5204131631448696, "learning_rate": 7.532846715328468e-06, "loss": 0.5798, "step": 29607 }, { "epoch": 0.8644419141046977, "grad_norm": 0.47282772098274295, "learning_rate": 7.531224655312247e-06, "loss": 0.4934, "step": 29608 }, { "epoch": 0.8644711103325451, "grad_norm": 0.4967531361739102, "learning_rate": 7.529602595296026e-06, "loss": 0.4959, "step": 29609 }, { "epoch": 0.8645003065603925, "grad_norm": 0.521531288366861, "learning_rate": 7.527980535279806e-06, "loss": 0.6018, "step": 29610 }, { "epoch": 0.8645295027882398, "grad_norm": 0.5048530780415087, "learning_rate": 7.5263584752635855e-06, "loss": 0.5089, "step": 29611 }, { "epoch": 0.8645586990160872, "grad_norm": 0.5051771887880986, "learning_rate": 7.524736415247365e-06, "loss": 0.5378, "step": 29612 }, { "epoch": 0.8645878952439345, "grad_norm": 0.5407628192748263, "learning_rate": 7.523114355231144e-06, "loss": 0.6082, "step": 29613 }, { "epoch": 0.8646170914717819, "grad_norm": 0.5385586667173559, "learning_rate": 7.521492295214923e-06, "loss": 0.6126, "step": 29614 }, { "epoch": 0.8646462876996293, "grad_norm": 0.5447482017094055, "learning_rate": 7.519870235198703e-06, "loss": 0.6419, "step": 29615 }, { "epoch": 0.8646754839274766, "grad_norm": 0.5328871338447325, "learning_rate": 7.5182481751824825e-06, "loss": 0.5956, "step": 29616 }, { "epoch": 0.864704680155324, "grad_norm": 0.5347625348791368, "learning_rate": 7.516626115166262e-06, "loss": 0.6021, "step": 29617 }, { "epoch": 0.8647338763831713, "grad_norm": 0.49646653113609435, "learning_rate": 7.515004055150041e-06, "loss": 0.5015, "step": 29618 }, { "epoch": 0.8647630726110187, "grad_norm": 0.5222248446286667, "learning_rate": 7.513381995133819e-06, "loss": 0.558, "step": 29619 }, { "epoch": 0.8647922688388661, "grad_norm": 0.5450011602480966, "learning_rate": 7.5117599351176e-06, "loss": 0.6397, "step": 29620 }, { "epoch": 0.8648214650667134, "grad_norm": 0.5419808290239447, "learning_rate": 7.5101378751013794e-06, "loss": 0.6262, "step": 29621 }, { "epoch": 0.8648506612945608, "grad_norm": 0.5875731024617771, "learning_rate": 7.508515815085159e-06, "loss": 0.6437, "step": 29622 }, { "epoch": 0.8648798575224081, "grad_norm": 0.5473309084812001, "learning_rate": 7.506893755068938e-06, "loss": 0.6295, "step": 29623 }, { "epoch": 0.8649090537502555, "grad_norm": 0.5158575838688169, "learning_rate": 7.505271695052718e-06, "loss": 0.5732, "step": 29624 }, { "epoch": 0.8649382499781029, "grad_norm": 0.5212810236683647, "learning_rate": 7.503649635036497e-06, "loss": 0.5508, "step": 29625 }, { "epoch": 0.8649674462059502, "grad_norm": 0.5243166993917405, "learning_rate": 7.502027575020276e-06, "loss": 0.5901, "step": 29626 }, { "epoch": 0.8649966424337976, "grad_norm": 0.5462852328777233, "learning_rate": 7.500405515004056e-06, "loss": 0.5528, "step": 29627 }, { "epoch": 0.865025838661645, "grad_norm": 0.5412558891911852, "learning_rate": 7.498783454987835e-06, "loss": 0.5498, "step": 29628 }, { "epoch": 0.8650550348894923, "grad_norm": 0.5638855643489844, "learning_rate": 7.497161394971615e-06, "loss": 0.6707, "step": 29629 }, { "epoch": 0.8650842311173397, "grad_norm": 0.525914705473738, "learning_rate": 7.495539334955394e-06, "loss": 0.5979, "step": 29630 }, { "epoch": 0.865113427345187, "grad_norm": 0.5446492329024588, "learning_rate": 7.493917274939173e-06, "loss": 0.5959, "step": 29631 }, { "epoch": 0.8651426235730344, "grad_norm": 0.4993116011224189, "learning_rate": 7.4922952149229525e-06, "loss": 0.5744, "step": 29632 }, { "epoch": 0.8651718198008818, "grad_norm": 0.5351098435017511, "learning_rate": 7.490673154906731e-06, "loss": 0.6066, "step": 29633 }, { "epoch": 0.8652010160287291, "grad_norm": 0.5533236308397578, "learning_rate": 7.489051094890512e-06, "loss": 0.6077, "step": 29634 }, { "epoch": 0.8652302122565765, "grad_norm": 0.5613144373398454, "learning_rate": 7.487429034874291e-06, "loss": 0.634, "step": 29635 }, { "epoch": 0.8652594084844238, "grad_norm": 0.5655332938796522, "learning_rate": 7.48580697485807e-06, "loss": 0.6314, "step": 29636 }, { "epoch": 0.8652886047122712, "grad_norm": 0.5757856701713501, "learning_rate": 7.4841849148418495e-06, "loss": 0.6172, "step": 29637 }, { "epoch": 0.8653178009401186, "grad_norm": 0.5145610772114555, "learning_rate": 7.482562854825628e-06, "loss": 0.5903, "step": 29638 }, { "epoch": 0.8653469971679659, "grad_norm": 0.5079667547532513, "learning_rate": 7.480940794809409e-06, "loss": 0.5354, "step": 29639 }, { "epoch": 0.8653761933958133, "grad_norm": 0.5041814483899026, "learning_rate": 7.479318734793188e-06, "loss": 0.5414, "step": 29640 }, { "epoch": 0.8654053896236606, "grad_norm": 0.48938513284534524, "learning_rate": 7.477696674776967e-06, "loss": 0.5287, "step": 29641 }, { "epoch": 0.865434585851508, "grad_norm": 0.49641986006924504, "learning_rate": 7.4760746147607465e-06, "loss": 0.4926, "step": 29642 }, { "epoch": 0.8654637820793554, "grad_norm": 0.5354640124793301, "learning_rate": 7.4744525547445265e-06, "loss": 0.5801, "step": 29643 }, { "epoch": 0.8654929783072027, "grad_norm": 0.5792391521204667, "learning_rate": 7.472830494728306e-06, "loss": 0.702, "step": 29644 }, { "epoch": 0.8655221745350501, "grad_norm": 0.5117093781782728, "learning_rate": 7.471208434712085e-06, "loss": 0.5539, "step": 29645 }, { "epoch": 0.8655513707628975, "grad_norm": 0.5169754029360966, "learning_rate": 7.469586374695864e-06, "loss": 0.548, "step": 29646 }, { "epoch": 0.8655805669907448, "grad_norm": 0.5139917126383846, "learning_rate": 7.4679643146796426e-06, "loss": 0.574, "step": 29647 }, { "epoch": 0.8656097632185922, "grad_norm": 0.5307823077104714, "learning_rate": 7.4663422546634235e-06, "loss": 0.6056, "step": 29648 }, { "epoch": 0.8656389594464395, "grad_norm": 0.5078156166251796, "learning_rate": 7.464720194647203e-06, "loss": 0.5422, "step": 29649 }, { "epoch": 0.8656681556742869, "grad_norm": 0.4811838962928319, "learning_rate": 7.463098134630982e-06, "loss": 0.5088, "step": 29650 }, { "epoch": 0.8656973519021343, "grad_norm": 0.5317181104513083, "learning_rate": 7.461476074614761e-06, "loss": 0.5785, "step": 29651 }, { "epoch": 0.8657265481299816, "grad_norm": 0.47577305047517593, "learning_rate": 7.4598540145985395e-06, "loss": 0.5251, "step": 29652 }, { "epoch": 0.865755744357829, "grad_norm": 0.5502008735048192, "learning_rate": 7.4582319545823204e-06, "loss": 0.6097, "step": 29653 }, { "epoch": 0.8657849405856763, "grad_norm": 0.5248297768720145, "learning_rate": 7.4566098945661e-06, "loss": 0.536, "step": 29654 }, { "epoch": 0.8658141368135237, "grad_norm": 0.5072764299750162, "learning_rate": 7.454987834549879e-06, "loss": 0.5115, "step": 29655 }, { "epoch": 0.8658433330413711, "grad_norm": 0.49481751274573815, "learning_rate": 7.453365774533658e-06, "loss": 0.5013, "step": 29656 }, { "epoch": 0.8658725292692184, "grad_norm": 0.5098179299793618, "learning_rate": 7.451743714517438e-06, "loss": 0.5518, "step": 29657 }, { "epoch": 0.8659017254970658, "grad_norm": 0.47914648832799883, "learning_rate": 7.450121654501217e-06, "loss": 0.5138, "step": 29658 }, { "epoch": 0.8659309217249131, "grad_norm": 0.5281400414875106, "learning_rate": 7.448499594484997e-06, "loss": 0.5658, "step": 29659 }, { "epoch": 0.8659601179527605, "grad_norm": 0.5064613047061686, "learning_rate": 7.446877534468776e-06, "loss": 0.512, "step": 29660 }, { "epoch": 0.8659893141806079, "grad_norm": 0.500596361226884, "learning_rate": 7.445255474452554e-06, "loss": 0.5293, "step": 29661 }, { "epoch": 0.8660185104084552, "grad_norm": 0.5498215787368693, "learning_rate": 7.443633414436335e-06, "loss": 0.6522, "step": 29662 }, { "epoch": 0.8660477066363026, "grad_norm": 0.568607798460007, "learning_rate": 7.442011354420114e-06, "loss": 0.596, "step": 29663 }, { "epoch": 0.86607690286415, "grad_norm": 0.5187329230844829, "learning_rate": 7.4403892944038935e-06, "loss": 0.5585, "step": 29664 }, { "epoch": 0.8661060990919973, "grad_norm": 0.49085420693397086, "learning_rate": 7.438767234387673e-06, "loss": 0.5262, "step": 29665 }, { "epoch": 0.8661352953198447, "grad_norm": 0.48437031388674934, "learning_rate": 7.437145174371451e-06, "loss": 0.4649, "step": 29666 }, { "epoch": 0.866164491547692, "grad_norm": 0.5489749635041159, "learning_rate": 7.435523114355232e-06, "loss": 0.5886, "step": 29667 }, { "epoch": 0.8661936877755394, "grad_norm": 0.520215660623312, "learning_rate": 7.433901054339011e-06, "loss": 0.5448, "step": 29668 }, { "epoch": 0.8662228840033868, "grad_norm": 0.5343825005942181, "learning_rate": 7.4322789943227905e-06, "loss": 0.588, "step": 29669 }, { "epoch": 0.8662520802312341, "grad_norm": 0.5002284834192843, "learning_rate": 7.43065693430657e-06, "loss": 0.5117, "step": 29670 }, { "epoch": 0.8662812764590815, "grad_norm": 0.5331552081130967, "learning_rate": 7.429034874290348e-06, "loss": 0.5983, "step": 29671 }, { "epoch": 0.8663104726869288, "grad_norm": 0.5444385159647573, "learning_rate": 7.427412814274129e-06, "loss": 0.5799, "step": 29672 }, { "epoch": 0.8663396689147762, "grad_norm": 0.5318220984618279, "learning_rate": 7.425790754257908e-06, "loss": 0.5899, "step": 29673 }, { "epoch": 0.8663688651426236, "grad_norm": 0.5611519760512241, "learning_rate": 7.4241686942416875e-06, "loss": 0.6094, "step": 29674 }, { "epoch": 0.8663980613704709, "grad_norm": 0.5433787180198458, "learning_rate": 7.422546634225467e-06, "loss": 0.6135, "step": 29675 }, { "epoch": 0.8664272575983183, "grad_norm": 0.5679027919780029, "learning_rate": 7.420924574209247e-06, "loss": 0.6809, "step": 29676 }, { "epoch": 0.8664564538261657, "grad_norm": 0.5251854431569358, "learning_rate": 7.419302514193026e-06, "loss": 0.6107, "step": 29677 }, { "epoch": 0.866485650054013, "grad_norm": 0.5124013548581613, "learning_rate": 7.417680454176805e-06, "loss": 0.5825, "step": 29678 }, { "epoch": 0.8665148462818604, "grad_norm": 0.4793604245543506, "learning_rate": 7.416058394160584e-06, "loss": 0.4813, "step": 29679 }, { "epoch": 0.8665440425097077, "grad_norm": 0.535056691392197, "learning_rate": 7.414436334144363e-06, "loss": 0.6016, "step": 29680 }, { "epoch": 0.8665732387375551, "grad_norm": 0.5188512148657981, "learning_rate": 7.412814274128144e-06, "loss": 0.5495, "step": 29681 }, { "epoch": 0.8666024349654025, "grad_norm": 0.5253271899953161, "learning_rate": 7.411192214111923e-06, "loss": 0.5905, "step": 29682 }, { "epoch": 0.8666316311932498, "grad_norm": 0.5773680324749297, "learning_rate": 7.409570154095702e-06, "loss": 0.6662, "step": 29683 }, { "epoch": 0.8666608274210972, "grad_norm": 0.48932238657601945, "learning_rate": 7.407948094079481e-06, "loss": 0.4924, "step": 29684 }, { "epoch": 0.8666900236489445, "grad_norm": 0.5130597360396563, "learning_rate": 7.40632603406326e-06, "loss": 0.5814, "step": 29685 }, { "epoch": 0.8667192198767919, "grad_norm": 0.5278275669593075, "learning_rate": 7.404703974047041e-06, "loss": 0.5759, "step": 29686 }, { "epoch": 0.8667484161046393, "grad_norm": 0.5151435852849475, "learning_rate": 7.40308191403082e-06, "loss": 0.5777, "step": 29687 }, { "epoch": 0.8667776123324866, "grad_norm": 0.4900372476115543, "learning_rate": 7.401459854014599e-06, "loss": 0.513, "step": 29688 }, { "epoch": 0.866806808560334, "grad_norm": 0.5070490452916565, "learning_rate": 7.399837793998378e-06, "loss": 0.5233, "step": 29689 }, { "epoch": 0.8668360047881813, "grad_norm": 0.4802604919447799, "learning_rate": 7.398215733982157e-06, "loss": 0.485, "step": 29690 }, { "epoch": 0.8668652010160287, "grad_norm": 0.5342202758128664, "learning_rate": 7.396593673965938e-06, "loss": 0.6134, "step": 29691 }, { "epoch": 0.8668943972438761, "grad_norm": 0.5812108947377477, "learning_rate": 7.394971613949717e-06, "loss": 0.695, "step": 29692 }, { "epoch": 0.8669235934717234, "grad_norm": 0.5186967777986666, "learning_rate": 7.393349553933496e-06, "loss": 0.546, "step": 29693 }, { "epoch": 0.8669527896995708, "grad_norm": 0.53515128755423, "learning_rate": 7.391727493917274e-06, "loss": 0.6265, "step": 29694 }, { "epoch": 0.8669819859274182, "grad_norm": 0.5309234789321026, "learning_rate": 7.390105433901055e-06, "loss": 0.5354, "step": 29695 }, { "epoch": 0.8670111821552655, "grad_norm": 0.5335270133566254, "learning_rate": 7.3884833738848345e-06, "loss": 0.5484, "step": 29696 }, { "epoch": 0.8670403783831129, "grad_norm": 0.5235184734336148, "learning_rate": 7.386861313868614e-06, "loss": 0.5896, "step": 29697 }, { "epoch": 0.8670695746109602, "grad_norm": 0.5446484900951909, "learning_rate": 7.385239253852393e-06, "loss": 0.6029, "step": 29698 }, { "epoch": 0.8670987708388076, "grad_norm": 0.5250205234632853, "learning_rate": 7.383617193836171e-06, "loss": 0.5781, "step": 29699 }, { "epoch": 0.867127967066655, "grad_norm": 0.5378676054645842, "learning_rate": 7.381995133819952e-06, "loss": 0.5952, "step": 29700 }, { "epoch": 0.8671571632945023, "grad_norm": 0.4858320323116818, "learning_rate": 7.3803730738037315e-06, "loss": 0.5004, "step": 29701 }, { "epoch": 0.8671863595223497, "grad_norm": 0.4875130544128768, "learning_rate": 7.378751013787511e-06, "loss": 0.523, "step": 29702 }, { "epoch": 0.867215555750197, "grad_norm": 0.507671018199678, "learning_rate": 7.37712895377129e-06, "loss": 0.5329, "step": 29703 }, { "epoch": 0.8672447519780444, "grad_norm": 0.5122829974836729, "learning_rate": 7.375506893755068e-06, "loss": 0.5626, "step": 29704 }, { "epoch": 0.8672739482058918, "grad_norm": 0.525565255891751, "learning_rate": 7.373884833738849e-06, "loss": 0.5736, "step": 29705 }, { "epoch": 0.8673031444337391, "grad_norm": 0.5181151919720846, "learning_rate": 7.3722627737226285e-06, "loss": 0.6056, "step": 29706 }, { "epoch": 0.8673323406615865, "grad_norm": 0.5100133579516505, "learning_rate": 7.370640713706408e-06, "loss": 0.5756, "step": 29707 }, { "epoch": 0.8673615368894338, "grad_norm": 0.5380835806173028, "learning_rate": 7.369018653690186e-06, "loss": 0.5798, "step": 29708 }, { "epoch": 0.8673907331172812, "grad_norm": 0.5021459782170015, "learning_rate": 7.367396593673967e-06, "loss": 0.5404, "step": 29709 }, { "epoch": 0.8674199293451286, "grad_norm": 0.5637433181005598, "learning_rate": 7.365774533657746e-06, "loss": 0.6172, "step": 29710 }, { "epoch": 0.8674491255729759, "grad_norm": 0.5386967106692612, "learning_rate": 7.364152473641525e-06, "loss": 0.5832, "step": 29711 }, { "epoch": 0.8674783218008233, "grad_norm": 0.5453769214974605, "learning_rate": 7.362530413625305e-06, "loss": 0.604, "step": 29712 }, { "epoch": 0.8675075180286707, "grad_norm": 0.5202194920961725, "learning_rate": 7.360908353609083e-06, "loss": 0.5552, "step": 29713 }, { "epoch": 0.867536714256518, "grad_norm": 0.4655871239453158, "learning_rate": 7.359286293592864e-06, "loss": 0.4574, "step": 29714 }, { "epoch": 0.8675659104843654, "grad_norm": 0.5137999184682952, "learning_rate": 7.357664233576643e-06, "loss": 0.5457, "step": 29715 }, { "epoch": 0.8675951067122127, "grad_norm": 0.48179109004857906, "learning_rate": 7.356042173560422e-06, "loss": 0.475, "step": 29716 }, { "epoch": 0.8676243029400601, "grad_norm": 0.5567873498970428, "learning_rate": 7.3544201135442016e-06, "loss": 0.6278, "step": 29717 }, { "epoch": 0.8676534991679075, "grad_norm": 0.5244599221439098, "learning_rate": 7.35279805352798e-06, "loss": 0.5655, "step": 29718 }, { "epoch": 0.8676826953957548, "grad_norm": 0.4841937344339192, "learning_rate": 7.351175993511761e-06, "loss": 0.4618, "step": 29719 }, { "epoch": 0.8677118916236022, "grad_norm": 0.5164549517901793, "learning_rate": 7.34955393349554e-06, "loss": 0.5778, "step": 29720 }, { "epoch": 0.8677410878514495, "grad_norm": 0.5160645879884677, "learning_rate": 7.347931873479319e-06, "loss": 0.5561, "step": 29721 }, { "epoch": 0.8677702840792969, "grad_norm": 0.5028443939574816, "learning_rate": 7.346309813463098e-06, "loss": 0.5181, "step": 29722 }, { "epoch": 0.8677994803071443, "grad_norm": 0.5221538583697084, "learning_rate": 7.344687753446877e-06, "loss": 0.5747, "step": 29723 }, { "epoch": 0.8678286765349916, "grad_norm": 0.5484606554291138, "learning_rate": 7.343065693430658e-06, "loss": 0.6405, "step": 29724 }, { "epoch": 0.867857872762839, "grad_norm": 0.5816387391346599, "learning_rate": 7.341443633414437e-06, "loss": 0.6351, "step": 29725 }, { "epoch": 0.8678870689906864, "grad_norm": 0.5892569858408626, "learning_rate": 7.339821573398216e-06, "loss": 0.5901, "step": 29726 }, { "epoch": 0.8679162652185337, "grad_norm": 0.5410944416471584, "learning_rate": 7.338199513381995e-06, "loss": 0.5653, "step": 29727 }, { "epoch": 0.8679454614463811, "grad_norm": 0.5235924730351375, "learning_rate": 7.3365774533657755e-06, "loss": 0.5505, "step": 29728 }, { "epoch": 0.8679746576742285, "grad_norm": 0.5001414005814082, "learning_rate": 7.334955393349555e-06, "loss": 0.5282, "step": 29729 }, { "epoch": 0.8680038539020759, "grad_norm": 0.54050107239734, "learning_rate": 7.333333333333334e-06, "loss": 0.5836, "step": 29730 }, { "epoch": 0.8680330501299233, "grad_norm": 0.5398919069679409, "learning_rate": 7.331711273317113e-06, "loss": 0.6073, "step": 29731 }, { "epoch": 0.8680622463577706, "grad_norm": 0.513686830234418, "learning_rate": 7.330089213300892e-06, "loss": 0.5602, "step": 29732 }, { "epoch": 0.868091442585618, "grad_norm": 0.5971058308666689, "learning_rate": 7.3284671532846725e-06, "loss": 0.648, "step": 29733 }, { "epoch": 0.8681206388134654, "grad_norm": 0.5096715769184867, "learning_rate": 7.326845093268452e-06, "loss": 0.5201, "step": 29734 }, { "epoch": 0.8681498350413127, "grad_norm": 0.5149612815302124, "learning_rate": 7.325223033252231e-06, "loss": 0.5568, "step": 29735 }, { "epoch": 0.8681790312691601, "grad_norm": 0.4872862428221717, "learning_rate": 7.32360097323601e-06, "loss": 0.4813, "step": 29736 }, { "epoch": 0.8682082274970074, "grad_norm": 0.5057043348115611, "learning_rate": 7.3219789132197885e-06, "loss": 0.4992, "step": 29737 }, { "epoch": 0.8682374237248548, "grad_norm": 0.5391852912153651, "learning_rate": 7.3203568532035695e-06, "loss": 0.5875, "step": 29738 }, { "epoch": 0.8682666199527022, "grad_norm": 0.5366594461658909, "learning_rate": 7.318734793187349e-06, "loss": 0.5867, "step": 29739 }, { "epoch": 0.8682958161805495, "grad_norm": 0.49930479479345885, "learning_rate": 7.317112733171128e-06, "loss": 0.5287, "step": 29740 }, { "epoch": 0.8683250124083969, "grad_norm": 0.5319483463956958, "learning_rate": 7.315490673154906e-06, "loss": 0.5443, "step": 29741 }, { "epoch": 0.8683542086362442, "grad_norm": 0.5037638566692868, "learning_rate": 7.313868613138687e-06, "loss": 0.5308, "step": 29742 }, { "epoch": 0.8683834048640916, "grad_norm": 0.5690455484268486, "learning_rate": 7.312246553122466e-06, "loss": 0.6613, "step": 29743 }, { "epoch": 0.868412601091939, "grad_norm": 0.5168645813970745, "learning_rate": 7.310624493106246e-06, "loss": 0.5493, "step": 29744 }, { "epoch": 0.8684417973197863, "grad_norm": 0.5043507088331562, "learning_rate": 7.309002433090025e-06, "loss": 0.5393, "step": 29745 }, { "epoch": 0.8684709935476337, "grad_norm": 0.485712824243658, "learning_rate": 7.307380373073803e-06, "loss": 0.49, "step": 29746 }, { "epoch": 0.868500189775481, "grad_norm": 0.5258123511325129, "learning_rate": 7.305758313057584e-06, "loss": 0.5821, "step": 29747 }, { "epoch": 0.8685293860033284, "grad_norm": 0.518792547389132, "learning_rate": 7.304136253041363e-06, "loss": 0.557, "step": 29748 }, { "epoch": 0.8685585822311758, "grad_norm": 0.5035060944079366, "learning_rate": 7.3025141930251426e-06, "loss": 0.5191, "step": 29749 }, { "epoch": 0.8685877784590231, "grad_norm": 0.484591223865894, "learning_rate": 7.300892133008922e-06, "loss": 0.5248, "step": 29750 }, { "epoch": 0.8686169746868705, "grad_norm": 0.4964548269718034, "learning_rate": 7.2992700729927e-06, "loss": 0.5195, "step": 29751 }, { "epoch": 0.8686461709147179, "grad_norm": 0.5307942622660297, "learning_rate": 7.297648012976481e-06, "loss": 0.6484, "step": 29752 }, { "epoch": 0.8686753671425652, "grad_norm": 0.5440896324134772, "learning_rate": 7.29602595296026e-06, "loss": 0.5674, "step": 29753 }, { "epoch": 0.8687045633704126, "grad_norm": 0.5464831826489082, "learning_rate": 7.2944038929440395e-06, "loss": 0.6146, "step": 29754 }, { "epoch": 0.8687337595982599, "grad_norm": 0.5565972899279776, "learning_rate": 7.292781832927818e-06, "loss": 0.6406, "step": 29755 }, { "epoch": 0.8687629558261073, "grad_norm": 0.49264662925812164, "learning_rate": 7.291159772911597e-06, "loss": 0.5178, "step": 29756 }, { "epoch": 0.8687921520539547, "grad_norm": 0.5377884541648319, "learning_rate": 7.289537712895378e-06, "loss": 0.5788, "step": 29757 }, { "epoch": 0.868821348281802, "grad_norm": 0.5607449152144872, "learning_rate": 7.287915652879157e-06, "loss": 0.635, "step": 29758 }, { "epoch": 0.8688505445096494, "grad_norm": 0.5477079508430792, "learning_rate": 7.2862935928629365e-06, "loss": 0.6423, "step": 29759 }, { "epoch": 0.8688797407374967, "grad_norm": 0.5330049967755492, "learning_rate": 7.284671532846715e-06, "loss": 0.5863, "step": 29760 }, { "epoch": 0.8689089369653441, "grad_norm": 0.5769322598768842, "learning_rate": 7.283049472830496e-06, "loss": 0.6786, "step": 29761 }, { "epoch": 0.8689381331931915, "grad_norm": 0.5941596318076872, "learning_rate": 7.281427412814275e-06, "loss": 0.7298, "step": 29762 }, { "epoch": 0.8689673294210388, "grad_norm": 0.5474581742036816, "learning_rate": 7.279805352798054e-06, "loss": 0.6055, "step": 29763 }, { "epoch": 0.8689965256488862, "grad_norm": 0.5622055137815017, "learning_rate": 7.2781832927818334e-06, "loss": 0.6157, "step": 29764 }, { "epoch": 0.8690257218767335, "grad_norm": 0.5234281456328425, "learning_rate": 7.276561232765612e-06, "loss": 0.5507, "step": 29765 }, { "epoch": 0.8690549181045809, "grad_norm": 0.5035231384952753, "learning_rate": 7.274939172749393e-06, "loss": 0.5065, "step": 29766 }, { "epoch": 0.8690841143324283, "grad_norm": 0.5238954644733913, "learning_rate": 7.273317112733172e-06, "loss": 0.5629, "step": 29767 }, { "epoch": 0.8691133105602756, "grad_norm": 0.510105787117781, "learning_rate": 7.271695052716951e-06, "loss": 0.573, "step": 29768 }, { "epoch": 0.869142506788123, "grad_norm": 0.5569498674081722, "learning_rate": 7.2700729927007295e-06, "loss": 0.6361, "step": 29769 }, { "epoch": 0.8691717030159704, "grad_norm": 0.5378784945950371, "learning_rate": 7.268450932684509e-06, "loss": 0.6068, "step": 29770 }, { "epoch": 0.8692008992438177, "grad_norm": 0.5587463193306677, "learning_rate": 7.26682887266829e-06, "loss": 0.6748, "step": 29771 }, { "epoch": 0.8692300954716651, "grad_norm": 0.5030981034140546, "learning_rate": 7.265206812652069e-06, "loss": 0.5273, "step": 29772 }, { "epoch": 0.8692592916995124, "grad_norm": 0.5192129884281095, "learning_rate": 7.263584752635848e-06, "loss": 0.5615, "step": 29773 }, { "epoch": 0.8692884879273598, "grad_norm": 0.5399611272962173, "learning_rate": 7.2619626926196265e-06, "loss": 0.6086, "step": 29774 }, { "epoch": 0.8693176841552072, "grad_norm": 0.5295649583648955, "learning_rate": 7.260340632603407e-06, "loss": 0.5861, "step": 29775 }, { "epoch": 0.8693468803830545, "grad_norm": 0.5102335295716888, "learning_rate": 7.258718572587187e-06, "loss": 0.5409, "step": 29776 }, { "epoch": 0.8693760766109019, "grad_norm": 0.47638930561712567, "learning_rate": 7.257096512570966e-06, "loss": 0.4672, "step": 29777 }, { "epoch": 0.8694052728387492, "grad_norm": 0.5373470718409106, "learning_rate": 7.255474452554745e-06, "loss": 0.5688, "step": 29778 }, { "epoch": 0.8694344690665966, "grad_norm": 0.508871215071403, "learning_rate": 7.2538523925385234e-06, "loss": 0.5549, "step": 29779 }, { "epoch": 0.869463665294444, "grad_norm": 0.5054716604053169, "learning_rate": 7.252230332522304e-06, "loss": 0.5308, "step": 29780 }, { "epoch": 0.8694928615222913, "grad_norm": 0.5078391986289295, "learning_rate": 7.2506082725060836e-06, "loss": 0.5435, "step": 29781 }, { "epoch": 0.8695220577501387, "grad_norm": 0.535353573241643, "learning_rate": 7.248986212489863e-06, "loss": 0.5937, "step": 29782 }, { "epoch": 0.869551253977986, "grad_norm": 0.5448762474323264, "learning_rate": 7.247364152473641e-06, "loss": 0.6035, "step": 29783 }, { "epoch": 0.8695804502058334, "grad_norm": 0.5700758848599045, "learning_rate": 7.24574209245742e-06, "loss": 0.6658, "step": 29784 }, { "epoch": 0.8696096464336808, "grad_norm": 0.5285886495107023, "learning_rate": 7.244120032441201e-06, "loss": 0.5784, "step": 29785 }, { "epoch": 0.8696388426615281, "grad_norm": 0.5588873247194114, "learning_rate": 7.2424979724249805e-06, "loss": 0.6686, "step": 29786 }, { "epoch": 0.8696680388893755, "grad_norm": 0.5279136297804913, "learning_rate": 7.24087591240876e-06, "loss": 0.5778, "step": 29787 }, { "epoch": 0.8696972351172229, "grad_norm": 0.5487091726520559, "learning_rate": 7.239253852392538e-06, "loss": 0.6311, "step": 29788 }, { "epoch": 0.8697264313450702, "grad_norm": 0.5644372174693371, "learning_rate": 7.237631792376317e-06, "loss": 0.6875, "step": 29789 }, { "epoch": 0.8697556275729176, "grad_norm": 0.5084023839618464, "learning_rate": 7.236009732360098e-06, "loss": 0.5581, "step": 29790 }, { "epoch": 0.8697848238007649, "grad_norm": 0.5544139299912096, "learning_rate": 7.2343876723438775e-06, "loss": 0.6614, "step": 29791 }, { "epoch": 0.8698140200286123, "grad_norm": 0.5025094973018809, "learning_rate": 7.232765612327657e-06, "loss": 0.5365, "step": 29792 }, { "epoch": 0.8698432162564597, "grad_norm": 0.4930185083837984, "learning_rate": 7.231143552311435e-06, "loss": 0.5172, "step": 29793 }, { "epoch": 0.869872412484307, "grad_norm": 0.5323642457228377, "learning_rate": 7.229521492295216e-06, "loss": 0.5672, "step": 29794 }, { "epoch": 0.8699016087121544, "grad_norm": 0.5065899026800359, "learning_rate": 7.227899432278995e-06, "loss": 0.5615, "step": 29795 }, { "epoch": 0.8699308049400017, "grad_norm": 0.5438189149004479, "learning_rate": 7.2262773722627744e-06, "loss": 0.5718, "step": 29796 }, { "epoch": 0.8699600011678491, "grad_norm": 0.531687697293776, "learning_rate": 7.224655312246554e-06, "loss": 0.5857, "step": 29797 }, { "epoch": 0.8699891973956965, "grad_norm": 0.5575347180940968, "learning_rate": 7.223033252230332e-06, "loss": 0.5904, "step": 29798 }, { "epoch": 0.8700183936235438, "grad_norm": 0.5540054581583211, "learning_rate": 7.221411192214113e-06, "loss": 0.6712, "step": 29799 }, { "epoch": 0.8700475898513912, "grad_norm": 0.5161016900836423, "learning_rate": 7.219789132197892e-06, "loss": 0.5721, "step": 29800 }, { "epoch": 0.8700767860792386, "grad_norm": 0.5683807476818209, "learning_rate": 7.218167072181671e-06, "loss": 0.6558, "step": 29801 }, { "epoch": 0.8701059823070859, "grad_norm": 0.5268854881077414, "learning_rate": 7.21654501216545e-06, "loss": 0.6069, "step": 29802 }, { "epoch": 0.8701351785349333, "grad_norm": 0.4862745990647789, "learning_rate": 7.214922952149229e-06, "loss": 0.477, "step": 29803 }, { "epoch": 0.8701643747627806, "grad_norm": 0.4969307003284496, "learning_rate": 7.21330089213301e-06, "loss": 0.5498, "step": 29804 }, { "epoch": 0.870193570990628, "grad_norm": 0.5608149706780441, "learning_rate": 7.211678832116789e-06, "loss": 0.6586, "step": 29805 }, { "epoch": 0.8702227672184754, "grad_norm": 0.5357111577429913, "learning_rate": 7.210056772100568e-06, "loss": 0.584, "step": 29806 }, { "epoch": 0.8702519634463227, "grad_norm": 0.5462666198148347, "learning_rate": 7.208434712084347e-06, "loss": 0.6428, "step": 29807 }, { "epoch": 0.8702811596741701, "grad_norm": 0.4998713697544838, "learning_rate": 7.206812652068128e-06, "loss": 0.5343, "step": 29808 }, { "epoch": 0.8703103559020174, "grad_norm": 0.5463792714534444, "learning_rate": 7.205190592051907e-06, "loss": 0.6259, "step": 29809 }, { "epoch": 0.8703395521298648, "grad_norm": 0.5534491941227462, "learning_rate": 7.203568532035686e-06, "loss": 0.618, "step": 29810 }, { "epoch": 0.8703687483577122, "grad_norm": 0.5373599715501437, "learning_rate": 7.201946472019465e-06, "loss": 0.5985, "step": 29811 }, { "epoch": 0.8703979445855595, "grad_norm": 0.5152992419461214, "learning_rate": 7.200324412003244e-06, "loss": 0.5939, "step": 29812 }, { "epoch": 0.8704271408134069, "grad_norm": 0.5217364169595381, "learning_rate": 7.1987023519870246e-06, "loss": 0.5768, "step": 29813 }, { "epoch": 0.8704563370412542, "grad_norm": 0.4779748219353944, "learning_rate": 7.197080291970804e-06, "loss": 0.4972, "step": 29814 }, { "epoch": 0.8704855332691016, "grad_norm": 0.5406537581519297, "learning_rate": 7.195458231954583e-06, "loss": 0.5764, "step": 29815 }, { "epoch": 0.870514729496949, "grad_norm": 0.5532392081320444, "learning_rate": 7.193836171938361e-06, "loss": 0.5988, "step": 29816 }, { "epoch": 0.8705439257247963, "grad_norm": 0.534386449778038, "learning_rate": 7.192214111922141e-06, "loss": 0.5327, "step": 29817 }, { "epoch": 0.8705731219526437, "grad_norm": 0.5162569841819994, "learning_rate": 7.1905920519059215e-06, "loss": 0.5339, "step": 29818 }, { "epoch": 0.870602318180491, "grad_norm": 0.5200730556968683, "learning_rate": 7.188969991889701e-06, "loss": 0.5689, "step": 29819 }, { "epoch": 0.8706315144083384, "grad_norm": 0.5303154122170832, "learning_rate": 7.18734793187348e-06, "loss": 0.5777, "step": 29820 }, { "epoch": 0.8706607106361858, "grad_norm": 0.5420966276917324, "learning_rate": 7.185725871857258e-06, "loss": 0.5789, "step": 29821 }, { "epoch": 0.8706899068640331, "grad_norm": 0.5274780864706953, "learning_rate": 7.1841038118410376e-06, "loss": 0.5589, "step": 29822 }, { "epoch": 0.8707191030918805, "grad_norm": 0.5371476118892483, "learning_rate": 7.1824817518248185e-06, "loss": 0.6265, "step": 29823 }, { "epoch": 0.8707482993197279, "grad_norm": 0.48602225777011504, "learning_rate": 7.180859691808598e-06, "loss": 0.5102, "step": 29824 }, { "epoch": 0.8707774955475752, "grad_norm": 0.5259084847088412, "learning_rate": 7.179237631792377e-06, "loss": 0.5023, "step": 29825 }, { "epoch": 0.8708066917754226, "grad_norm": 0.562004149840066, "learning_rate": 7.177615571776155e-06, "loss": 0.5458, "step": 29826 }, { "epoch": 0.8708358880032699, "grad_norm": 0.5312769165056779, "learning_rate": 7.175993511759936e-06, "loss": 0.5715, "step": 29827 }, { "epoch": 0.8708650842311173, "grad_norm": 0.5287270910377303, "learning_rate": 7.1743714517437154e-06, "loss": 0.5828, "step": 29828 }, { "epoch": 0.8708942804589647, "grad_norm": 0.5477826939960037, "learning_rate": 7.172749391727495e-06, "loss": 0.5918, "step": 29829 }, { "epoch": 0.870923476686812, "grad_norm": 0.5396765898284306, "learning_rate": 7.171127331711273e-06, "loss": 0.6165, "step": 29830 }, { "epoch": 0.8709526729146594, "grad_norm": 0.537648002806291, "learning_rate": 7.169505271695052e-06, "loss": 0.5998, "step": 29831 }, { "epoch": 0.8709818691425067, "grad_norm": 0.5370757502057824, "learning_rate": 7.167883211678833e-06, "loss": 0.6201, "step": 29832 }, { "epoch": 0.8710110653703541, "grad_norm": 0.5253993490041929, "learning_rate": 7.166261151662612e-06, "loss": 0.5433, "step": 29833 }, { "epoch": 0.8710402615982015, "grad_norm": 0.5033778585745144, "learning_rate": 7.164639091646392e-06, "loss": 0.5289, "step": 29834 }, { "epoch": 0.8710694578260488, "grad_norm": 0.5256027783480307, "learning_rate": 7.16301703163017e-06, "loss": 0.5661, "step": 29835 }, { "epoch": 0.8710986540538962, "grad_norm": 0.5255839982441561, "learning_rate": 7.161394971613949e-06, "loss": 0.5676, "step": 29836 }, { "epoch": 0.8711278502817436, "grad_norm": 0.4765527461733057, "learning_rate": 7.15977291159773e-06, "loss": 0.4808, "step": 29837 }, { "epoch": 0.8711570465095909, "grad_norm": 0.5048391920063887, "learning_rate": 7.158150851581509e-06, "loss": 0.5328, "step": 29838 }, { "epoch": 0.8711862427374383, "grad_norm": 0.5381639449391123, "learning_rate": 7.1565287915652886e-06, "loss": 0.6338, "step": 29839 }, { "epoch": 0.8712154389652856, "grad_norm": 0.5447808310075091, "learning_rate": 7.154906731549067e-06, "loss": 0.5874, "step": 29840 }, { "epoch": 0.871244635193133, "grad_norm": 0.555257881925274, "learning_rate": 7.153284671532846e-06, "loss": 0.5652, "step": 29841 }, { "epoch": 0.8712738314209804, "grad_norm": 0.5451153708675643, "learning_rate": 7.151662611516627e-06, "loss": 0.5772, "step": 29842 }, { "epoch": 0.8713030276488277, "grad_norm": 0.5246408908969926, "learning_rate": 7.150040551500406e-06, "loss": 0.5565, "step": 29843 }, { "epoch": 0.8713322238766751, "grad_norm": 0.5189315457199932, "learning_rate": 7.148418491484185e-06, "loss": 0.5579, "step": 29844 }, { "epoch": 0.8713614201045224, "grad_norm": 0.5463882101904345, "learning_rate": 7.146796431467964e-06, "loss": 0.6338, "step": 29845 }, { "epoch": 0.8713906163323698, "grad_norm": 0.47865961918961036, "learning_rate": 7.145174371451745e-06, "loss": 0.4975, "step": 29846 }, { "epoch": 0.8714198125602172, "grad_norm": 0.5177319010120742, "learning_rate": 7.143552311435524e-06, "loss": 0.5485, "step": 29847 }, { "epoch": 0.8714490087880645, "grad_norm": 0.5118716374896453, "learning_rate": 7.141930251419303e-06, "loss": 0.527, "step": 29848 }, { "epoch": 0.8714782050159119, "grad_norm": 0.5639725004284181, "learning_rate": 7.140308191403082e-06, "loss": 0.6582, "step": 29849 }, { "epoch": 0.8715074012437594, "grad_norm": 0.49814478931132455, "learning_rate": 7.138686131386861e-06, "loss": 0.5027, "step": 29850 }, { "epoch": 0.8715365974716067, "grad_norm": 0.5475894167636133, "learning_rate": 7.137064071370642e-06, "loss": 0.6632, "step": 29851 }, { "epoch": 0.8715657936994541, "grad_norm": 0.4925978536530158, "learning_rate": 7.135442011354421e-06, "loss": 0.5247, "step": 29852 }, { "epoch": 0.8715949899273014, "grad_norm": 0.5029278658295876, "learning_rate": 7.1338199513382e-06, "loss": 0.54, "step": 29853 }, { "epoch": 0.8716241861551488, "grad_norm": 0.4929134661189882, "learning_rate": 7.1321978913219786e-06, "loss": 0.4953, "step": 29854 }, { "epoch": 0.8716533823829962, "grad_norm": 0.5296737094936397, "learning_rate": 7.130575831305758e-06, "loss": 0.6242, "step": 29855 }, { "epoch": 0.8716825786108435, "grad_norm": 0.5706727758751111, "learning_rate": 7.128953771289539e-06, "loss": 0.6461, "step": 29856 }, { "epoch": 0.8717117748386909, "grad_norm": 0.5164841053110489, "learning_rate": 7.127331711273318e-06, "loss": 0.5602, "step": 29857 }, { "epoch": 0.8717409710665383, "grad_norm": 0.5158510994415617, "learning_rate": 7.125709651257097e-06, "loss": 0.5891, "step": 29858 }, { "epoch": 0.8717701672943856, "grad_norm": 0.5246291043560516, "learning_rate": 7.1240875912408755e-06, "loss": 0.558, "step": 29859 }, { "epoch": 0.871799363522233, "grad_norm": 0.5236693960502717, "learning_rate": 7.1224655312246564e-06, "loss": 0.5809, "step": 29860 }, { "epoch": 0.8718285597500803, "grad_norm": 0.5340057069821648, "learning_rate": 7.120843471208436e-06, "loss": 0.5722, "step": 29861 }, { "epoch": 0.8718577559779277, "grad_norm": 0.5280319607958655, "learning_rate": 7.119221411192215e-06, "loss": 0.5735, "step": 29862 }, { "epoch": 0.8718869522057751, "grad_norm": 0.48848883223985484, "learning_rate": 7.117599351175993e-06, "loss": 0.4998, "step": 29863 }, { "epoch": 0.8719161484336224, "grad_norm": 0.4799226408816998, "learning_rate": 7.1159772911597725e-06, "loss": 0.4833, "step": 29864 }, { "epoch": 0.8719453446614698, "grad_norm": 0.5562743883874847, "learning_rate": 7.114355231143553e-06, "loss": 0.611, "step": 29865 }, { "epoch": 0.8719745408893171, "grad_norm": 0.5216578381963258, "learning_rate": 7.112733171127333e-06, "loss": 0.5835, "step": 29866 }, { "epoch": 0.8720037371171645, "grad_norm": 0.5018016293064439, "learning_rate": 7.111111111111112e-06, "loss": 0.547, "step": 29867 }, { "epoch": 0.8720329333450119, "grad_norm": 0.5405732578752704, "learning_rate": 7.10948905109489e-06, "loss": 0.5605, "step": 29868 }, { "epoch": 0.8720621295728592, "grad_norm": 0.5345818523475263, "learning_rate": 7.107866991078669e-06, "loss": 0.5651, "step": 29869 }, { "epoch": 0.8720913258007066, "grad_norm": 0.5611354037219589, "learning_rate": 7.10624493106245e-06, "loss": 0.6514, "step": 29870 }, { "epoch": 0.872120522028554, "grad_norm": 0.5277375097476558, "learning_rate": 7.1046228710462296e-06, "loss": 0.6149, "step": 29871 }, { "epoch": 0.8721497182564013, "grad_norm": 0.5313254189321139, "learning_rate": 7.103000811030009e-06, "loss": 0.611, "step": 29872 }, { "epoch": 0.8721789144842487, "grad_norm": 0.5029145220367239, "learning_rate": 7.101378751013787e-06, "loss": 0.537, "step": 29873 }, { "epoch": 0.872208110712096, "grad_norm": 0.5141931537721858, "learning_rate": 7.099756690997566e-06, "loss": 0.5158, "step": 29874 }, { "epoch": 0.8722373069399434, "grad_norm": 0.5268457322086747, "learning_rate": 7.098134630981347e-06, "loss": 0.5763, "step": 29875 }, { "epoch": 0.8722665031677908, "grad_norm": 0.4849482340921718, "learning_rate": 7.0965125709651265e-06, "loss": 0.4811, "step": 29876 }, { "epoch": 0.8722956993956381, "grad_norm": 0.5395388919269059, "learning_rate": 7.094890510948905e-06, "loss": 0.5838, "step": 29877 }, { "epoch": 0.8723248956234855, "grad_norm": 0.5285683848649542, "learning_rate": 7.093268450932684e-06, "loss": 0.5435, "step": 29878 }, { "epoch": 0.8723540918513328, "grad_norm": 0.545616882759585, "learning_rate": 7.091646390916465e-06, "loss": 0.6423, "step": 29879 }, { "epoch": 0.8723832880791802, "grad_norm": 0.5476870206206708, "learning_rate": 7.090024330900244e-06, "loss": 0.6351, "step": 29880 }, { "epoch": 0.8724124843070276, "grad_norm": 0.5202381731912946, "learning_rate": 7.0884022708840235e-06, "loss": 0.5606, "step": 29881 }, { "epoch": 0.8724416805348749, "grad_norm": 0.5686691172353944, "learning_rate": 7.086780210867802e-06, "loss": 0.6922, "step": 29882 }, { "epoch": 0.8724708767627223, "grad_norm": 0.9962939529502921, "learning_rate": 7.085158150851581e-06, "loss": 0.7317, "step": 29883 }, { "epoch": 0.8725000729905696, "grad_norm": 0.5279287364967519, "learning_rate": 7.083536090835362e-06, "loss": 0.5882, "step": 29884 }, { "epoch": 0.872529269218417, "grad_norm": 0.5087177699037332, "learning_rate": 7.081914030819141e-06, "loss": 0.5481, "step": 29885 }, { "epoch": 0.8725584654462644, "grad_norm": 0.522250024691769, "learning_rate": 7.08029197080292e-06, "loss": 0.593, "step": 29886 }, { "epoch": 0.8725876616741117, "grad_norm": 0.4896008830172477, "learning_rate": 7.078669910786699e-06, "loss": 0.5005, "step": 29887 }, { "epoch": 0.8726168579019591, "grad_norm": 0.5253909343332736, "learning_rate": 7.077047850770478e-06, "loss": 0.5657, "step": 29888 }, { "epoch": 0.8726460541298064, "grad_norm": 0.5552267526444555, "learning_rate": 7.075425790754259e-06, "loss": 0.6098, "step": 29889 }, { "epoch": 0.8726752503576538, "grad_norm": 0.5508668523082683, "learning_rate": 7.073803730738038e-06, "loss": 0.6021, "step": 29890 }, { "epoch": 0.8727044465855012, "grad_norm": 0.5589293009336973, "learning_rate": 7.0721816707218165e-06, "loss": 0.6695, "step": 29891 }, { "epoch": 0.8727336428133485, "grad_norm": 0.5307983860262154, "learning_rate": 7.070559610705596e-06, "loss": 0.5851, "step": 29892 }, { "epoch": 0.8727628390411959, "grad_norm": 0.5444071593090491, "learning_rate": 7.068937550689377e-06, "loss": 0.5812, "step": 29893 }, { "epoch": 0.8727920352690433, "grad_norm": 0.4970173107766931, "learning_rate": 7.067315490673156e-06, "loss": 0.5396, "step": 29894 }, { "epoch": 0.8728212314968906, "grad_norm": 0.5513612077683393, "learning_rate": 7.065693430656935e-06, "loss": 0.6634, "step": 29895 }, { "epoch": 0.872850427724738, "grad_norm": 0.5345233833802315, "learning_rate": 7.0640713706407135e-06, "loss": 0.5779, "step": 29896 }, { "epoch": 0.8728796239525853, "grad_norm": 0.5269355734841206, "learning_rate": 7.062449310624493e-06, "loss": 0.562, "step": 29897 }, { "epoch": 0.8729088201804327, "grad_norm": 0.5910136585076359, "learning_rate": 7.060827250608274e-06, "loss": 0.6062, "step": 29898 }, { "epoch": 0.8729380164082801, "grad_norm": 0.48412158118517207, "learning_rate": 7.059205190592053e-06, "loss": 0.4957, "step": 29899 }, { "epoch": 0.8729672126361274, "grad_norm": 0.5113579173382082, "learning_rate": 7.057583130575832e-06, "loss": 0.5346, "step": 29900 }, { "epoch": 0.8729964088639748, "grad_norm": 0.5458266820068989, "learning_rate": 7.05596107055961e-06, "loss": 0.6584, "step": 29901 }, { "epoch": 0.8730256050918221, "grad_norm": 0.5557713049458927, "learning_rate": 7.05433901054339e-06, "loss": 0.6505, "step": 29902 }, { "epoch": 0.8730548013196695, "grad_norm": 0.5265267284902967, "learning_rate": 7.0527169505271706e-06, "loss": 0.5567, "step": 29903 }, { "epoch": 0.8730839975475169, "grad_norm": 0.48583020873084326, "learning_rate": 7.05109489051095e-06, "loss": 0.5098, "step": 29904 }, { "epoch": 0.8731131937753642, "grad_norm": 0.5362145707988998, "learning_rate": 7.049472830494728e-06, "loss": 0.5464, "step": 29905 }, { "epoch": 0.8731423900032116, "grad_norm": 0.5413384726164009, "learning_rate": 7.047850770478507e-06, "loss": 0.6422, "step": 29906 }, { "epoch": 0.873171586231059, "grad_norm": 0.5272428619163635, "learning_rate": 7.046228710462287e-06, "loss": 0.5469, "step": 29907 }, { "epoch": 0.8732007824589063, "grad_norm": 0.5617149451279, "learning_rate": 7.0446066504460675e-06, "loss": 0.6848, "step": 29908 }, { "epoch": 0.8732299786867537, "grad_norm": 0.5508586869110584, "learning_rate": 7.042984590429847e-06, "loss": 0.648, "step": 29909 }, { "epoch": 0.873259174914601, "grad_norm": 0.5292984684435679, "learning_rate": 7.041362530413625e-06, "loss": 0.5997, "step": 29910 }, { "epoch": 0.8732883711424484, "grad_norm": 0.5718803609244935, "learning_rate": 7.039740470397404e-06, "loss": 0.681, "step": 29911 }, { "epoch": 0.8733175673702958, "grad_norm": 0.5432155631801702, "learning_rate": 7.038118410381185e-06, "loss": 0.6405, "step": 29912 }, { "epoch": 0.8733467635981431, "grad_norm": 0.5302667480043661, "learning_rate": 7.0364963503649645e-06, "loss": 0.5858, "step": 29913 }, { "epoch": 0.8733759598259905, "grad_norm": 0.508948000565189, "learning_rate": 7.034874290348744e-06, "loss": 0.5444, "step": 29914 }, { "epoch": 0.8734051560538378, "grad_norm": 0.5296267723544378, "learning_rate": 7.033252230332522e-06, "loss": 0.5918, "step": 29915 }, { "epoch": 0.8734343522816852, "grad_norm": 0.5075995344031339, "learning_rate": 7.031630170316301e-06, "loss": 0.5518, "step": 29916 }, { "epoch": 0.8734635485095326, "grad_norm": 0.5016405322525194, "learning_rate": 7.030008110300082e-06, "loss": 0.5513, "step": 29917 }, { "epoch": 0.8734927447373799, "grad_norm": 0.5037173879331746, "learning_rate": 7.028386050283861e-06, "loss": 0.5066, "step": 29918 }, { "epoch": 0.8735219409652273, "grad_norm": 0.5334845336930397, "learning_rate": 7.02676399026764e-06, "loss": 0.5804, "step": 29919 }, { "epoch": 0.8735511371930746, "grad_norm": 0.5226643289184173, "learning_rate": 7.025141930251419e-06, "loss": 0.5628, "step": 29920 }, { "epoch": 0.873580333420922, "grad_norm": 0.5119447300404832, "learning_rate": 7.023519870235198e-06, "loss": 0.5491, "step": 29921 }, { "epoch": 0.8736095296487694, "grad_norm": 0.5278379524260245, "learning_rate": 7.021897810218979e-06, "loss": 0.6154, "step": 29922 }, { "epoch": 0.8736387258766167, "grad_norm": 0.5277548895973256, "learning_rate": 7.020275750202758e-06, "loss": 0.5748, "step": 29923 }, { "epoch": 0.8736679221044641, "grad_norm": 0.509491416030634, "learning_rate": 7.018653690186537e-06, "loss": 0.5051, "step": 29924 }, { "epoch": 0.8736971183323115, "grad_norm": 0.5171302414057777, "learning_rate": 7.017031630170316e-06, "loss": 0.5521, "step": 29925 }, { "epoch": 0.8737263145601588, "grad_norm": 0.4977864201727187, "learning_rate": 7.015409570154097e-06, "loss": 0.5361, "step": 29926 }, { "epoch": 0.8737555107880062, "grad_norm": 0.489096028882352, "learning_rate": 7.013787510137876e-06, "loss": 0.4756, "step": 29927 }, { "epoch": 0.8737847070158535, "grad_norm": 0.5289986585046607, "learning_rate": 7.012165450121655e-06, "loss": 0.5395, "step": 29928 }, { "epoch": 0.8738139032437009, "grad_norm": 0.4729663226739985, "learning_rate": 7.010543390105434e-06, "loss": 0.4876, "step": 29929 }, { "epoch": 0.8738430994715483, "grad_norm": 0.5030758749256106, "learning_rate": 7.008921330089213e-06, "loss": 0.5302, "step": 29930 }, { "epoch": 0.8738722956993956, "grad_norm": 0.5182406738371189, "learning_rate": 7.007299270072994e-06, "loss": 0.5682, "step": 29931 }, { "epoch": 0.873901491927243, "grad_norm": 0.535580912589829, "learning_rate": 7.005677210056773e-06, "loss": 0.564, "step": 29932 }, { "epoch": 0.8739306881550903, "grad_norm": 0.5036557113960892, "learning_rate": 7.004055150040552e-06, "loss": 0.5388, "step": 29933 }, { "epoch": 0.8739598843829377, "grad_norm": 0.5479844566362193, "learning_rate": 7.002433090024331e-06, "loss": 0.6284, "step": 29934 }, { "epoch": 0.8739890806107851, "grad_norm": 0.5167697570727591, "learning_rate": 7.00081103000811e-06, "loss": 0.5543, "step": 29935 }, { "epoch": 0.8740182768386324, "grad_norm": 0.5107091722971904, "learning_rate": 6.999188969991891e-06, "loss": 0.5701, "step": 29936 }, { "epoch": 0.8740474730664798, "grad_norm": 0.5393595579041802, "learning_rate": 6.99756690997567e-06, "loss": 0.5643, "step": 29937 }, { "epoch": 0.8740766692943271, "grad_norm": 0.5192224034211145, "learning_rate": 6.995944849959448e-06, "loss": 0.5896, "step": 29938 }, { "epoch": 0.8741058655221745, "grad_norm": 0.5580339585359553, "learning_rate": 6.994322789943228e-06, "loss": 0.6554, "step": 29939 }, { "epoch": 0.8741350617500219, "grad_norm": 0.566144489077929, "learning_rate": 6.992700729927007e-06, "loss": 0.608, "step": 29940 }, { "epoch": 0.8741642579778692, "grad_norm": 0.4718525666503613, "learning_rate": 6.991078669910788e-06, "loss": 0.4838, "step": 29941 }, { "epoch": 0.8741934542057166, "grad_norm": 0.4753244712649652, "learning_rate": 6.989456609894567e-06, "loss": 0.4871, "step": 29942 }, { "epoch": 0.874222650433564, "grad_norm": 0.49093126783284347, "learning_rate": 6.987834549878345e-06, "loss": 0.5066, "step": 29943 }, { "epoch": 0.8742518466614113, "grad_norm": 0.4901435115132926, "learning_rate": 6.9862124898621245e-06, "loss": 0.4959, "step": 29944 }, { "epoch": 0.8742810428892587, "grad_norm": 0.5378472662773967, "learning_rate": 6.9845904298459055e-06, "loss": 0.587, "step": 29945 }, { "epoch": 0.874310239117106, "grad_norm": 0.49568886801146383, "learning_rate": 6.982968369829685e-06, "loss": 0.5701, "step": 29946 }, { "epoch": 0.8743394353449534, "grad_norm": 0.4904514006619329, "learning_rate": 6.981346309813464e-06, "loss": 0.4916, "step": 29947 }, { "epoch": 0.8743686315728008, "grad_norm": 0.5232428945178088, "learning_rate": 6.979724249797242e-06, "loss": 0.5495, "step": 29948 }, { "epoch": 0.8743978278006481, "grad_norm": 0.5112332156148344, "learning_rate": 6.9781021897810215e-06, "loss": 0.5665, "step": 29949 }, { "epoch": 0.8744270240284955, "grad_norm": 0.4970531097756257, "learning_rate": 6.976480129764802e-06, "loss": 0.5328, "step": 29950 }, { "epoch": 0.8744562202563428, "grad_norm": 0.5253626792116499, "learning_rate": 6.974858069748582e-06, "loss": 0.5681, "step": 29951 }, { "epoch": 0.8744854164841902, "grad_norm": 0.5232222067169912, "learning_rate": 6.97323600973236e-06, "loss": 0.5813, "step": 29952 }, { "epoch": 0.8745146127120376, "grad_norm": 0.5265295083117368, "learning_rate": 6.971613949716139e-06, "loss": 0.5706, "step": 29953 }, { "epoch": 0.8745438089398849, "grad_norm": 0.5189221019395432, "learning_rate": 6.9699918896999184e-06, "loss": 0.5645, "step": 29954 }, { "epoch": 0.8745730051677323, "grad_norm": 0.5537191224893725, "learning_rate": 6.968369829683699e-06, "loss": 0.6126, "step": 29955 }, { "epoch": 0.8746022013955796, "grad_norm": 0.5393534632980774, "learning_rate": 6.966747769667479e-06, "loss": 0.5991, "step": 29956 }, { "epoch": 0.874631397623427, "grad_norm": 0.5123906347403236, "learning_rate": 6.965125709651257e-06, "loss": 0.559, "step": 29957 }, { "epoch": 0.8746605938512744, "grad_norm": 0.5318213788843665, "learning_rate": 6.963503649635036e-06, "loss": 0.5872, "step": 29958 }, { "epoch": 0.8746897900791217, "grad_norm": 0.5063578714892003, "learning_rate": 6.961881589618815e-06, "loss": 0.5362, "step": 29959 }, { "epoch": 0.8747189863069691, "grad_norm": 0.5438954758004686, "learning_rate": 6.960259529602596e-06, "loss": 0.5929, "step": 29960 }, { "epoch": 0.8747481825348165, "grad_norm": 0.4349357947253276, "learning_rate": 6.9586374695863755e-06, "loss": 0.3726, "step": 29961 }, { "epoch": 0.8747773787626638, "grad_norm": 0.5230726984316819, "learning_rate": 6.957015409570154e-06, "loss": 0.5601, "step": 29962 }, { "epoch": 0.8748065749905112, "grad_norm": 0.5362941360687373, "learning_rate": 6.955393349553933e-06, "loss": 0.5707, "step": 29963 }, { "epoch": 0.8748357712183585, "grad_norm": 0.5100833417516595, "learning_rate": 6.953771289537714e-06, "loss": 0.5738, "step": 29964 }, { "epoch": 0.8748649674462059, "grad_norm": 0.5631833130470429, "learning_rate": 6.952149229521493e-06, "loss": 0.6242, "step": 29965 }, { "epoch": 0.8748941636740533, "grad_norm": 0.5334882820952765, "learning_rate": 6.950527169505272e-06, "loss": 0.5891, "step": 29966 }, { "epoch": 0.8749233599019006, "grad_norm": 0.4943775028561619, "learning_rate": 6.948905109489051e-06, "loss": 0.5267, "step": 29967 }, { "epoch": 0.874952556129748, "grad_norm": 0.5221276561067244, "learning_rate": 6.94728304947283e-06, "loss": 0.5726, "step": 29968 }, { "epoch": 0.8749817523575953, "grad_norm": 0.5382719825648764, "learning_rate": 6.945660989456611e-06, "loss": 0.5923, "step": 29969 }, { "epoch": 0.8750109485854428, "grad_norm": 0.5188310038403882, "learning_rate": 6.94403892944039e-06, "loss": 0.5638, "step": 29970 }, { "epoch": 0.8750401448132902, "grad_norm": 0.5642139140310357, "learning_rate": 6.942416869424169e-06, "loss": 0.5922, "step": 29971 }, { "epoch": 0.8750693410411375, "grad_norm": 0.510582872371923, "learning_rate": 6.940794809407948e-06, "loss": 0.552, "step": 29972 }, { "epoch": 0.8750985372689849, "grad_norm": 0.5252887751038713, "learning_rate": 6.939172749391727e-06, "loss": 0.5941, "step": 29973 }, { "epoch": 0.8751277334968323, "grad_norm": 0.5567304186696242, "learning_rate": 6.937550689375508e-06, "loss": 0.6309, "step": 29974 }, { "epoch": 0.8751569297246796, "grad_norm": 0.5394589506091764, "learning_rate": 6.935928629359287e-06, "loss": 0.611, "step": 29975 }, { "epoch": 0.875186125952527, "grad_norm": 0.5511692223677543, "learning_rate": 6.9343065693430655e-06, "loss": 0.6452, "step": 29976 }, { "epoch": 0.8752153221803743, "grad_norm": 0.5451326498903817, "learning_rate": 6.932684509326845e-06, "loss": 0.6356, "step": 29977 }, { "epoch": 0.8752445184082217, "grad_norm": 0.523638280732332, "learning_rate": 6.931062449310626e-06, "loss": 0.518, "step": 29978 }, { "epoch": 0.8752737146360691, "grad_norm": 0.5169192642174437, "learning_rate": 6.929440389294405e-06, "loss": 0.5238, "step": 29979 }, { "epoch": 0.8753029108639164, "grad_norm": 0.4860569837084927, "learning_rate": 6.927818329278183e-06, "loss": 0.5069, "step": 29980 }, { "epoch": 0.8753321070917638, "grad_norm": 0.5016825650277096, "learning_rate": 6.9261962692619625e-06, "loss": 0.5479, "step": 29981 }, { "epoch": 0.8753613033196112, "grad_norm": 0.5471596298917175, "learning_rate": 6.924574209245742e-06, "loss": 0.597, "step": 29982 }, { "epoch": 0.8753904995474585, "grad_norm": 0.5209314354445461, "learning_rate": 6.922952149229523e-06, "loss": 0.6185, "step": 29983 }, { "epoch": 0.8754196957753059, "grad_norm": 0.5369585459647395, "learning_rate": 6.921330089213302e-06, "loss": 0.5734, "step": 29984 }, { "epoch": 0.8754488920031532, "grad_norm": 0.562730149236093, "learning_rate": 6.91970802919708e-06, "loss": 0.6528, "step": 29985 }, { "epoch": 0.8754780882310006, "grad_norm": 0.5496559397085082, "learning_rate": 6.9180859691808594e-06, "loss": 0.5767, "step": 29986 }, { "epoch": 0.875507284458848, "grad_norm": 0.49150364049974726, "learning_rate": 6.916463909164639e-06, "loss": 0.4754, "step": 29987 }, { "epoch": 0.8755364806866953, "grad_norm": 0.5383651619765113, "learning_rate": 6.91484184914842e-06, "loss": 0.6441, "step": 29988 }, { "epoch": 0.8755656769145427, "grad_norm": 0.4980950496280905, "learning_rate": 6.913219789132199e-06, "loss": 0.5057, "step": 29989 }, { "epoch": 0.87559487314239, "grad_norm": 0.5175281489909916, "learning_rate": 6.911597729115977e-06, "loss": 0.5766, "step": 29990 }, { "epoch": 0.8756240693702374, "grad_norm": 0.5119355538937326, "learning_rate": 6.909975669099756e-06, "loss": 0.553, "step": 29991 }, { "epoch": 0.8756532655980848, "grad_norm": 0.5225556794404772, "learning_rate": 6.908353609083536e-06, "loss": 0.5834, "step": 29992 }, { "epoch": 0.8756824618259321, "grad_norm": 0.5458045721853487, "learning_rate": 6.9067315490673165e-06, "loss": 0.5759, "step": 29993 }, { "epoch": 0.8757116580537795, "grad_norm": 0.5044201309749355, "learning_rate": 6.905109489051096e-06, "loss": 0.5294, "step": 29994 }, { "epoch": 0.8757408542816268, "grad_norm": 0.5416334075852349, "learning_rate": 6.903487429034874e-06, "loss": 0.5799, "step": 29995 }, { "epoch": 0.8757700505094742, "grad_norm": 0.4842034190600774, "learning_rate": 6.901865369018653e-06, "loss": 0.4911, "step": 29996 }, { "epoch": 0.8757992467373216, "grad_norm": 0.6117247613994036, "learning_rate": 6.900243309002434e-06, "loss": 0.5461, "step": 29997 }, { "epoch": 0.8758284429651689, "grad_norm": 0.5317377572519131, "learning_rate": 6.8986212489862135e-06, "loss": 0.5883, "step": 29998 }, { "epoch": 0.8758576391930163, "grad_norm": 0.5164500783936895, "learning_rate": 6.896999188969992e-06, "loss": 0.5823, "step": 29999 }, { "epoch": 0.8758868354208637, "grad_norm": 1.787004891090687, "learning_rate": 6.895377128953771e-06, "loss": 0.5917, "step": 30000 }, { "epoch": 0.875916031648711, "grad_norm": 0.513606801754141, "learning_rate": 6.89375506893755e-06, "loss": 0.5518, "step": 30001 }, { "epoch": 0.8759452278765584, "grad_norm": 0.5485886794020886, "learning_rate": 6.892133008921331e-06, "loss": 0.5943, "step": 30002 }, { "epoch": 0.8759744241044057, "grad_norm": 0.5381043598809334, "learning_rate": 6.8905109489051104e-06, "loss": 0.5868, "step": 30003 }, { "epoch": 0.8760036203322531, "grad_norm": 0.5588994944005062, "learning_rate": 6.888888888888889e-06, "loss": 0.5679, "step": 30004 }, { "epoch": 0.8760328165601005, "grad_norm": 0.5443349391235701, "learning_rate": 6.887266828872668e-06, "loss": 0.6321, "step": 30005 }, { "epoch": 0.8760620127879478, "grad_norm": 0.5293553544514691, "learning_rate": 6.885644768856447e-06, "loss": 0.5779, "step": 30006 }, { "epoch": 0.8760912090157952, "grad_norm": 0.5195342610902726, "learning_rate": 6.884022708840228e-06, "loss": 0.5883, "step": 30007 }, { "epoch": 0.8761204052436425, "grad_norm": 0.5847844234066345, "learning_rate": 6.882400648824007e-06, "loss": 0.6766, "step": 30008 }, { "epoch": 0.8761496014714899, "grad_norm": 0.5753591755724884, "learning_rate": 6.880778588807786e-06, "loss": 0.652, "step": 30009 }, { "epoch": 0.8761787976993373, "grad_norm": 0.5182548341690715, "learning_rate": 6.879156528791565e-06, "loss": 0.5483, "step": 30010 }, { "epoch": 0.8762079939271846, "grad_norm": 0.5633143525003844, "learning_rate": 6.877534468775346e-06, "loss": 0.6557, "step": 30011 }, { "epoch": 0.876237190155032, "grad_norm": 0.5512691441168843, "learning_rate": 6.875912408759125e-06, "loss": 0.6454, "step": 30012 }, { "epoch": 0.8762663863828793, "grad_norm": 0.5255037727199828, "learning_rate": 6.8742903487429035e-06, "loss": 0.5962, "step": 30013 }, { "epoch": 0.8762955826107267, "grad_norm": 0.5312741957662268, "learning_rate": 6.872668288726683e-06, "loss": 0.5945, "step": 30014 }, { "epoch": 0.8763247788385741, "grad_norm": 0.4675312285860971, "learning_rate": 6.871046228710462e-06, "loss": 0.4886, "step": 30015 }, { "epoch": 0.8763539750664214, "grad_norm": 0.5139015868287127, "learning_rate": 6.869424168694243e-06, "loss": 0.601, "step": 30016 }, { "epoch": 0.8763831712942688, "grad_norm": 0.5371915439094191, "learning_rate": 6.867802108678022e-06, "loss": 0.5573, "step": 30017 }, { "epoch": 0.8764123675221162, "grad_norm": 0.5560119748270749, "learning_rate": 6.8661800486618004e-06, "loss": 0.6268, "step": 30018 }, { "epoch": 0.8764415637499635, "grad_norm": 0.48530732402229404, "learning_rate": 6.86455798864558e-06, "loss": 0.4741, "step": 30019 }, { "epoch": 0.8764707599778109, "grad_norm": 0.5850148834906054, "learning_rate": 6.862935928629359e-06, "loss": 0.6862, "step": 30020 }, { "epoch": 0.8764999562056582, "grad_norm": 0.534798738953248, "learning_rate": 6.86131386861314e-06, "loss": 0.5938, "step": 30021 }, { "epoch": 0.8765291524335056, "grad_norm": 0.5452262847235766, "learning_rate": 6.859691808596919e-06, "loss": 0.6169, "step": 30022 }, { "epoch": 0.876558348661353, "grad_norm": 0.5105254699502646, "learning_rate": 6.858069748580697e-06, "loss": 0.5548, "step": 30023 }, { "epoch": 0.8765875448892003, "grad_norm": 0.4783255312092336, "learning_rate": 6.856447688564477e-06, "loss": 0.5088, "step": 30024 }, { "epoch": 0.8766167411170477, "grad_norm": 0.5316537392392585, "learning_rate": 6.854825628548256e-06, "loss": 0.5669, "step": 30025 }, { "epoch": 0.876645937344895, "grad_norm": 0.5242929902583754, "learning_rate": 6.853203568532037e-06, "loss": 0.5349, "step": 30026 }, { "epoch": 0.8766751335727424, "grad_norm": 0.5063810036378725, "learning_rate": 6.851581508515815e-06, "loss": 0.5228, "step": 30027 }, { "epoch": 0.8767043298005898, "grad_norm": 0.5057705516059039, "learning_rate": 6.849959448499594e-06, "loss": 0.5696, "step": 30028 }, { "epoch": 0.8767335260284371, "grad_norm": 0.5543159611015566, "learning_rate": 6.8483373884833736e-06, "loss": 0.5877, "step": 30029 }, { "epoch": 0.8767627222562845, "grad_norm": 0.5156790304972102, "learning_rate": 6.8467153284671545e-06, "loss": 0.5803, "step": 30030 }, { "epoch": 0.8767919184841318, "grad_norm": 0.5600351071576556, "learning_rate": 6.845093268450934e-06, "loss": 0.6764, "step": 30031 }, { "epoch": 0.8768211147119792, "grad_norm": 0.5571051524698334, "learning_rate": 6.843471208434712e-06, "loss": 0.6807, "step": 30032 }, { "epoch": 0.8768503109398266, "grad_norm": 0.5569278919055968, "learning_rate": 6.841849148418491e-06, "loss": 0.6107, "step": 30033 }, { "epoch": 0.8768795071676739, "grad_norm": 0.5273412261382907, "learning_rate": 6.8402270884022705e-06, "loss": 0.5721, "step": 30034 }, { "epoch": 0.8769087033955213, "grad_norm": 0.4931955554295923, "learning_rate": 6.8386050283860514e-06, "loss": 0.5107, "step": 30035 }, { "epoch": 0.8769378996233687, "grad_norm": 0.5196878933836503, "learning_rate": 6.836982968369831e-06, "loss": 0.564, "step": 30036 }, { "epoch": 0.876967095851216, "grad_norm": 0.524391615104345, "learning_rate": 6.835360908353609e-06, "loss": 0.6001, "step": 30037 }, { "epoch": 0.8769962920790634, "grad_norm": 0.5168145825707763, "learning_rate": 6.833738848337388e-06, "loss": 0.5549, "step": 30038 }, { "epoch": 0.8770254883069107, "grad_norm": 0.5284698573061328, "learning_rate": 6.8321167883211675e-06, "loss": 0.5669, "step": 30039 }, { "epoch": 0.8770546845347581, "grad_norm": 0.48520661877154986, "learning_rate": 6.830494728304948e-06, "loss": 0.4722, "step": 30040 }, { "epoch": 0.8770838807626055, "grad_norm": 0.6054184695907373, "learning_rate": 6.828872668288727e-06, "loss": 0.5944, "step": 30041 }, { "epoch": 0.8771130769904528, "grad_norm": 0.4971452548913386, "learning_rate": 6.827250608272506e-06, "loss": 0.5454, "step": 30042 }, { "epoch": 0.8771422732183002, "grad_norm": 0.5492119318155754, "learning_rate": 6.825628548256285e-06, "loss": 0.6011, "step": 30043 }, { "epoch": 0.8771714694461475, "grad_norm": 0.5039603540365876, "learning_rate": 6.824006488240066e-06, "loss": 0.5192, "step": 30044 }, { "epoch": 0.8772006656739949, "grad_norm": 0.555403020561254, "learning_rate": 6.822384428223845e-06, "loss": 0.6166, "step": 30045 }, { "epoch": 0.8772298619018423, "grad_norm": 0.5069827202158497, "learning_rate": 6.820762368207624e-06, "loss": 0.5411, "step": 30046 }, { "epoch": 0.8772590581296896, "grad_norm": 0.5074685460562023, "learning_rate": 6.819140308191403e-06, "loss": 0.5438, "step": 30047 }, { "epoch": 0.877288254357537, "grad_norm": 0.5143246399026393, "learning_rate": 6.817518248175182e-06, "loss": 0.5627, "step": 30048 }, { "epoch": 0.8773174505853844, "grad_norm": 0.5418193528842804, "learning_rate": 6.815896188158963e-06, "loss": 0.6538, "step": 30049 }, { "epoch": 0.8773466468132317, "grad_norm": 0.521664966618833, "learning_rate": 6.814274128142742e-06, "loss": 0.5695, "step": 30050 }, { "epoch": 0.8773758430410791, "grad_norm": 0.4882480267442194, "learning_rate": 6.812652068126521e-06, "loss": 0.5215, "step": 30051 }, { "epoch": 0.8774050392689264, "grad_norm": 0.5130713077179074, "learning_rate": 6.8110300081103e-06, "loss": 0.5704, "step": 30052 }, { "epoch": 0.8774342354967738, "grad_norm": 0.5509765610045323, "learning_rate": 6.809407948094079e-06, "loss": 0.6612, "step": 30053 }, { "epoch": 0.8774634317246212, "grad_norm": 0.5267100530053848, "learning_rate": 6.80778588807786e-06, "loss": 0.5781, "step": 30054 }, { "epoch": 0.8774926279524685, "grad_norm": 0.4926924135205717, "learning_rate": 6.806163828061639e-06, "loss": 0.5178, "step": 30055 }, { "epoch": 0.8775218241803159, "grad_norm": 0.5224978180173937, "learning_rate": 6.804541768045418e-06, "loss": 0.5711, "step": 30056 }, { "epoch": 0.8775510204081632, "grad_norm": 0.5254207255497093, "learning_rate": 6.802919708029197e-06, "loss": 0.5471, "step": 30057 }, { "epoch": 0.8775802166360106, "grad_norm": 0.5536519468689104, "learning_rate": 6.801297648012976e-06, "loss": 0.5673, "step": 30058 }, { "epoch": 0.877609412863858, "grad_norm": 0.527670418268298, "learning_rate": 6.799675587996757e-06, "loss": 0.5432, "step": 30059 }, { "epoch": 0.8776386090917053, "grad_norm": 0.5148378708418415, "learning_rate": 6.798053527980535e-06, "loss": 0.5263, "step": 30060 }, { "epoch": 0.8776678053195527, "grad_norm": 0.5593021205838252, "learning_rate": 6.7964314679643146e-06, "loss": 0.6513, "step": 30061 }, { "epoch": 0.8776970015474, "grad_norm": 0.5048440139709367, "learning_rate": 6.794809407948094e-06, "loss": 0.556, "step": 30062 }, { "epoch": 0.8777261977752474, "grad_norm": 0.5270199533633506, "learning_rate": 6.793187347931875e-06, "loss": 0.5622, "step": 30063 }, { "epoch": 0.8777553940030948, "grad_norm": 0.5490722664004514, "learning_rate": 6.791565287915654e-06, "loss": 0.5934, "step": 30064 }, { "epoch": 0.8777845902309421, "grad_norm": 0.5036772349178373, "learning_rate": 6.789943227899432e-06, "loss": 0.5431, "step": 30065 }, { "epoch": 0.8778137864587895, "grad_norm": 0.5183937304323258, "learning_rate": 6.7883211678832115e-06, "loss": 0.5328, "step": 30066 }, { "epoch": 0.8778429826866369, "grad_norm": 0.562728071700349, "learning_rate": 6.786699107866991e-06, "loss": 0.6734, "step": 30067 }, { "epoch": 0.8778721789144842, "grad_norm": 0.5359777400594962, "learning_rate": 6.785077047850772e-06, "loss": 0.6042, "step": 30068 }, { "epoch": 0.8779013751423316, "grad_norm": 0.4821471792218361, "learning_rate": 6.783454987834551e-06, "loss": 0.5094, "step": 30069 }, { "epoch": 0.8779305713701789, "grad_norm": 0.5231882967387272, "learning_rate": 6.781832927818329e-06, "loss": 0.5657, "step": 30070 }, { "epoch": 0.8779597675980263, "grad_norm": 0.550644324349996, "learning_rate": 6.7802108678021085e-06, "loss": 0.6116, "step": 30071 }, { "epoch": 0.8779889638258737, "grad_norm": 0.587506968368195, "learning_rate": 6.778588807785888e-06, "loss": 0.6961, "step": 30072 }, { "epoch": 0.878018160053721, "grad_norm": 0.5020478543275696, "learning_rate": 6.776966747769669e-06, "loss": 0.535, "step": 30073 }, { "epoch": 0.8780473562815684, "grad_norm": 0.5182830169921823, "learning_rate": 6.775344687753447e-06, "loss": 0.5259, "step": 30074 }, { "epoch": 0.8780765525094157, "grad_norm": 0.5148415277641937, "learning_rate": 6.773722627737226e-06, "loss": 0.6014, "step": 30075 }, { "epoch": 0.8781057487372631, "grad_norm": 0.522526096524447, "learning_rate": 6.7721005677210054e-06, "loss": 0.5658, "step": 30076 }, { "epoch": 0.8781349449651105, "grad_norm": 0.5413920225727512, "learning_rate": 6.770478507704786e-06, "loss": 0.6085, "step": 30077 }, { "epoch": 0.8781641411929578, "grad_norm": 0.5200983577601257, "learning_rate": 6.7688564476885656e-06, "loss": 0.5791, "step": 30078 }, { "epoch": 0.8781933374208052, "grad_norm": 0.5285343465073461, "learning_rate": 6.767234387672344e-06, "loss": 0.5755, "step": 30079 }, { "epoch": 0.8782225336486525, "grad_norm": 0.5169296358149006, "learning_rate": 6.765612327656123e-06, "loss": 0.5604, "step": 30080 }, { "epoch": 0.8782517298764999, "grad_norm": 0.5282917283418885, "learning_rate": 6.763990267639902e-06, "loss": 0.5874, "step": 30081 }, { "epoch": 0.8782809261043473, "grad_norm": 0.5096368838753249, "learning_rate": 6.762368207623683e-06, "loss": 0.5557, "step": 30082 }, { "epoch": 0.8783101223321946, "grad_norm": 0.5112776878173452, "learning_rate": 6.7607461476074625e-06, "loss": 0.4795, "step": 30083 }, { "epoch": 0.878339318560042, "grad_norm": 0.4790669713079259, "learning_rate": 6.759124087591241e-06, "loss": 0.4996, "step": 30084 }, { "epoch": 0.8783685147878894, "grad_norm": 0.5578236892654727, "learning_rate": 6.75750202757502e-06, "loss": 0.6312, "step": 30085 }, { "epoch": 0.8783977110157367, "grad_norm": 0.5748429605758874, "learning_rate": 6.755879967558799e-06, "loss": 0.6097, "step": 30086 }, { "epoch": 0.8784269072435841, "grad_norm": 0.5118616781745664, "learning_rate": 6.75425790754258e-06, "loss": 0.5428, "step": 30087 }, { "epoch": 0.8784561034714314, "grad_norm": 0.52178019892769, "learning_rate": 6.752635847526359e-06, "loss": 0.5927, "step": 30088 }, { "epoch": 0.8784852996992788, "grad_norm": 0.5371545834403203, "learning_rate": 6.751013787510138e-06, "loss": 0.5399, "step": 30089 }, { "epoch": 0.8785144959271262, "grad_norm": 0.5069595847841152, "learning_rate": 6.749391727493917e-06, "loss": 0.5224, "step": 30090 }, { "epoch": 0.8785436921549736, "grad_norm": 0.5405488781072773, "learning_rate": 6.747769667477696e-06, "loss": 0.6086, "step": 30091 }, { "epoch": 0.878572888382821, "grad_norm": 0.48682478701552445, "learning_rate": 6.746147607461477e-06, "loss": 0.5309, "step": 30092 }, { "epoch": 0.8786020846106684, "grad_norm": 0.514995924209925, "learning_rate": 6.7445255474452556e-06, "loss": 0.5314, "step": 30093 }, { "epoch": 0.8786312808385157, "grad_norm": 0.5379117219647591, "learning_rate": 6.742903487429035e-06, "loss": 0.5638, "step": 30094 }, { "epoch": 0.8786604770663631, "grad_norm": 0.5454338378314004, "learning_rate": 6.741281427412814e-06, "loss": 0.6028, "step": 30095 }, { "epoch": 0.8786896732942104, "grad_norm": 0.5865550779592511, "learning_rate": 6.739659367396595e-06, "loss": 0.6495, "step": 30096 }, { "epoch": 0.8787188695220578, "grad_norm": 0.5282969944640751, "learning_rate": 6.738037307380374e-06, "loss": 0.5838, "step": 30097 }, { "epoch": 0.8787480657499052, "grad_norm": 0.5136834577706304, "learning_rate": 6.7364152473641525e-06, "loss": 0.5827, "step": 30098 }, { "epoch": 0.8787772619777525, "grad_norm": 0.5060342319073444, "learning_rate": 6.734793187347932e-06, "loss": 0.5291, "step": 30099 }, { "epoch": 0.8788064582055999, "grad_norm": 0.4957910405722324, "learning_rate": 6.733171127331711e-06, "loss": 0.5276, "step": 30100 }, { "epoch": 0.8788356544334472, "grad_norm": 0.5163060370862909, "learning_rate": 6.731549067315492e-06, "loss": 0.5469, "step": 30101 }, { "epoch": 0.8788648506612946, "grad_norm": 0.5244288675127663, "learning_rate": 6.72992700729927e-06, "loss": 0.5401, "step": 30102 }, { "epoch": 0.878894046889142, "grad_norm": 0.5317948358503103, "learning_rate": 6.7283049472830495e-06, "loss": 0.6156, "step": 30103 }, { "epoch": 0.8789232431169893, "grad_norm": 0.5563424086008522, "learning_rate": 6.726682887266829e-06, "loss": 0.5883, "step": 30104 }, { "epoch": 0.8789524393448367, "grad_norm": 0.5245478423567927, "learning_rate": 6.725060827250608e-06, "loss": 0.5723, "step": 30105 }, { "epoch": 0.878981635572684, "grad_norm": 0.5589268239963507, "learning_rate": 6.723438767234389e-06, "loss": 0.6188, "step": 30106 }, { "epoch": 0.8790108318005314, "grad_norm": 0.524744649893788, "learning_rate": 6.721816707218167e-06, "loss": 0.585, "step": 30107 }, { "epoch": 0.8790400280283788, "grad_norm": 0.5077781999339392, "learning_rate": 6.7201946472019464e-06, "loss": 0.54, "step": 30108 }, { "epoch": 0.8790692242562261, "grad_norm": 0.5258146403694501, "learning_rate": 6.718572587185726e-06, "loss": 0.5513, "step": 30109 }, { "epoch": 0.8790984204840735, "grad_norm": 0.537887844930179, "learning_rate": 6.716950527169505e-06, "loss": 0.5931, "step": 30110 }, { "epoch": 0.8791276167119209, "grad_norm": 0.46007809984084114, "learning_rate": 6.715328467153286e-06, "loss": 0.4624, "step": 30111 }, { "epoch": 0.8791568129397682, "grad_norm": 0.5421561932696577, "learning_rate": 6.713706407137064e-06, "loss": 0.5904, "step": 30112 }, { "epoch": 0.8791860091676156, "grad_norm": 0.5358496405590755, "learning_rate": 6.712084347120843e-06, "loss": 0.5871, "step": 30113 }, { "epoch": 0.8792152053954629, "grad_norm": 0.4902149661156587, "learning_rate": 6.710462287104623e-06, "loss": 0.5295, "step": 30114 }, { "epoch": 0.8792444016233103, "grad_norm": 0.5251386667179289, "learning_rate": 6.7088402270884035e-06, "loss": 0.5713, "step": 30115 }, { "epoch": 0.8792735978511577, "grad_norm": 0.5791879983527597, "learning_rate": 6.707218167072183e-06, "loss": 0.7307, "step": 30116 }, { "epoch": 0.879302794079005, "grad_norm": 0.5480631797085321, "learning_rate": 6.705596107055961e-06, "loss": 0.6438, "step": 30117 }, { "epoch": 0.8793319903068524, "grad_norm": 0.5121639093705003, "learning_rate": 6.70397404703974e-06, "loss": 0.5518, "step": 30118 }, { "epoch": 0.8793611865346997, "grad_norm": 0.5265473448503534, "learning_rate": 6.7023519870235196e-06, "loss": 0.5913, "step": 30119 }, { "epoch": 0.8793903827625471, "grad_norm": 0.4915168685690121, "learning_rate": 6.7007299270073005e-06, "loss": 0.5312, "step": 30120 }, { "epoch": 0.8794195789903945, "grad_norm": 0.4859050215017319, "learning_rate": 6.699107866991079e-06, "loss": 0.5201, "step": 30121 }, { "epoch": 0.8794487752182418, "grad_norm": 0.49027710322139345, "learning_rate": 6.697485806974858e-06, "loss": 0.5107, "step": 30122 }, { "epoch": 0.8794779714460892, "grad_norm": 0.5390414729990574, "learning_rate": 6.695863746958637e-06, "loss": 0.5946, "step": 30123 }, { "epoch": 0.8795071676739366, "grad_norm": 0.5758222347701949, "learning_rate": 6.6942416869424165e-06, "loss": 0.6461, "step": 30124 }, { "epoch": 0.8795363639017839, "grad_norm": 0.5091606978453207, "learning_rate": 6.692619626926197e-06, "loss": 0.5466, "step": 30125 }, { "epoch": 0.8795655601296313, "grad_norm": 0.5218100310699492, "learning_rate": 6.690997566909976e-06, "loss": 0.5818, "step": 30126 }, { "epoch": 0.8795947563574786, "grad_norm": 0.5118166775444041, "learning_rate": 6.689375506893755e-06, "loss": 0.5329, "step": 30127 }, { "epoch": 0.879623952585326, "grad_norm": 0.5410706829223921, "learning_rate": 6.687753446877534e-06, "loss": 0.6262, "step": 30128 }, { "epoch": 0.8796531488131734, "grad_norm": 0.5372427177113668, "learning_rate": 6.686131386861315e-06, "loss": 0.6698, "step": 30129 }, { "epoch": 0.8796823450410207, "grad_norm": 0.5179354833029889, "learning_rate": 6.684509326845094e-06, "loss": 0.5457, "step": 30130 }, { "epoch": 0.8797115412688681, "grad_norm": 0.48856682630023657, "learning_rate": 6.682887266828873e-06, "loss": 0.5118, "step": 30131 }, { "epoch": 0.8797407374967154, "grad_norm": 0.4809288917302694, "learning_rate": 6.681265206812652e-06, "loss": 0.4816, "step": 30132 }, { "epoch": 0.8797699337245628, "grad_norm": 0.5730948888809453, "learning_rate": 6.679643146796431e-06, "loss": 0.6533, "step": 30133 }, { "epoch": 0.8797991299524102, "grad_norm": 0.5442007454415749, "learning_rate": 6.678021086780212e-06, "loss": 0.6318, "step": 30134 }, { "epoch": 0.8798283261802575, "grad_norm": 0.5358587986370332, "learning_rate": 6.6763990267639905e-06, "loss": 0.6006, "step": 30135 }, { "epoch": 0.8798575224081049, "grad_norm": 0.5730531307561554, "learning_rate": 6.67477696674777e-06, "loss": 0.694, "step": 30136 }, { "epoch": 0.8798867186359522, "grad_norm": 0.5216146067694263, "learning_rate": 6.673154906731549e-06, "loss": 0.5456, "step": 30137 }, { "epoch": 0.8799159148637996, "grad_norm": 0.4737352987574281, "learning_rate": 6.671532846715328e-06, "loss": 0.4645, "step": 30138 }, { "epoch": 0.879945111091647, "grad_norm": 0.5222764381527091, "learning_rate": 6.669910786699109e-06, "loss": 0.5736, "step": 30139 }, { "epoch": 0.8799743073194943, "grad_norm": 0.5279805311847674, "learning_rate": 6.6682887266828874e-06, "loss": 0.5837, "step": 30140 }, { "epoch": 0.8800035035473417, "grad_norm": 0.4914805806602626, "learning_rate": 6.666666666666667e-06, "loss": 0.5151, "step": 30141 }, { "epoch": 0.880032699775189, "grad_norm": 0.5640507268742574, "learning_rate": 6.665044606650446e-06, "loss": 0.6276, "step": 30142 }, { "epoch": 0.8800618960030364, "grad_norm": 0.5588995492342331, "learning_rate": 6.663422546634225e-06, "loss": 0.6017, "step": 30143 }, { "epoch": 0.8800910922308838, "grad_norm": 0.5130138788743825, "learning_rate": 6.661800486618006e-06, "loss": 0.5578, "step": 30144 }, { "epoch": 0.8801202884587311, "grad_norm": 0.5247317145075712, "learning_rate": 6.660178426601784e-06, "loss": 0.5981, "step": 30145 }, { "epoch": 0.8801494846865785, "grad_norm": 0.5178841980526184, "learning_rate": 6.658556366585564e-06, "loss": 0.5477, "step": 30146 }, { "epoch": 0.8801786809144259, "grad_norm": 0.5398681846782922, "learning_rate": 6.656934306569343e-06, "loss": 0.589, "step": 30147 }, { "epoch": 0.8802078771422732, "grad_norm": 0.5220156406707978, "learning_rate": 6.655312246553124e-06, "loss": 0.5436, "step": 30148 }, { "epoch": 0.8802370733701206, "grad_norm": 0.5166467200339198, "learning_rate": 6.653690186536902e-06, "loss": 0.5786, "step": 30149 }, { "epoch": 0.8802662695979679, "grad_norm": 0.5499773730774472, "learning_rate": 6.652068126520681e-06, "loss": 0.6372, "step": 30150 }, { "epoch": 0.8802954658258153, "grad_norm": 0.5512010552799268, "learning_rate": 6.6504460665044606e-06, "loss": 0.6103, "step": 30151 }, { "epoch": 0.8803246620536627, "grad_norm": 0.49745127034563985, "learning_rate": 6.64882400648824e-06, "loss": 0.5007, "step": 30152 }, { "epoch": 0.88035385828151, "grad_norm": 0.5197369289408669, "learning_rate": 6.647201946472021e-06, "loss": 0.5309, "step": 30153 }, { "epoch": 0.8803830545093574, "grad_norm": 0.5594742224224034, "learning_rate": 6.645579886455799e-06, "loss": 0.5776, "step": 30154 }, { "epoch": 0.8804122507372047, "grad_norm": 0.4466598853065923, "learning_rate": 6.643957826439578e-06, "loss": 0.4496, "step": 30155 }, { "epoch": 0.8804414469650521, "grad_norm": 0.5336822085500429, "learning_rate": 6.6423357664233575e-06, "loss": 0.5885, "step": 30156 }, { "epoch": 0.8804706431928995, "grad_norm": 0.5055598626499024, "learning_rate": 6.640713706407137e-06, "loss": 0.5718, "step": 30157 }, { "epoch": 0.8804998394207468, "grad_norm": 0.5220225939491187, "learning_rate": 6.639091646390918e-06, "loss": 0.558, "step": 30158 }, { "epoch": 0.8805290356485942, "grad_norm": 0.5041852864372445, "learning_rate": 6.637469586374696e-06, "loss": 0.5271, "step": 30159 }, { "epoch": 0.8805582318764416, "grad_norm": 0.49225014076219215, "learning_rate": 6.635847526358475e-06, "loss": 0.4917, "step": 30160 }, { "epoch": 0.8805874281042889, "grad_norm": 0.5975994103237005, "learning_rate": 6.6342254663422545e-06, "loss": 0.6936, "step": 30161 }, { "epoch": 0.8806166243321363, "grad_norm": 0.5493183924825069, "learning_rate": 6.632603406326035e-06, "loss": 0.6219, "step": 30162 }, { "epoch": 0.8806458205599836, "grad_norm": 0.5455736300883789, "learning_rate": 6.630981346309814e-06, "loss": 0.6362, "step": 30163 }, { "epoch": 0.880675016787831, "grad_norm": 0.5255919525922526, "learning_rate": 6.629359286293593e-06, "loss": 0.5726, "step": 30164 }, { "epoch": 0.8807042130156784, "grad_norm": 0.4565531771052433, "learning_rate": 6.627737226277372e-06, "loss": 0.4417, "step": 30165 }, { "epoch": 0.8807334092435257, "grad_norm": 0.5033831891067456, "learning_rate": 6.626115166261151e-06, "loss": 0.5363, "step": 30166 }, { "epoch": 0.8807626054713731, "grad_norm": 0.5106775441726025, "learning_rate": 6.624493106244932e-06, "loss": 0.571, "step": 30167 }, { "epoch": 0.8807918016992204, "grad_norm": 0.5091081519434479, "learning_rate": 6.622871046228711e-06, "loss": 0.5761, "step": 30168 }, { "epoch": 0.8808209979270678, "grad_norm": 0.498295091580861, "learning_rate": 6.62124898621249e-06, "loss": 0.5105, "step": 30169 }, { "epoch": 0.8808501941549152, "grad_norm": 0.5368461870782374, "learning_rate": 6.619626926196269e-06, "loss": 0.6176, "step": 30170 }, { "epoch": 0.8808793903827625, "grad_norm": 0.5259341570091762, "learning_rate": 6.618004866180048e-06, "loss": 0.6246, "step": 30171 }, { "epoch": 0.8809085866106099, "grad_norm": 0.6192753446927679, "learning_rate": 6.616382806163829e-06, "loss": 0.6924, "step": 30172 }, { "epoch": 0.8809377828384573, "grad_norm": 0.5145961252887249, "learning_rate": 6.614760746147608e-06, "loss": 0.5206, "step": 30173 }, { "epoch": 0.8809669790663046, "grad_norm": 0.49607207294774625, "learning_rate": 6.613138686131387e-06, "loss": 0.5145, "step": 30174 }, { "epoch": 0.880996175294152, "grad_norm": 0.5015100436511408, "learning_rate": 6.611516626115166e-06, "loss": 0.554, "step": 30175 }, { "epoch": 0.8810253715219993, "grad_norm": 0.5452307577136982, "learning_rate": 6.609894566098945e-06, "loss": 0.5948, "step": 30176 }, { "epoch": 0.8810545677498467, "grad_norm": 0.5134501037058538, "learning_rate": 6.608272506082725e-06, "loss": 0.5624, "step": 30177 }, { "epoch": 0.8810837639776941, "grad_norm": 0.5344980340177472, "learning_rate": 6.606650446066505e-06, "loss": 0.5936, "step": 30178 }, { "epoch": 0.8811129602055414, "grad_norm": 0.6615976421258469, "learning_rate": 6.605028386050284e-06, "loss": 0.5762, "step": 30179 }, { "epoch": 0.8811421564333888, "grad_norm": 0.5099961517107471, "learning_rate": 6.603406326034063e-06, "loss": 0.5366, "step": 30180 }, { "epoch": 0.8811713526612361, "grad_norm": 0.5338886553532853, "learning_rate": 6.601784266017844e-06, "loss": 0.6015, "step": 30181 }, { "epoch": 0.8812005488890835, "grad_norm": 0.545289027663286, "learning_rate": 6.600162206001622e-06, "loss": 0.6159, "step": 30182 }, { "epoch": 0.8812297451169309, "grad_norm": 0.5168509446401616, "learning_rate": 6.5985401459854016e-06, "loss": 0.5749, "step": 30183 }, { "epoch": 0.8812589413447782, "grad_norm": 0.5139155409857139, "learning_rate": 6.596918085969181e-06, "loss": 0.5705, "step": 30184 }, { "epoch": 0.8812881375726256, "grad_norm": 0.5349024425593903, "learning_rate": 6.59529602595296e-06, "loss": 0.6124, "step": 30185 }, { "epoch": 0.881317333800473, "grad_norm": 0.5410941362305539, "learning_rate": 6.593673965936741e-06, "loss": 0.5899, "step": 30186 }, { "epoch": 0.8813465300283203, "grad_norm": 0.5268980691913703, "learning_rate": 6.592051905920519e-06, "loss": 0.602, "step": 30187 }, { "epoch": 0.8813757262561677, "grad_norm": 0.5332753794526802, "learning_rate": 6.5904298459042985e-06, "loss": 0.5748, "step": 30188 }, { "epoch": 0.881404922484015, "grad_norm": 0.7628711060620712, "learning_rate": 6.588807785888078e-06, "loss": 0.6911, "step": 30189 }, { "epoch": 0.8814341187118624, "grad_norm": 0.5456003152663075, "learning_rate": 6.587185725871857e-06, "loss": 0.5873, "step": 30190 }, { "epoch": 0.8814633149397098, "grad_norm": 0.544001048181339, "learning_rate": 6.585563665855638e-06, "loss": 0.5867, "step": 30191 }, { "epoch": 0.8814925111675571, "grad_norm": 0.5864373928067311, "learning_rate": 6.583941605839416e-06, "loss": 0.6751, "step": 30192 }, { "epoch": 0.8815217073954045, "grad_norm": 0.5534717545556913, "learning_rate": 6.5823195458231955e-06, "loss": 0.6016, "step": 30193 }, { "epoch": 0.8815509036232518, "grad_norm": 0.561785040733487, "learning_rate": 6.580697485806975e-06, "loss": 0.5833, "step": 30194 }, { "epoch": 0.8815800998510992, "grad_norm": 0.5343483774092486, "learning_rate": 6.579075425790756e-06, "loss": 0.5718, "step": 30195 }, { "epoch": 0.8816092960789466, "grad_norm": 0.533902371166533, "learning_rate": 6.577453365774534e-06, "loss": 0.5815, "step": 30196 }, { "epoch": 0.8816384923067939, "grad_norm": 0.5241148246916862, "learning_rate": 6.575831305758313e-06, "loss": 0.5794, "step": 30197 }, { "epoch": 0.8816676885346413, "grad_norm": 0.540990181312433, "learning_rate": 6.574209245742092e-06, "loss": 0.5527, "step": 30198 }, { "epoch": 0.8816968847624886, "grad_norm": 0.5202768537575797, "learning_rate": 6.572587185725872e-06, "loss": 0.5001, "step": 30199 }, { "epoch": 0.881726080990336, "grad_norm": 0.5533111125576142, "learning_rate": 6.5709651257096525e-06, "loss": 0.6289, "step": 30200 }, { "epoch": 0.8817552772181834, "grad_norm": 0.517600457192498, "learning_rate": 6.569343065693431e-06, "loss": 0.5619, "step": 30201 }, { "epoch": 0.8817844734460307, "grad_norm": 0.5449831423215495, "learning_rate": 6.56772100567721e-06, "loss": 0.688, "step": 30202 }, { "epoch": 0.8818136696738781, "grad_norm": 0.5830177259855063, "learning_rate": 6.566098945660989e-06, "loss": 0.6434, "step": 30203 }, { "epoch": 0.8818428659017254, "grad_norm": 0.511937479353204, "learning_rate": 6.564476885644769e-06, "loss": 0.5619, "step": 30204 }, { "epoch": 0.8818720621295728, "grad_norm": 0.5360201651416939, "learning_rate": 6.5628548256285495e-06, "loss": 0.5764, "step": 30205 }, { "epoch": 0.8819012583574202, "grad_norm": 0.5156949180626265, "learning_rate": 6.561232765612328e-06, "loss": 0.5895, "step": 30206 }, { "epoch": 0.8819304545852675, "grad_norm": 0.5100560486948365, "learning_rate": 6.559610705596107e-06, "loss": 0.5367, "step": 30207 }, { "epoch": 0.8819596508131149, "grad_norm": 0.5333256545189969, "learning_rate": 6.557988645579886e-06, "loss": 0.5708, "step": 30208 }, { "epoch": 0.8819888470409623, "grad_norm": 0.5383681151147345, "learning_rate": 6.5563665855636655e-06, "loss": 0.6113, "step": 30209 }, { "epoch": 0.8820180432688096, "grad_norm": 0.5428736483988374, "learning_rate": 6.554744525547446e-06, "loss": 0.6227, "step": 30210 }, { "epoch": 0.8820472394966571, "grad_norm": 0.5210047843957446, "learning_rate": 6.553122465531225e-06, "loss": 0.5371, "step": 30211 }, { "epoch": 0.8820764357245044, "grad_norm": 0.5319662442712239, "learning_rate": 6.551500405515004e-06, "loss": 0.6013, "step": 30212 }, { "epoch": 0.8821056319523518, "grad_norm": 0.5261041253287653, "learning_rate": 6.549878345498783e-06, "loss": 0.5991, "step": 30213 }, { "epoch": 0.8821348281801992, "grad_norm": 0.5403960202040858, "learning_rate": 6.548256285482564e-06, "loss": 0.6053, "step": 30214 }, { "epoch": 0.8821640244080465, "grad_norm": 0.5582996187539404, "learning_rate": 6.5466342254663426e-06, "loss": 0.6459, "step": 30215 }, { "epoch": 0.8821932206358939, "grad_norm": 0.5211445461971115, "learning_rate": 6.545012165450122e-06, "loss": 0.571, "step": 30216 }, { "epoch": 0.8822224168637413, "grad_norm": 0.5742576007662572, "learning_rate": 6.543390105433901e-06, "loss": 0.6552, "step": 30217 }, { "epoch": 0.8822516130915886, "grad_norm": 0.50762635072251, "learning_rate": 6.54176804541768e-06, "loss": 0.563, "step": 30218 }, { "epoch": 0.882280809319436, "grad_norm": 0.5596060089955853, "learning_rate": 6.540145985401461e-06, "loss": 0.596, "step": 30219 }, { "epoch": 0.8823100055472833, "grad_norm": 0.5426174536536313, "learning_rate": 6.5385239253852395e-06, "loss": 0.6018, "step": 30220 }, { "epoch": 0.8823392017751307, "grad_norm": 0.5146142592167148, "learning_rate": 6.536901865369019e-06, "loss": 0.5575, "step": 30221 }, { "epoch": 0.8823683980029781, "grad_norm": 0.5609508557331525, "learning_rate": 6.535279805352798e-06, "loss": 0.5586, "step": 30222 }, { "epoch": 0.8823975942308254, "grad_norm": 0.5302434778380122, "learning_rate": 6.533657745336577e-06, "loss": 0.5678, "step": 30223 }, { "epoch": 0.8824267904586728, "grad_norm": 0.5548732916549237, "learning_rate": 6.532035685320357e-06, "loss": 0.6338, "step": 30224 }, { "epoch": 0.8824559866865201, "grad_norm": 0.5478297076772898, "learning_rate": 6.5304136253041365e-06, "loss": 0.5995, "step": 30225 }, { "epoch": 0.8824851829143675, "grad_norm": 0.5262241825551304, "learning_rate": 6.528791565287916e-06, "loss": 0.5391, "step": 30226 }, { "epoch": 0.8825143791422149, "grad_norm": 0.5495036119438744, "learning_rate": 6.527169505271695e-06, "loss": 0.6242, "step": 30227 }, { "epoch": 0.8825435753700622, "grad_norm": 0.46379462999788984, "learning_rate": 6.525547445255476e-06, "loss": 0.4719, "step": 30228 }, { "epoch": 0.8825727715979096, "grad_norm": 0.5509367284346792, "learning_rate": 6.523925385239254e-06, "loss": 0.5906, "step": 30229 }, { "epoch": 0.882601967825757, "grad_norm": 0.4819819596754149, "learning_rate": 6.522303325223033e-06, "loss": 0.5203, "step": 30230 }, { "epoch": 0.8826311640536043, "grad_norm": 0.5676130444888927, "learning_rate": 6.520681265206813e-06, "loss": 0.65, "step": 30231 }, { "epoch": 0.8826603602814517, "grad_norm": 0.5558455090603999, "learning_rate": 6.519059205190592e-06, "loss": 0.6372, "step": 30232 }, { "epoch": 0.882689556509299, "grad_norm": 0.5401602853904375, "learning_rate": 6.517437145174373e-06, "loss": 0.6197, "step": 30233 }, { "epoch": 0.8827187527371464, "grad_norm": 0.5172778753121671, "learning_rate": 6.515815085158151e-06, "loss": 0.5513, "step": 30234 }, { "epoch": 0.8827479489649938, "grad_norm": 0.5369817144084776, "learning_rate": 6.51419302514193e-06, "loss": 0.6151, "step": 30235 }, { "epoch": 0.8827771451928411, "grad_norm": 0.5361358935452096, "learning_rate": 6.51257096512571e-06, "loss": 0.6118, "step": 30236 }, { "epoch": 0.8828063414206885, "grad_norm": 0.5753243450606566, "learning_rate": 6.510948905109489e-06, "loss": 0.6458, "step": 30237 }, { "epoch": 0.8828355376485358, "grad_norm": 0.5111120701436088, "learning_rate": 6.509326845093269e-06, "loss": 0.5609, "step": 30238 }, { "epoch": 0.8828647338763832, "grad_norm": 0.49694980786212145, "learning_rate": 6.507704785077048e-06, "loss": 0.5268, "step": 30239 }, { "epoch": 0.8828939301042306, "grad_norm": 0.4937370217720714, "learning_rate": 6.506082725060827e-06, "loss": 0.5203, "step": 30240 }, { "epoch": 0.8829231263320779, "grad_norm": 0.4792063848625833, "learning_rate": 6.5044606650446065e-06, "loss": 0.4812, "step": 30241 }, { "epoch": 0.8829523225599253, "grad_norm": 0.5059262629913588, "learning_rate": 6.502838605028386e-06, "loss": 0.5642, "step": 30242 }, { "epoch": 0.8829815187877726, "grad_norm": 0.5452782759386481, "learning_rate": 6.501216545012166e-06, "loss": 0.665, "step": 30243 }, { "epoch": 0.88301071501562, "grad_norm": 0.5491726603587908, "learning_rate": 6.499594484995945e-06, "loss": 0.582, "step": 30244 }, { "epoch": 0.8830399112434674, "grad_norm": 0.5286512844663416, "learning_rate": 6.497972424979724e-06, "loss": 0.6216, "step": 30245 }, { "epoch": 0.8830691074713147, "grad_norm": 0.5416060803989119, "learning_rate": 6.4963503649635035e-06, "loss": 0.5842, "step": 30246 }, { "epoch": 0.8830983036991621, "grad_norm": 0.5525606557445855, "learning_rate": 6.494728304947284e-06, "loss": 0.6695, "step": 30247 }, { "epoch": 0.8831274999270095, "grad_norm": 0.5465892789905095, "learning_rate": 6.493106244931063e-06, "loss": 0.5553, "step": 30248 }, { "epoch": 0.8831566961548568, "grad_norm": 0.5500693685408033, "learning_rate": 6.491484184914842e-06, "loss": 0.6366, "step": 30249 }, { "epoch": 0.8831858923827042, "grad_norm": 0.515939363056637, "learning_rate": 6.489862124898621e-06, "loss": 0.5619, "step": 30250 }, { "epoch": 0.8832150886105515, "grad_norm": 0.48124983928473924, "learning_rate": 6.4882400648824004e-06, "loss": 0.5063, "step": 30251 }, { "epoch": 0.8832442848383989, "grad_norm": 0.5250735262119242, "learning_rate": 6.486618004866181e-06, "loss": 0.583, "step": 30252 }, { "epoch": 0.8832734810662463, "grad_norm": 0.5180094728253205, "learning_rate": 6.48499594484996e-06, "loss": 0.5551, "step": 30253 }, { "epoch": 0.8833026772940936, "grad_norm": 0.5362209772052002, "learning_rate": 6.483373884833739e-06, "loss": 0.631, "step": 30254 }, { "epoch": 0.883331873521941, "grad_norm": 0.5687493295982721, "learning_rate": 6.481751824817518e-06, "loss": 0.6647, "step": 30255 }, { "epoch": 0.8833610697497883, "grad_norm": 0.5128155009006611, "learning_rate": 6.480129764801297e-06, "loss": 0.5578, "step": 30256 }, { "epoch": 0.8833902659776357, "grad_norm": 0.4966699894242549, "learning_rate": 6.4785077047850775e-06, "loss": 0.5319, "step": 30257 }, { "epoch": 0.8834194622054831, "grad_norm": 0.5338999733040238, "learning_rate": 6.476885644768857e-06, "loss": 0.5863, "step": 30258 }, { "epoch": 0.8834486584333304, "grad_norm": 0.4856712086638876, "learning_rate": 6.475263584752636e-06, "loss": 0.5179, "step": 30259 }, { "epoch": 0.8834778546611778, "grad_norm": 0.5389584967941491, "learning_rate": 6.473641524736415e-06, "loss": 0.6287, "step": 30260 }, { "epoch": 0.8835070508890251, "grad_norm": 0.5501246567461676, "learning_rate": 6.472019464720194e-06, "loss": 0.6194, "step": 30261 }, { "epoch": 0.8835362471168725, "grad_norm": 0.5076608417614417, "learning_rate": 6.470397404703974e-06, "loss": 0.5583, "step": 30262 }, { "epoch": 0.8835654433447199, "grad_norm": 0.5343093005571841, "learning_rate": 6.468775344687754e-06, "loss": 0.6071, "step": 30263 }, { "epoch": 0.8835946395725672, "grad_norm": 0.5426966240374886, "learning_rate": 6.467153284671533e-06, "loss": 0.6156, "step": 30264 }, { "epoch": 0.8836238358004146, "grad_norm": 0.5608458334299332, "learning_rate": 6.465531224655312e-06, "loss": 0.6391, "step": 30265 }, { "epoch": 0.883653032028262, "grad_norm": 0.5863582248101057, "learning_rate": 6.463909164639093e-06, "loss": 0.6554, "step": 30266 }, { "epoch": 0.8836822282561093, "grad_norm": 0.5610811390975342, "learning_rate": 6.462287104622871e-06, "loss": 0.5608, "step": 30267 }, { "epoch": 0.8837114244839567, "grad_norm": 0.5048880666133808, "learning_rate": 6.460665044606651e-06, "loss": 0.5493, "step": 30268 }, { "epoch": 0.883740620711804, "grad_norm": 0.5768430491651967, "learning_rate": 6.45904298459043e-06, "loss": 0.6513, "step": 30269 }, { "epoch": 0.8837698169396514, "grad_norm": 0.5216928296170406, "learning_rate": 6.457420924574209e-06, "loss": 0.5796, "step": 30270 }, { "epoch": 0.8837990131674988, "grad_norm": 0.52021544319327, "learning_rate": 6.455798864557989e-06, "loss": 0.561, "step": 30271 }, { "epoch": 0.8838282093953461, "grad_norm": 0.5473227360484008, "learning_rate": 6.454176804541768e-06, "loss": 0.6011, "step": 30272 }, { "epoch": 0.8838574056231935, "grad_norm": 0.517291448152912, "learning_rate": 6.4525547445255475e-06, "loss": 0.5864, "step": 30273 }, { "epoch": 0.8838866018510408, "grad_norm": 0.4901315672409992, "learning_rate": 6.450932684509327e-06, "loss": 0.5315, "step": 30274 }, { "epoch": 0.8839157980788882, "grad_norm": 0.5377741676924206, "learning_rate": 6.449310624493106e-06, "loss": 0.6276, "step": 30275 }, { "epoch": 0.8839449943067356, "grad_norm": 0.5025564992963792, "learning_rate": 6.447688564476886e-06, "loss": 0.5469, "step": 30276 }, { "epoch": 0.8839741905345829, "grad_norm": 0.5248679391658596, "learning_rate": 6.446066504460665e-06, "loss": 0.5821, "step": 30277 }, { "epoch": 0.8840033867624303, "grad_norm": 0.5181689217901101, "learning_rate": 6.4444444444444445e-06, "loss": 0.5778, "step": 30278 }, { "epoch": 0.8840325829902776, "grad_norm": 0.5539749316275708, "learning_rate": 6.442822384428224e-06, "loss": 0.6597, "step": 30279 }, { "epoch": 0.884061779218125, "grad_norm": 0.5064396903995483, "learning_rate": 6.441200324412005e-06, "loss": 0.5056, "step": 30280 }, { "epoch": 0.8840909754459724, "grad_norm": 0.5378272589254386, "learning_rate": 6.439578264395783e-06, "loss": 0.6086, "step": 30281 }, { "epoch": 0.8841201716738197, "grad_norm": 0.5032642168647545, "learning_rate": 6.437956204379562e-06, "loss": 0.5239, "step": 30282 }, { "epoch": 0.8841493679016671, "grad_norm": 0.5319859731751717, "learning_rate": 6.4363341443633414e-06, "loss": 0.5017, "step": 30283 }, { "epoch": 0.8841785641295145, "grad_norm": 0.5386412195839528, "learning_rate": 6.434712084347121e-06, "loss": 0.6112, "step": 30284 }, { "epoch": 0.8842077603573618, "grad_norm": 0.515976457016414, "learning_rate": 6.433090024330901e-06, "loss": 0.5603, "step": 30285 }, { "epoch": 0.8842369565852092, "grad_norm": 0.5638060218461889, "learning_rate": 6.43146796431468e-06, "loss": 0.6606, "step": 30286 }, { "epoch": 0.8842661528130565, "grad_norm": 0.504367205828502, "learning_rate": 6.429845904298459e-06, "loss": 0.5796, "step": 30287 }, { "epoch": 0.8842953490409039, "grad_norm": 0.49119990607430153, "learning_rate": 6.428223844282238e-06, "loss": 0.5147, "step": 30288 }, { "epoch": 0.8843245452687513, "grad_norm": 0.48804914665630206, "learning_rate": 6.426601784266018e-06, "loss": 0.4994, "step": 30289 }, { "epoch": 0.8843537414965986, "grad_norm": 0.5139066095691961, "learning_rate": 6.424979724249798e-06, "loss": 0.5636, "step": 30290 }, { "epoch": 0.884382937724446, "grad_norm": 0.5205080692021977, "learning_rate": 6.423357664233577e-06, "loss": 0.5995, "step": 30291 }, { "epoch": 0.8844121339522933, "grad_norm": 0.5444183464959839, "learning_rate": 6.421735604217356e-06, "loss": 0.6131, "step": 30292 }, { "epoch": 0.8844413301801407, "grad_norm": 0.5275617016019472, "learning_rate": 6.420113544201135e-06, "loss": 0.6011, "step": 30293 }, { "epoch": 0.8844705264079881, "grad_norm": 0.5030088246094372, "learning_rate": 6.4184914841849146e-06, "loss": 0.5524, "step": 30294 }, { "epoch": 0.8844997226358354, "grad_norm": 0.5312645282422179, "learning_rate": 6.416869424168695e-06, "loss": 0.5543, "step": 30295 }, { "epoch": 0.8845289188636828, "grad_norm": 0.49542429290385914, "learning_rate": 6.415247364152474e-06, "loss": 0.5355, "step": 30296 }, { "epoch": 0.8845581150915302, "grad_norm": 0.503919473699211, "learning_rate": 6.413625304136253e-06, "loss": 0.5637, "step": 30297 }, { "epoch": 0.8845873113193775, "grad_norm": 0.5095524165552319, "learning_rate": 6.412003244120032e-06, "loss": 0.5358, "step": 30298 }, { "epoch": 0.8846165075472249, "grad_norm": 0.5396961513894237, "learning_rate": 6.410381184103812e-06, "loss": 0.6008, "step": 30299 }, { "epoch": 0.8846457037750722, "grad_norm": 0.5042331208757327, "learning_rate": 6.408759124087592e-06, "loss": 0.5665, "step": 30300 }, { "epoch": 0.8846749000029196, "grad_norm": 0.5358001613990676, "learning_rate": 6.407137064071371e-06, "loss": 0.5815, "step": 30301 }, { "epoch": 0.884704096230767, "grad_norm": 0.5611696589666014, "learning_rate": 6.40551500405515e-06, "loss": 0.6607, "step": 30302 }, { "epoch": 0.8847332924586143, "grad_norm": 0.5351193465440052, "learning_rate": 6.403892944038929e-06, "loss": 0.5796, "step": 30303 }, { "epoch": 0.8847624886864617, "grad_norm": 0.5782281516797099, "learning_rate": 6.402270884022709e-06, "loss": 0.6302, "step": 30304 }, { "epoch": 0.884791684914309, "grad_norm": 0.5146949487665397, "learning_rate": 6.4006488240064885e-06, "loss": 0.5401, "step": 30305 }, { "epoch": 0.8848208811421564, "grad_norm": 0.5027131134648772, "learning_rate": 6.399026763990268e-06, "loss": 0.5415, "step": 30306 }, { "epoch": 0.8848500773700038, "grad_norm": 0.5481129631071243, "learning_rate": 6.397404703974047e-06, "loss": 0.6255, "step": 30307 }, { "epoch": 0.8848792735978511, "grad_norm": 0.5129677679574284, "learning_rate": 6.395782643957826e-06, "loss": 0.577, "step": 30308 }, { "epoch": 0.8849084698256985, "grad_norm": 0.5534631320145947, "learning_rate": 6.394160583941606e-06, "loss": 0.6667, "step": 30309 }, { "epoch": 0.8849376660535458, "grad_norm": 0.5190807279266461, "learning_rate": 6.3925385239253855e-06, "loss": 0.58, "step": 30310 }, { "epoch": 0.8849668622813932, "grad_norm": 0.5037663028791319, "learning_rate": 6.390916463909165e-06, "loss": 0.5408, "step": 30311 }, { "epoch": 0.8849960585092406, "grad_norm": 0.5278771221758511, "learning_rate": 6.389294403892944e-06, "loss": 0.6085, "step": 30312 }, { "epoch": 0.8850252547370879, "grad_norm": 0.5057073417560463, "learning_rate": 6.387672343876725e-06, "loss": 0.5265, "step": 30313 }, { "epoch": 0.8850544509649353, "grad_norm": 0.5233333757556112, "learning_rate": 6.386050283860503e-06, "loss": 0.5757, "step": 30314 }, { "epoch": 0.8850836471927827, "grad_norm": 0.5604868651422691, "learning_rate": 6.3844282238442824e-06, "loss": 0.6381, "step": 30315 }, { "epoch": 0.88511284342063, "grad_norm": 0.5112206943386834, "learning_rate": 6.382806163828062e-06, "loss": 0.5682, "step": 30316 }, { "epoch": 0.8851420396484774, "grad_norm": 0.5406720578504863, "learning_rate": 6.381184103811841e-06, "loss": 0.576, "step": 30317 }, { "epoch": 0.8851712358763247, "grad_norm": 0.549854756921009, "learning_rate": 6.379562043795621e-06, "loss": 0.5384, "step": 30318 }, { "epoch": 0.8852004321041721, "grad_norm": 0.5380734321883065, "learning_rate": 6.3779399837794e-06, "loss": 0.5975, "step": 30319 }, { "epoch": 0.8852296283320195, "grad_norm": 0.5483057705736379, "learning_rate": 6.376317923763179e-06, "loss": 0.6344, "step": 30320 }, { "epoch": 0.8852588245598668, "grad_norm": 0.5186078565888349, "learning_rate": 6.374695863746959e-06, "loss": 0.57, "step": 30321 }, { "epoch": 0.8852880207877142, "grad_norm": 0.547733571260492, "learning_rate": 6.373073803730738e-06, "loss": 0.6473, "step": 30322 }, { "epoch": 0.8853172170155615, "grad_norm": 0.4946666210276331, "learning_rate": 6.371451743714518e-06, "loss": 0.5387, "step": 30323 }, { "epoch": 0.8853464132434089, "grad_norm": 0.49716285015121936, "learning_rate": 6.369829683698297e-06, "loss": 0.5348, "step": 30324 }, { "epoch": 0.8853756094712563, "grad_norm": 0.5048385352100911, "learning_rate": 6.368207623682076e-06, "loss": 0.5619, "step": 30325 }, { "epoch": 0.8854048056991036, "grad_norm": 0.5142021453493708, "learning_rate": 6.3665855636658556e-06, "loss": 0.5549, "step": 30326 }, { "epoch": 0.885434001926951, "grad_norm": 0.5335646936577143, "learning_rate": 6.364963503649635e-06, "loss": 0.5935, "step": 30327 }, { "epoch": 0.8854631981547983, "grad_norm": 0.5166202923973231, "learning_rate": 6.363341443633415e-06, "loss": 0.4893, "step": 30328 }, { "epoch": 0.8854923943826457, "grad_norm": 0.5153664282100634, "learning_rate": 6.361719383617194e-06, "loss": 0.5444, "step": 30329 }, { "epoch": 0.8855215906104931, "grad_norm": 0.4848151058909313, "learning_rate": 6.360097323600973e-06, "loss": 0.5001, "step": 30330 }, { "epoch": 0.8855507868383404, "grad_norm": 0.5219204922986015, "learning_rate": 6.3584752635847525e-06, "loss": 0.5718, "step": 30331 }, { "epoch": 0.8855799830661879, "grad_norm": 0.5002409352371412, "learning_rate": 6.356853203568533e-06, "loss": 0.5085, "step": 30332 }, { "epoch": 0.8856091792940353, "grad_norm": 0.5292690282317772, "learning_rate": 6.355231143552312e-06, "loss": 0.6184, "step": 30333 }, { "epoch": 0.8856383755218826, "grad_norm": 0.5893373317261198, "learning_rate": 6.353609083536091e-06, "loss": 0.6655, "step": 30334 }, { "epoch": 0.88566757174973, "grad_norm": 0.5654743843530117, "learning_rate": 6.35198702351987e-06, "loss": 0.6276, "step": 30335 }, { "epoch": 0.8856967679775773, "grad_norm": 0.4957901979787787, "learning_rate": 6.3503649635036495e-06, "loss": 0.5168, "step": 30336 }, { "epoch": 0.8857259642054247, "grad_norm": 0.5512260916013597, "learning_rate": 6.3487429034874295e-06, "loss": 0.654, "step": 30337 }, { "epoch": 0.8857551604332721, "grad_norm": 0.5473157992366866, "learning_rate": 6.347120843471209e-06, "loss": 0.5762, "step": 30338 }, { "epoch": 0.8857843566611194, "grad_norm": 0.5018746136589249, "learning_rate": 6.345498783454988e-06, "loss": 0.5322, "step": 30339 }, { "epoch": 0.8858135528889668, "grad_norm": 0.5470090129834411, "learning_rate": 6.343876723438767e-06, "loss": 0.6316, "step": 30340 }, { "epoch": 0.8858427491168142, "grad_norm": 0.5277322367876798, "learning_rate": 6.342254663422546e-06, "loss": 0.5705, "step": 30341 }, { "epoch": 0.8858719453446615, "grad_norm": 0.5068752869817544, "learning_rate": 6.3406326034063265e-06, "loss": 0.517, "step": 30342 }, { "epoch": 0.8859011415725089, "grad_norm": 0.5487841663568095, "learning_rate": 6.339010543390106e-06, "loss": 0.6145, "step": 30343 }, { "epoch": 0.8859303378003562, "grad_norm": 0.5237179483176352, "learning_rate": 6.337388483373885e-06, "loss": 0.6002, "step": 30344 }, { "epoch": 0.8859595340282036, "grad_norm": 0.5320265720228178, "learning_rate": 6.335766423357664e-06, "loss": 0.5767, "step": 30345 }, { "epoch": 0.885988730256051, "grad_norm": 0.5112846191683399, "learning_rate": 6.334144363341444e-06, "loss": 0.5695, "step": 30346 }, { "epoch": 0.8860179264838983, "grad_norm": 0.5220576632383895, "learning_rate": 6.3325223033252234e-06, "loss": 0.5741, "step": 30347 }, { "epoch": 0.8860471227117457, "grad_norm": 0.5318354091765398, "learning_rate": 6.330900243309003e-06, "loss": 0.619, "step": 30348 }, { "epoch": 0.886076318939593, "grad_norm": 0.5301124067196988, "learning_rate": 6.329278183292782e-06, "loss": 0.612, "step": 30349 }, { "epoch": 0.8861055151674404, "grad_norm": 0.5539176744015365, "learning_rate": 6.327656123276561e-06, "loss": 0.6538, "step": 30350 }, { "epoch": 0.8861347113952878, "grad_norm": 0.5199038574936029, "learning_rate": 6.326034063260341e-06, "loss": 0.5478, "step": 30351 }, { "epoch": 0.8861639076231351, "grad_norm": 0.5281909638591699, "learning_rate": 6.32441200324412e-06, "loss": 0.6015, "step": 30352 }, { "epoch": 0.8861931038509825, "grad_norm": 0.5233360523080263, "learning_rate": 6.3227899432279e-06, "loss": 0.6004, "step": 30353 }, { "epoch": 0.8862223000788298, "grad_norm": 0.5521114523775642, "learning_rate": 6.321167883211679e-06, "loss": 0.6172, "step": 30354 }, { "epoch": 0.8862514963066772, "grad_norm": 0.5401695196394954, "learning_rate": 6.319545823195458e-06, "loss": 0.5837, "step": 30355 }, { "epoch": 0.8862806925345246, "grad_norm": 0.5039292975158047, "learning_rate": 6.317923763179238e-06, "loss": 0.5076, "step": 30356 }, { "epoch": 0.8863098887623719, "grad_norm": 0.4956256073891395, "learning_rate": 6.316301703163017e-06, "loss": 0.5342, "step": 30357 }, { "epoch": 0.8863390849902193, "grad_norm": 0.505438239180046, "learning_rate": 6.3146796431467966e-06, "loss": 0.5458, "step": 30358 }, { "epoch": 0.8863682812180667, "grad_norm": 0.5103560897714454, "learning_rate": 6.313057583130576e-06, "loss": 0.5345, "step": 30359 }, { "epoch": 0.886397477445914, "grad_norm": 0.5315845817925695, "learning_rate": 6.311435523114355e-06, "loss": 0.6182, "step": 30360 }, { "epoch": 0.8864266736737614, "grad_norm": 0.5160130793833637, "learning_rate": 6.309813463098135e-06, "loss": 0.5801, "step": 30361 }, { "epoch": 0.8864558699016087, "grad_norm": 0.5435957310277607, "learning_rate": 6.308191403081914e-06, "loss": 0.5944, "step": 30362 }, { "epoch": 0.8864850661294561, "grad_norm": 0.5352494586909105, "learning_rate": 6.3065693430656935e-06, "loss": 0.5959, "step": 30363 }, { "epoch": 0.8865142623573035, "grad_norm": 0.49957326337867897, "learning_rate": 6.304947283049473e-06, "loss": 0.5197, "step": 30364 }, { "epoch": 0.8865434585851508, "grad_norm": 0.535972771268211, "learning_rate": 6.303325223033253e-06, "loss": 0.5404, "step": 30365 }, { "epoch": 0.8865726548129982, "grad_norm": 0.5314031871232332, "learning_rate": 6.301703163017032e-06, "loss": 0.6023, "step": 30366 }, { "epoch": 0.8866018510408455, "grad_norm": 0.6088900895544231, "learning_rate": 6.300081103000811e-06, "loss": 0.7721, "step": 30367 }, { "epoch": 0.8866310472686929, "grad_norm": 0.534003547677177, "learning_rate": 6.2984590429845905e-06, "loss": 0.6256, "step": 30368 }, { "epoch": 0.8866602434965403, "grad_norm": 0.522479347811122, "learning_rate": 6.29683698296837e-06, "loss": 0.5538, "step": 30369 }, { "epoch": 0.8866894397243876, "grad_norm": 0.535029249803629, "learning_rate": 6.29521492295215e-06, "loss": 0.6349, "step": 30370 }, { "epoch": 0.886718635952235, "grad_norm": 0.5015891815515918, "learning_rate": 6.293592862935929e-06, "loss": 0.5506, "step": 30371 }, { "epoch": 0.8867478321800824, "grad_norm": 0.5321886616772683, "learning_rate": 6.291970802919708e-06, "loss": 0.6091, "step": 30372 }, { "epoch": 0.8867770284079297, "grad_norm": 0.551038112562381, "learning_rate": 6.290348742903487e-06, "loss": 0.6602, "step": 30373 }, { "epoch": 0.8868062246357771, "grad_norm": 0.5044191206727887, "learning_rate": 6.288726682887267e-06, "loss": 0.5322, "step": 30374 }, { "epoch": 0.8868354208636244, "grad_norm": 0.5622172672068598, "learning_rate": 6.287104622871047e-06, "loss": 0.6312, "step": 30375 }, { "epoch": 0.8868646170914718, "grad_norm": 0.5258151601028136, "learning_rate": 6.285482562854826e-06, "loss": 0.5794, "step": 30376 }, { "epoch": 0.8868938133193192, "grad_norm": 0.5758649480040572, "learning_rate": 6.283860502838605e-06, "loss": 0.6623, "step": 30377 }, { "epoch": 0.8869230095471665, "grad_norm": 0.5166947178448574, "learning_rate": 6.282238442822384e-06, "loss": 0.549, "step": 30378 }, { "epoch": 0.8869522057750139, "grad_norm": 0.5227906424165529, "learning_rate": 6.2806163828061644e-06, "loss": 0.5444, "step": 30379 }, { "epoch": 0.8869814020028612, "grad_norm": 0.5265386917782536, "learning_rate": 6.278994322789944e-06, "loss": 0.561, "step": 30380 }, { "epoch": 0.8870105982307086, "grad_norm": 0.49951234174993564, "learning_rate": 6.277372262773723e-06, "loss": 0.5375, "step": 30381 }, { "epoch": 0.887039794458556, "grad_norm": 0.52169480367687, "learning_rate": 6.275750202757502e-06, "loss": 0.5893, "step": 30382 }, { "epoch": 0.8870689906864033, "grad_norm": 0.5510710733249553, "learning_rate": 6.274128142741281e-06, "loss": 0.5873, "step": 30383 }, { "epoch": 0.8870981869142507, "grad_norm": 0.5240989865021912, "learning_rate": 6.272506082725061e-06, "loss": 0.5375, "step": 30384 }, { "epoch": 0.887127383142098, "grad_norm": 0.5452656525976971, "learning_rate": 6.270884022708841e-06, "loss": 0.6089, "step": 30385 }, { "epoch": 0.8871565793699454, "grad_norm": 0.5314881906388229, "learning_rate": 6.26926196269262e-06, "loss": 0.555, "step": 30386 }, { "epoch": 0.8871857755977928, "grad_norm": 0.5411643898390883, "learning_rate": 6.267639902676399e-06, "loss": 0.5891, "step": 30387 }, { "epoch": 0.8872149718256401, "grad_norm": 0.5433920162934373, "learning_rate": 6.266017842660178e-06, "loss": 0.6094, "step": 30388 }, { "epoch": 0.8872441680534875, "grad_norm": 0.48941943913663455, "learning_rate": 6.264395782643958e-06, "loss": 0.4932, "step": 30389 }, { "epoch": 0.8872733642813349, "grad_norm": 0.564229283660074, "learning_rate": 6.2627737226277376e-06, "loss": 0.6836, "step": 30390 }, { "epoch": 0.8873025605091822, "grad_norm": 0.5461148079373775, "learning_rate": 6.261151662611517e-06, "loss": 0.6167, "step": 30391 }, { "epoch": 0.8873317567370296, "grad_norm": 0.5032781608109023, "learning_rate": 6.259529602595296e-06, "loss": 0.5671, "step": 30392 }, { "epoch": 0.8873609529648769, "grad_norm": 0.5314150298247713, "learning_rate": 6.257907542579075e-06, "loss": 0.5968, "step": 30393 }, { "epoch": 0.8873901491927243, "grad_norm": 0.5247892125809261, "learning_rate": 6.256285482562855e-06, "loss": 0.6377, "step": 30394 }, { "epoch": 0.8874193454205717, "grad_norm": 0.5295342750251851, "learning_rate": 6.2546634225466345e-06, "loss": 0.5403, "step": 30395 }, { "epoch": 0.887448541648419, "grad_norm": 0.6303374367033187, "learning_rate": 6.253041362530414e-06, "loss": 0.5862, "step": 30396 }, { "epoch": 0.8874777378762664, "grad_norm": 0.48399456316473105, "learning_rate": 6.251419302514193e-06, "loss": 0.5027, "step": 30397 }, { "epoch": 0.8875069341041137, "grad_norm": 0.5205126790376148, "learning_rate": 6.249797242497973e-06, "loss": 0.5568, "step": 30398 }, { "epoch": 0.8875361303319611, "grad_norm": 0.5579265681646688, "learning_rate": 6.248175182481751e-06, "loss": 0.6218, "step": 30399 }, { "epoch": 0.8875653265598085, "grad_norm": 0.5212474547220203, "learning_rate": 6.2465531224655315e-06, "loss": 0.5913, "step": 30400 }, { "epoch": 0.8875945227876558, "grad_norm": 0.5172802999952952, "learning_rate": 6.244931062449311e-06, "loss": 0.5423, "step": 30401 }, { "epoch": 0.8876237190155032, "grad_norm": 0.535214706069426, "learning_rate": 6.243309002433091e-06, "loss": 0.5569, "step": 30402 }, { "epoch": 0.8876529152433505, "grad_norm": 0.4817189946961726, "learning_rate": 6.24168694241687e-06, "loss": 0.4892, "step": 30403 }, { "epoch": 0.8876821114711979, "grad_norm": 0.5175200199003607, "learning_rate": 6.240064882400649e-06, "loss": 0.5843, "step": 30404 }, { "epoch": 0.8877113076990453, "grad_norm": 0.5112711145886, "learning_rate": 6.238442822384428e-06, "loss": 0.5553, "step": 30405 }, { "epoch": 0.8877405039268926, "grad_norm": 0.5126950870184089, "learning_rate": 6.236820762368208e-06, "loss": 0.5729, "step": 30406 }, { "epoch": 0.88776970015474, "grad_norm": 0.538517500314085, "learning_rate": 6.235198702351988e-06, "loss": 0.6001, "step": 30407 }, { "epoch": 0.8877988963825874, "grad_norm": 0.5293839637838762, "learning_rate": 6.233576642335767e-06, "loss": 0.6006, "step": 30408 }, { "epoch": 0.8878280926104347, "grad_norm": 0.5006076468757608, "learning_rate": 6.231954582319546e-06, "loss": 0.52, "step": 30409 }, { "epoch": 0.8878572888382821, "grad_norm": 0.5740703097247901, "learning_rate": 6.230332522303325e-06, "loss": 0.6221, "step": 30410 }, { "epoch": 0.8878864850661294, "grad_norm": 0.45333247456524245, "learning_rate": 6.2287104622871054e-06, "loss": 0.4709, "step": 30411 }, { "epoch": 0.8879156812939768, "grad_norm": 0.5279021585788706, "learning_rate": 6.227088402270885e-06, "loss": 0.5469, "step": 30412 }, { "epoch": 0.8879448775218242, "grad_norm": 0.5056175555390853, "learning_rate": 6.225466342254663e-06, "loss": 0.5591, "step": 30413 }, { "epoch": 0.8879740737496715, "grad_norm": 0.4982659247743334, "learning_rate": 6.223844282238443e-06, "loss": 0.5134, "step": 30414 }, { "epoch": 0.8880032699775189, "grad_norm": 0.5399798108641491, "learning_rate": 6.222222222222222e-06, "loss": 0.6208, "step": 30415 }, { "epoch": 0.8880324662053662, "grad_norm": 0.5112586583950497, "learning_rate": 6.220600162206002e-06, "loss": 0.5404, "step": 30416 }, { "epoch": 0.8880616624332136, "grad_norm": 0.5454834841626643, "learning_rate": 6.218978102189782e-06, "loss": 0.6526, "step": 30417 }, { "epoch": 0.888090858661061, "grad_norm": 0.5291535738410349, "learning_rate": 6.217356042173561e-06, "loss": 0.5805, "step": 30418 }, { "epoch": 0.8881200548889083, "grad_norm": 0.5173796130056375, "learning_rate": 6.21573398215734e-06, "loss": 0.5714, "step": 30419 }, { "epoch": 0.8881492511167557, "grad_norm": 0.5322969191230543, "learning_rate": 6.214111922141119e-06, "loss": 0.5547, "step": 30420 }, { "epoch": 0.888178447344603, "grad_norm": 0.5373289944424446, "learning_rate": 6.212489862124899e-06, "loss": 0.5846, "step": 30421 }, { "epoch": 0.8882076435724504, "grad_norm": 0.48785934821023835, "learning_rate": 6.2108678021086786e-06, "loss": 0.4903, "step": 30422 }, { "epoch": 0.8882368398002978, "grad_norm": 0.5490754172904624, "learning_rate": 6.209245742092458e-06, "loss": 0.6218, "step": 30423 }, { "epoch": 0.8882660360281451, "grad_norm": 0.5123113830322171, "learning_rate": 6.207623682076237e-06, "loss": 0.5267, "step": 30424 }, { "epoch": 0.8882952322559925, "grad_norm": 0.5191023039773587, "learning_rate": 6.206001622060016e-06, "loss": 0.5572, "step": 30425 }, { "epoch": 0.8883244284838399, "grad_norm": 0.5162570465400834, "learning_rate": 6.204379562043796e-06, "loss": 0.5305, "step": 30426 }, { "epoch": 0.8883536247116872, "grad_norm": 0.49387041747727073, "learning_rate": 6.202757502027575e-06, "loss": 0.5469, "step": 30427 }, { "epoch": 0.8883828209395346, "grad_norm": 0.5516050276826336, "learning_rate": 6.201135442011355e-06, "loss": 0.6359, "step": 30428 }, { "epoch": 0.8884120171673819, "grad_norm": 0.5461956562049118, "learning_rate": 6.199513381995134e-06, "loss": 0.5971, "step": 30429 }, { "epoch": 0.8884412133952293, "grad_norm": 0.5328164205820701, "learning_rate": 6.197891321978914e-06, "loss": 0.5775, "step": 30430 }, { "epoch": 0.8884704096230767, "grad_norm": 0.5405503482264983, "learning_rate": 6.196269261962693e-06, "loss": 0.5907, "step": 30431 }, { "epoch": 0.888499605850924, "grad_norm": 0.4892466219658648, "learning_rate": 6.194647201946472e-06, "loss": 0.5215, "step": 30432 }, { "epoch": 0.8885288020787714, "grad_norm": 0.5748473192894326, "learning_rate": 6.193025141930252e-06, "loss": 0.6579, "step": 30433 }, { "epoch": 0.8885579983066187, "grad_norm": 0.5342669158536704, "learning_rate": 6.191403081914031e-06, "loss": 0.5616, "step": 30434 }, { "epoch": 0.8885871945344661, "grad_norm": 0.5175660815582387, "learning_rate": 6.189781021897811e-06, "loss": 0.518, "step": 30435 }, { "epoch": 0.8886163907623135, "grad_norm": 0.5551668255137688, "learning_rate": 6.18815896188159e-06, "loss": 0.6444, "step": 30436 }, { "epoch": 0.8886455869901608, "grad_norm": 0.5432192427948499, "learning_rate": 6.186536901865369e-06, "loss": 0.5894, "step": 30437 }, { "epoch": 0.8886747832180082, "grad_norm": 0.5134601942428407, "learning_rate": 6.184914841849149e-06, "loss": 0.5752, "step": 30438 }, { "epoch": 0.8887039794458556, "grad_norm": 0.5755042917301836, "learning_rate": 6.183292781832928e-06, "loss": 0.6776, "step": 30439 }, { "epoch": 0.8887331756737029, "grad_norm": 0.570248529950857, "learning_rate": 6.181670721816708e-06, "loss": 0.6983, "step": 30440 }, { "epoch": 0.8887623719015503, "grad_norm": 0.5363760947109114, "learning_rate": 6.180048661800487e-06, "loss": 0.5794, "step": 30441 }, { "epoch": 0.8887915681293976, "grad_norm": 0.532092869150523, "learning_rate": 6.178426601784266e-06, "loss": 0.5397, "step": 30442 }, { "epoch": 0.888820764357245, "grad_norm": 0.5507954615786402, "learning_rate": 6.176804541768046e-06, "loss": 0.62, "step": 30443 }, { "epoch": 0.8888499605850924, "grad_norm": 0.5287239756063792, "learning_rate": 6.175182481751826e-06, "loss": 0.5724, "step": 30444 }, { "epoch": 0.8888791568129397, "grad_norm": 0.5252814806743853, "learning_rate": 6.173560421735605e-06, "loss": 0.5475, "step": 30445 }, { "epoch": 0.8889083530407871, "grad_norm": 0.5205346375032616, "learning_rate": 6.171938361719383e-06, "loss": 0.5815, "step": 30446 }, { "epoch": 0.8889375492686344, "grad_norm": 0.4916122465065151, "learning_rate": 6.170316301703163e-06, "loss": 0.4971, "step": 30447 }, { "epoch": 0.8889667454964818, "grad_norm": 0.5028642380981758, "learning_rate": 6.1686942416869425e-06, "loss": 0.5514, "step": 30448 }, { "epoch": 0.8889959417243292, "grad_norm": 0.5628760749454633, "learning_rate": 6.167072181670723e-06, "loss": 0.6545, "step": 30449 }, { "epoch": 0.8890251379521765, "grad_norm": 0.4680411851017888, "learning_rate": 6.165450121654502e-06, "loss": 0.4844, "step": 30450 }, { "epoch": 0.8890543341800239, "grad_norm": 0.563115066795237, "learning_rate": 6.163828061638281e-06, "loss": 0.6203, "step": 30451 }, { "epoch": 0.8890835304078712, "grad_norm": 0.5318616369796781, "learning_rate": 6.16220600162206e-06, "loss": 0.5769, "step": 30452 }, { "epoch": 0.8891127266357187, "grad_norm": 0.5208743916346247, "learning_rate": 6.1605839416058395e-06, "loss": 0.6156, "step": 30453 }, { "epoch": 0.8891419228635661, "grad_norm": 0.5368252519914977, "learning_rate": 6.1589618815896196e-06, "loss": 0.5887, "step": 30454 }, { "epoch": 0.8891711190914134, "grad_norm": 0.5078169078180708, "learning_rate": 6.157339821573399e-06, "loss": 0.5516, "step": 30455 }, { "epoch": 0.8892003153192608, "grad_norm": 0.550296193187286, "learning_rate": 6.155717761557178e-06, "loss": 0.6225, "step": 30456 }, { "epoch": 0.8892295115471082, "grad_norm": 0.5690206865587977, "learning_rate": 6.154095701540957e-06, "loss": 0.6877, "step": 30457 }, { "epoch": 0.8892587077749555, "grad_norm": 0.5429977627907234, "learning_rate": 6.1524736415247364e-06, "loss": 0.6118, "step": 30458 }, { "epoch": 0.8892879040028029, "grad_norm": 0.5630903079417527, "learning_rate": 6.1508515815085165e-06, "loss": 0.6178, "step": 30459 }, { "epoch": 0.8893171002306502, "grad_norm": 0.5214337633838959, "learning_rate": 6.149229521492295e-06, "loss": 0.58, "step": 30460 }, { "epoch": 0.8893462964584976, "grad_norm": 0.5435380456828008, "learning_rate": 6.147607461476075e-06, "loss": 0.6008, "step": 30461 }, { "epoch": 0.889375492686345, "grad_norm": 0.5464291288149035, "learning_rate": 6.145985401459854e-06, "loss": 0.585, "step": 30462 }, { "epoch": 0.8894046889141923, "grad_norm": 0.5797509853641563, "learning_rate": 6.144363341443634e-06, "loss": 0.6693, "step": 30463 }, { "epoch": 0.8894338851420397, "grad_norm": 0.525716418675856, "learning_rate": 6.1427412814274135e-06, "loss": 0.5421, "step": 30464 }, { "epoch": 0.889463081369887, "grad_norm": 0.5379952003242768, "learning_rate": 6.141119221411192e-06, "loss": 0.5723, "step": 30465 }, { "epoch": 0.8894922775977344, "grad_norm": 0.5139205564767617, "learning_rate": 6.139497161394972e-06, "loss": 0.5516, "step": 30466 }, { "epoch": 0.8895214738255818, "grad_norm": 0.5853922811948334, "learning_rate": 6.137875101378751e-06, "loss": 0.648, "step": 30467 }, { "epoch": 0.8895506700534291, "grad_norm": 0.5255147881369369, "learning_rate": 6.136253041362531e-06, "loss": 0.577, "step": 30468 }, { "epoch": 0.8895798662812765, "grad_norm": 0.5887701215296585, "learning_rate": 6.13463098134631e-06, "loss": 0.637, "step": 30469 }, { "epoch": 0.8896090625091239, "grad_norm": 0.49486166200627807, "learning_rate": 6.13300892133009e-06, "loss": 0.5184, "step": 30470 }, { "epoch": 0.8896382587369712, "grad_norm": 0.5532463779653782, "learning_rate": 6.131386861313869e-06, "loss": 0.6306, "step": 30471 }, { "epoch": 0.8896674549648186, "grad_norm": 0.5304934151564172, "learning_rate": 6.129764801297648e-06, "loss": 0.5508, "step": 30472 }, { "epoch": 0.8896966511926659, "grad_norm": 0.49849916992583504, "learning_rate": 6.128142741281428e-06, "loss": 0.5464, "step": 30473 }, { "epoch": 0.8897258474205133, "grad_norm": 0.5308026368833705, "learning_rate": 6.1265206812652065e-06, "loss": 0.5981, "step": 30474 }, { "epoch": 0.8897550436483607, "grad_norm": 0.4893702714661527, "learning_rate": 6.124898621248987e-06, "loss": 0.516, "step": 30475 }, { "epoch": 0.889784239876208, "grad_norm": 0.5189678453296251, "learning_rate": 6.123276561232766e-06, "loss": 0.5495, "step": 30476 }, { "epoch": 0.8898134361040554, "grad_norm": 0.5404494039435443, "learning_rate": 6.121654501216546e-06, "loss": 0.5691, "step": 30477 }, { "epoch": 0.8898426323319027, "grad_norm": 0.5080549706247942, "learning_rate": 6.120032441200325e-06, "loss": 0.5493, "step": 30478 }, { "epoch": 0.8898718285597501, "grad_norm": 0.5406142468461225, "learning_rate": 6.1184103811841035e-06, "loss": 0.6064, "step": 30479 }, { "epoch": 0.8899010247875975, "grad_norm": 0.5243501494178846, "learning_rate": 6.1167883211678835e-06, "loss": 0.556, "step": 30480 }, { "epoch": 0.8899302210154448, "grad_norm": 0.5323611170453363, "learning_rate": 6.115166261151663e-06, "loss": 0.6053, "step": 30481 }, { "epoch": 0.8899594172432922, "grad_norm": 0.49936652392914865, "learning_rate": 6.113544201135443e-06, "loss": 0.5518, "step": 30482 }, { "epoch": 0.8899886134711396, "grad_norm": 0.5196800525725673, "learning_rate": 6.111922141119222e-06, "loss": 0.5867, "step": 30483 }, { "epoch": 0.8900178096989869, "grad_norm": 0.5203004667598025, "learning_rate": 6.110300081103001e-06, "loss": 0.5837, "step": 30484 }, { "epoch": 0.8900470059268343, "grad_norm": 0.5349777756131009, "learning_rate": 6.1086780210867805e-06, "loss": 0.6039, "step": 30485 }, { "epoch": 0.8900762021546816, "grad_norm": 0.5128634356388593, "learning_rate": 6.10705596107056e-06, "loss": 0.5186, "step": 30486 }, { "epoch": 0.890105398382529, "grad_norm": 0.5324345065493055, "learning_rate": 6.10543390105434e-06, "loss": 0.5934, "step": 30487 }, { "epoch": 0.8901345946103764, "grad_norm": 0.5132107682041345, "learning_rate": 6.103811841038118e-06, "loss": 0.5456, "step": 30488 }, { "epoch": 0.8901637908382237, "grad_norm": 0.532624879590605, "learning_rate": 6.102189781021898e-06, "loss": 0.609, "step": 30489 }, { "epoch": 0.8901929870660711, "grad_norm": 0.5131661666038755, "learning_rate": 6.1005677210056774e-06, "loss": 0.5651, "step": 30490 }, { "epoch": 0.8902221832939184, "grad_norm": 0.509713990781005, "learning_rate": 6.098945660989457e-06, "loss": 0.516, "step": 30491 }, { "epoch": 0.8902513795217658, "grad_norm": 0.5266233923760127, "learning_rate": 6.097323600973237e-06, "loss": 0.6088, "step": 30492 }, { "epoch": 0.8902805757496132, "grad_norm": 0.5591421659785254, "learning_rate": 6.095701540957015e-06, "loss": 0.6302, "step": 30493 }, { "epoch": 0.8903097719774605, "grad_norm": 0.57285702353908, "learning_rate": 6.094079480940795e-06, "loss": 0.6371, "step": 30494 }, { "epoch": 0.8903389682053079, "grad_norm": 0.4875721469623338, "learning_rate": 6.092457420924574e-06, "loss": 0.4877, "step": 30495 }, { "epoch": 0.8903681644331553, "grad_norm": 0.5902416621160917, "learning_rate": 6.0908353609083545e-06, "loss": 0.6777, "step": 30496 }, { "epoch": 0.8903973606610026, "grad_norm": 0.5409019399770308, "learning_rate": 6.089213300892134e-06, "loss": 0.5599, "step": 30497 }, { "epoch": 0.89042655688885, "grad_norm": 0.5443831420874994, "learning_rate": 6.087591240875912e-06, "loss": 0.5747, "step": 30498 }, { "epoch": 0.8904557531166973, "grad_norm": 0.5693873528366521, "learning_rate": 6.085969180859692e-06, "loss": 0.6462, "step": 30499 }, { "epoch": 0.8904849493445447, "grad_norm": 0.5550606095795674, "learning_rate": 6.084347120843471e-06, "loss": 0.632, "step": 30500 }, { "epoch": 0.8905141455723921, "grad_norm": 0.5147050309554859, "learning_rate": 6.082725060827251e-06, "loss": 0.566, "step": 30501 }, { "epoch": 0.8905433418002394, "grad_norm": 0.5392604186194072, "learning_rate": 6.08110300081103e-06, "loss": 0.5572, "step": 30502 }, { "epoch": 0.8905725380280868, "grad_norm": 0.4995911949297225, "learning_rate": 6.07948094079481e-06, "loss": 0.4954, "step": 30503 }, { "epoch": 0.8906017342559341, "grad_norm": 0.5153323218195759, "learning_rate": 6.077858880778589e-06, "loss": 0.5449, "step": 30504 }, { "epoch": 0.8906309304837815, "grad_norm": 0.5219464229710727, "learning_rate": 6.076236820762368e-06, "loss": 0.5736, "step": 30505 }, { "epoch": 0.8906601267116289, "grad_norm": 0.5017903579329903, "learning_rate": 6.074614760746148e-06, "loss": 0.5301, "step": 30506 }, { "epoch": 0.8906893229394762, "grad_norm": 0.5153895999054072, "learning_rate": 6.072992700729927e-06, "loss": 0.548, "step": 30507 }, { "epoch": 0.8907185191673236, "grad_norm": 0.5156510804722453, "learning_rate": 6.071370640713707e-06, "loss": 0.5588, "step": 30508 }, { "epoch": 0.890747715395171, "grad_norm": 0.5198954641532536, "learning_rate": 6.069748580697486e-06, "loss": 0.583, "step": 30509 }, { "epoch": 0.8907769116230183, "grad_norm": 0.5080612003890197, "learning_rate": 6.068126520681266e-06, "loss": 0.5511, "step": 30510 }, { "epoch": 0.8908061078508657, "grad_norm": 0.5656849082764784, "learning_rate": 6.066504460665045e-06, "loss": 0.6279, "step": 30511 }, { "epoch": 0.890835304078713, "grad_norm": 0.5299201779657604, "learning_rate": 6.064882400648824e-06, "loss": 0.5936, "step": 30512 }, { "epoch": 0.8908645003065604, "grad_norm": 0.49716747279865176, "learning_rate": 6.063260340632604e-06, "loss": 0.5026, "step": 30513 }, { "epoch": 0.8908936965344078, "grad_norm": 0.4988782790269786, "learning_rate": 6.061638280616383e-06, "loss": 0.554, "step": 30514 }, { "epoch": 0.8909228927622551, "grad_norm": 0.5174573764576639, "learning_rate": 6.060016220600163e-06, "loss": 0.5617, "step": 30515 }, { "epoch": 0.8909520889901025, "grad_norm": 0.5524171377887955, "learning_rate": 6.058394160583942e-06, "loss": 0.5899, "step": 30516 }, { "epoch": 0.8909812852179498, "grad_norm": 0.5368515503588063, "learning_rate": 6.056772100567721e-06, "loss": 0.5773, "step": 30517 }, { "epoch": 0.8910104814457972, "grad_norm": 0.5012157322587831, "learning_rate": 6.055150040551501e-06, "loss": 0.5051, "step": 30518 }, { "epoch": 0.8910396776736446, "grad_norm": 0.5139166469375569, "learning_rate": 6.05352798053528e-06, "loss": 0.5808, "step": 30519 }, { "epoch": 0.8910688739014919, "grad_norm": 0.5398286213348661, "learning_rate": 6.05190592051906e-06, "loss": 0.5883, "step": 30520 }, { "epoch": 0.8910980701293393, "grad_norm": 0.5380095773281826, "learning_rate": 6.050283860502838e-06, "loss": 0.5594, "step": 30521 }, { "epoch": 0.8911272663571866, "grad_norm": 0.5300371460807926, "learning_rate": 6.0486618004866184e-06, "loss": 0.5853, "step": 30522 }, { "epoch": 0.891156462585034, "grad_norm": 0.52593819479939, "learning_rate": 6.047039740470398e-06, "loss": 0.5895, "step": 30523 }, { "epoch": 0.8911856588128814, "grad_norm": 0.5726171045652104, "learning_rate": 6.045417680454177e-06, "loss": 0.6981, "step": 30524 }, { "epoch": 0.8912148550407287, "grad_norm": 0.5641287067458041, "learning_rate": 6.043795620437957e-06, "loss": 0.6785, "step": 30525 }, { "epoch": 0.8912440512685761, "grad_norm": 0.5096930867539973, "learning_rate": 6.042173560421735e-06, "loss": 0.5217, "step": 30526 }, { "epoch": 0.8912732474964234, "grad_norm": 0.5475058064782151, "learning_rate": 6.040551500405515e-06, "loss": 0.657, "step": 30527 }, { "epoch": 0.8913024437242708, "grad_norm": 0.508480819304929, "learning_rate": 6.038929440389295e-06, "loss": 0.5284, "step": 30528 }, { "epoch": 0.8913316399521182, "grad_norm": 0.5378521680344157, "learning_rate": 6.037307380373075e-06, "loss": 0.6033, "step": 30529 }, { "epoch": 0.8913608361799655, "grad_norm": 0.5478275432694915, "learning_rate": 6.035685320356854e-06, "loss": 0.628, "step": 30530 }, { "epoch": 0.8913900324078129, "grad_norm": 0.5284547096855574, "learning_rate": 6.034063260340632e-06, "loss": 0.5864, "step": 30531 }, { "epoch": 0.8914192286356603, "grad_norm": 0.5342577858956491, "learning_rate": 6.032441200324412e-06, "loss": 0.5851, "step": 30532 }, { "epoch": 0.8914484248635076, "grad_norm": 0.514961903104316, "learning_rate": 6.0308191403081916e-06, "loss": 0.5462, "step": 30533 }, { "epoch": 0.891477621091355, "grad_norm": 0.5156820754043767, "learning_rate": 6.029197080291972e-06, "loss": 0.552, "step": 30534 }, { "epoch": 0.8915068173192023, "grad_norm": 0.5338024897571807, "learning_rate": 6.02757502027575e-06, "loss": 0.5489, "step": 30535 }, { "epoch": 0.8915360135470497, "grad_norm": 0.5464706632753238, "learning_rate": 6.02595296025953e-06, "loss": 0.6401, "step": 30536 }, { "epoch": 0.8915652097748971, "grad_norm": 0.5640964026072194, "learning_rate": 6.024330900243309e-06, "loss": 0.6038, "step": 30537 }, { "epoch": 0.8915944060027444, "grad_norm": 0.5038007421306304, "learning_rate": 6.0227088402270885e-06, "loss": 0.5662, "step": 30538 }, { "epoch": 0.8916236022305918, "grad_norm": 0.5501201324117698, "learning_rate": 6.021086780210869e-06, "loss": 0.5918, "step": 30539 }, { "epoch": 0.8916527984584391, "grad_norm": 0.5130728694936745, "learning_rate": 6.019464720194647e-06, "loss": 0.5527, "step": 30540 }, { "epoch": 0.8916819946862865, "grad_norm": 0.5422214999646736, "learning_rate": 6.017842660178427e-06, "loss": 0.638, "step": 30541 }, { "epoch": 0.8917111909141339, "grad_norm": 0.47685635476515675, "learning_rate": 6.016220600162206e-06, "loss": 0.4875, "step": 30542 }, { "epoch": 0.8917403871419812, "grad_norm": 0.49524530305444847, "learning_rate": 6.014598540145986e-06, "loss": 0.5253, "step": 30543 }, { "epoch": 0.8917695833698286, "grad_norm": 0.583368531244087, "learning_rate": 6.0129764801297655e-06, "loss": 0.6552, "step": 30544 }, { "epoch": 0.891798779597676, "grad_norm": 0.4954752475423527, "learning_rate": 6.011354420113544e-06, "loss": 0.5337, "step": 30545 }, { "epoch": 0.8918279758255233, "grad_norm": 0.594327204804811, "learning_rate": 6.009732360097324e-06, "loss": 0.6661, "step": 30546 }, { "epoch": 0.8918571720533707, "grad_norm": 0.5437789518399566, "learning_rate": 6.008110300081103e-06, "loss": 0.5718, "step": 30547 }, { "epoch": 0.891886368281218, "grad_norm": 0.5087947661591037, "learning_rate": 6.006488240064883e-06, "loss": 0.5172, "step": 30548 }, { "epoch": 0.8919155645090654, "grad_norm": 0.476318399618844, "learning_rate": 6.004866180048662e-06, "loss": 0.5067, "step": 30549 }, { "epoch": 0.8919447607369128, "grad_norm": 0.5285078507280113, "learning_rate": 6.003244120032441e-06, "loss": 0.5989, "step": 30550 }, { "epoch": 0.8919739569647601, "grad_norm": 0.5116246466697588, "learning_rate": 6.001622060016221e-06, "loss": 0.5309, "step": 30551 }, { "epoch": 0.8920031531926075, "grad_norm": 0.5668957266245996, "learning_rate": 6e-06, "loss": 0.6487, "step": 30552 }, { "epoch": 0.8920323494204548, "grad_norm": 0.4730854650486772, "learning_rate": 5.99837793998378e-06, "loss": 0.4825, "step": 30553 }, { "epoch": 0.8920615456483022, "grad_norm": 0.5285688767870602, "learning_rate": 5.996755879967559e-06, "loss": 0.5823, "step": 30554 }, { "epoch": 0.8920907418761496, "grad_norm": 0.5515562868113945, "learning_rate": 5.995133819951339e-06, "loss": 0.6315, "step": 30555 }, { "epoch": 0.8921199381039969, "grad_norm": 0.49663702619161765, "learning_rate": 5.993511759935118e-06, "loss": 0.4964, "step": 30556 }, { "epoch": 0.8921491343318443, "grad_norm": 0.5406362407754869, "learning_rate": 5.991889699918897e-06, "loss": 0.6015, "step": 30557 }, { "epoch": 0.8921783305596916, "grad_norm": 0.4649794666607428, "learning_rate": 5.990267639902677e-06, "loss": 0.4503, "step": 30558 }, { "epoch": 0.892207526787539, "grad_norm": 0.5908044435785106, "learning_rate": 5.9886455798864555e-06, "loss": 0.6768, "step": 30559 }, { "epoch": 0.8922367230153864, "grad_norm": 0.5569504071478096, "learning_rate": 5.987023519870236e-06, "loss": 0.6439, "step": 30560 }, { "epoch": 0.8922659192432337, "grad_norm": 0.4936837537384472, "learning_rate": 5.985401459854015e-06, "loss": 0.5141, "step": 30561 }, { "epoch": 0.8922951154710811, "grad_norm": 0.5399727329585366, "learning_rate": 5.983779399837795e-06, "loss": 0.632, "step": 30562 }, { "epoch": 0.8923243116989285, "grad_norm": 0.5171254942228368, "learning_rate": 5.982157339821573e-06, "loss": 0.5971, "step": 30563 }, { "epoch": 0.8923535079267758, "grad_norm": 0.5182711285346412, "learning_rate": 5.9805352798053525e-06, "loss": 0.5278, "step": 30564 }, { "epoch": 0.8923827041546232, "grad_norm": 0.525070712665011, "learning_rate": 5.9789132197891326e-06, "loss": 0.5866, "step": 30565 }, { "epoch": 0.8924119003824705, "grad_norm": 0.5542232622647757, "learning_rate": 5.977291159772912e-06, "loss": 0.6292, "step": 30566 }, { "epoch": 0.8924410966103179, "grad_norm": 0.5736860938765331, "learning_rate": 5.975669099756692e-06, "loss": 0.664, "step": 30567 }, { "epoch": 0.8924702928381653, "grad_norm": 0.5458068715067699, "learning_rate": 5.97404703974047e-06, "loss": 0.5929, "step": 30568 }, { "epoch": 0.8924994890660126, "grad_norm": 0.46932334287928457, "learning_rate": 5.97242497972425e-06, "loss": 0.495, "step": 30569 }, { "epoch": 0.89252868529386, "grad_norm": 0.5326708803184863, "learning_rate": 5.9708029197080295e-06, "loss": 0.5445, "step": 30570 }, { "epoch": 0.8925578815217073, "grad_norm": 0.5404955241937539, "learning_rate": 5.969180859691809e-06, "loss": 0.5747, "step": 30571 }, { "epoch": 0.8925870777495547, "grad_norm": 0.49348352355893205, "learning_rate": 5.967558799675589e-06, "loss": 0.52, "step": 30572 }, { "epoch": 0.8926162739774022, "grad_norm": 0.5907605715668006, "learning_rate": 5.965936739659367e-06, "loss": 0.7116, "step": 30573 }, { "epoch": 0.8926454702052495, "grad_norm": 0.5128165670258616, "learning_rate": 5.964314679643147e-06, "loss": 0.5133, "step": 30574 }, { "epoch": 0.8926746664330969, "grad_norm": 0.5199381827097513, "learning_rate": 5.9626926196269265e-06, "loss": 0.5729, "step": 30575 }, { "epoch": 0.8927038626609443, "grad_norm": 0.5347229398578386, "learning_rate": 5.961070559610706e-06, "loss": 0.6046, "step": 30576 }, { "epoch": 0.8927330588887916, "grad_norm": 0.5253430272903411, "learning_rate": 5.959448499594486e-06, "loss": 0.5876, "step": 30577 }, { "epoch": 0.892762255116639, "grad_norm": 0.5294235118914886, "learning_rate": 5.957826439578264e-06, "loss": 0.5926, "step": 30578 }, { "epoch": 0.8927914513444863, "grad_norm": 0.5164777625685878, "learning_rate": 5.956204379562044e-06, "loss": 0.5445, "step": 30579 }, { "epoch": 0.8928206475723337, "grad_norm": 0.5019139326366753, "learning_rate": 5.954582319545823e-06, "loss": 0.5425, "step": 30580 }, { "epoch": 0.8928498438001811, "grad_norm": 0.5031484345765399, "learning_rate": 5.9529602595296035e-06, "loss": 0.5222, "step": 30581 }, { "epoch": 0.8928790400280284, "grad_norm": 0.5040152824871188, "learning_rate": 5.951338199513382e-06, "loss": 0.556, "step": 30582 }, { "epoch": 0.8929082362558758, "grad_norm": 0.5191501807272969, "learning_rate": 5.949716139497161e-06, "loss": 0.5613, "step": 30583 }, { "epoch": 0.8929374324837231, "grad_norm": 0.48318092213973524, "learning_rate": 5.948094079480941e-06, "loss": 0.5131, "step": 30584 }, { "epoch": 0.8929666287115705, "grad_norm": 0.500147260019401, "learning_rate": 5.94647201946472e-06, "loss": 0.5182, "step": 30585 }, { "epoch": 0.8929958249394179, "grad_norm": 0.5550806298701532, "learning_rate": 5.9448499594485004e-06, "loss": 0.6716, "step": 30586 }, { "epoch": 0.8930250211672652, "grad_norm": 0.5329541870047461, "learning_rate": 5.943227899432279e-06, "loss": 0.5842, "step": 30587 }, { "epoch": 0.8930542173951126, "grad_norm": 0.5253593697056087, "learning_rate": 5.941605839416059e-06, "loss": 0.59, "step": 30588 }, { "epoch": 0.89308341362296, "grad_norm": 0.5265507080672747, "learning_rate": 5.939983779399838e-06, "loss": 0.5565, "step": 30589 }, { "epoch": 0.8931126098508073, "grad_norm": 0.5544925940961768, "learning_rate": 5.938361719383617e-06, "loss": 0.64, "step": 30590 }, { "epoch": 0.8931418060786547, "grad_norm": 0.5633058383345476, "learning_rate": 5.936739659367397e-06, "loss": 0.6455, "step": 30591 }, { "epoch": 0.893171002306502, "grad_norm": 0.5138071088486994, "learning_rate": 5.935117599351176e-06, "loss": 0.5397, "step": 30592 }, { "epoch": 0.8932001985343494, "grad_norm": 0.5416662561132205, "learning_rate": 5.933495539334956e-06, "loss": 0.59, "step": 30593 }, { "epoch": 0.8932293947621968, "grad_norm": 0.5302090275133734, "learning_rate": 5.931873479318735e-06, "loss": 0.5644, "step": 30594 }, { "epoch": 0.8932585909900441, "grad_norm": 0.5012437708858909, "learning_rate": 5.930251419302515e-06, "loss": 0.5193, "step": 30595 }, { "epoch": 0.8932877872178915, "grad_norm": 0.5102682807882406, "learning_rate": 5.9286293592862935e-06, "loss": 0.5442, "step": 30596 }, { "epoch": 0.8933169834457388, "grad_norm": 0.529688579292703, "learning_rate": 5.927007299270073e-06, "loss": 0.6011, "step": 30597 }, { "epoch": 0.8933461796735862, "grad_norm": 0.5565167988066464, "learning_rate": 5.925385239253853e-06, "loss": 0.6318, "step": 30598 }, { "epoch": 0.8933753759014336, "grad_norm": 0.576686512697969, "learning_rate": 5.923763179237632e-06, "loss": 0.6174, "step": 30599 }, { "epoch": 0.8934045721292809, "grad_norm": 0.4646548842457667, "learning_rate": 5.922141119221412e-06, "loss": 0.4674, "step": 30600 }, { "epoch": 0.8934337683571283, "grad_norm": 0.5420539863574108, "learning_rate": 5.9205190592051904e-06, "loss": 0.6183, "step": 30601 }, { "epoch": 0.8934629645849756, "grad_norm": 0.5147539780494661, "learning_rate": 5.9188969991889705e-06, "loss": 0.5554, "step": 30602 }, { "epoch": 0.893492160812823, "grad_norm": 0.540400350047128, "learning_rate": 5.91727493917275e-06, "loss": 0.6046, "step": 30603 }, { "epoch": 0.8935213570406704, "grad_norm": 0.5920448321300686, "learning_rate": 5.915652879156529e-06, "loss": 0.6883, "step": 30604 }, { "epoch": 0.8935505532685177, "grad_norm": 0.5611509894817756, "learning_rate": 5.914030819140309e-06, "loss": 0.6377, "step": 30605 }, { "epoch": 0.8935797494963651, "grad_norm": 0.5351925976327789, "learning_rate": 5.912408759124087e-06, "loss": 0.5817, "step": 30606 }, { "epoch": 0.8936089457242125, "grad_norm": 0.5450246756306276, "learning_rate": 5.9107866991078675e-06, "loss": 0.6296, "step": 30607 }, { "epoch": 0.8936381419520598, "grad_norm": 0.5220817770239744, "learning_rate": 5.909164639091647e-06, "loss": 0.5994, "step": 30608 }, { "epoch": 0.8936673381799072, "grad_norm": 0.5838545168774324, "learning_rate": 5.907542579075426e-06, "loss": 0.6152, "step": 30609 }, { "epoch": 0.8936965344077545, "grad_norm": 0.6142401554037644, "learning_rate": 5.905920519059205e-06, "loss": 0.6732, "step": 30610 }, { "epoch": 0.8937257306356019, "grad_norm": 0.5121534435078828, "learning_rate": 5.904298459042984e-06, "loss": 0.5582, "step": 30611 }, { "epoch": 0.8937549268634493, "grad_norm": 0.526097598758079, "learning_rate": 5.902676399026764e-06, "loss": 0.5824, "step": 30612 }, { "epoch": 0.8937841230912966, "grad_norm": 0.5015703282556897, "learning_rate": 5.901054339010544e-06, "loss": 0.5297, "step": 30613 }, { "epoch": 0.893813319319144, "grad_norm": 0.5408284440162413, "learning_rate": 5.899432278994324e-06, "loss": 0.5332, "step": 30614 }, { "epoch": 0.8938425155469913, "grad_norm": 0.4970061184744379, "learning_rate": 5.897810218978102e-06, "loss": 0.5357, "step": 30615 }, { "epoch": 0.8938717117748387, "grad_norm": 0.5394001046434935, "learning_rate": 5.896188158961881e-06, "loss": 0.6283, "step": 30616 }, { "epoch": 0.8939009080026861, "grad_norm": 0.5130727306325684, "learning_rate": 5.894566098945661e-06, "loss": 0.5404, "step": 30617 }, { "epoch": 0.8939301042305334, "grad_norm": 0.5627194020083763, "learning_rate": 5.892944038929441e-06, "loss": 0.6209, "step": 30618 }, { "epoch": 0.8939593004583808, "grad_norm": 0.5230457158373943, "learning_rate": 5.891321978913221e-06, "loss": 0.5663, "step": 30619 }, { "epoch": 0.8939884966862282, "grad_norm": 0.5134924856271521, "learning_rate": 5.889699918896999e-06, "loss": 0.5282, "step": 30620 }, { "epoch": 0.8940176929140755, "grad_norm": 0.5477208488953742, "learning_rate": 5.888077858880779e-06, "loss": 0.6242, "step": 30621 }, { "epoch": 0.8940468891419229, "grad_norm": 0.537745357388453, "learning_rate": 5.886455798864558e-06, "loss": 0.5709, "step": 30622 }, { "epoch": 0.8940760853697702, "grad_norm": 0.5053163069864757, "learning_rate": 5.8848337388483375e-06, "loss": 0.5583, "step": 30623 }, { "epoch": 0.8941052815976176, "grad_norm": 0.5237829670849931, "learning_rate": 5.883211678832117e-06, "loss": 0.5217, "step": 30624 }, { "epoch": 0.894134477825465, "grad_norm": 0.5120908265189351, "learning_rate": 5.881589618815896e-06, "loss": 0.5119, "step": 30625 }, { "epoch": 0.8941636740533123, "grad_norm": 0.5251386616572452, "learning_rate": 5.879967558799676e-06, "loss": 0.5658, "step": 30626 }, { "epoch": 0.8941928702811597, "grad_norm": 0.5111651886075287, "learning_rate": 5.878345498783455e-06, "loss": 0.5781, "step": 30627 }, { "epoch": 0.894222066509007, "grad_norm": 0.5001249686434243, "learning_rate": 5.876723438767235e-06, "loss": 0.5197, "step": 30628 }, { "epoch": 0.8942512627368544, "grad_norm": 0.5478082553956025, "learning_rate": 5.875101378751014e-06, "loss": 0.6317, "step": 30629 }, { "epoch": 0.8942804589647018, "grad_norm": 0.5300863799528306, "learning_rate": 5.873479318734793e-06, "loss": 0.5783, "step": 30630 }, { "epoch": 0.8943096551925491, "grad_norm": 0.4972486297237757, "learning_rate": 5.871857258718573e-06, "loss": 0.5139, "step": 30631 }, { "epoch": 0.8943388514203965, "grad_norm": 0.5329672686112156, "learning_rate": 5.870235198702352e-06, "loss": 0.6072, "step": 30632 }, { "epoch": 0.8943680476482438, "grad_norm": 0.524208095090796, "learning_rate": 5.868613138686132e-06, "loss": 0.5731, "step": 30633 }, { "epoch": 0.8943972438760912, "grad_norm": 0.5168069226017978, "learning_rate": 5.866991078669911e-06, "loss": 0.5909, "step": 30634 }, { "epoch": 0.8944264401039386, "grad_norm": 0.5140324142427878, "learning_rate": 5.86536901865369e-06, "loss": 0.521, "step": 30635 }, { "epoch": 0.8944556363317859, "grad_norm": 0.5515420561377204, "learning_rate": 5.86374695863747e-06, "loss": 0.6097, "step": 30636 }, { "epoch": 0.8944848325596333, "grad_norm": 0.5109565403458308, "learning_rate": 5.862124898621249e-06, "loss": 0.5439, "step": 30637 }, { "epoch": 0.8945140287874807, "grad_norm": 0.563007623733056, "learning_rate": 5.860502838605029e-06, "loss": 0.6369, "step": 30638 }, { "epoch": 0.894543225015328, "grad_norm": 0.5134743091708597, "learning_rate": 5.858880778588808e-06, "loss": 0.5572, "step": 30639 }, { "epoch": 0.8945724212431754, "grad_norm": 0.5140245736353855, "learning_rate": 5.857258718572588e-06, "loss": 0.5699, "step": 30640 }, { "epoch": 0.8946016174710227, "grad_norm": 0.5544920217963347, "learning_rate": 5.855636658556367e-06, "loss": 0.5788, "step": 30641 }, { "epoch": 0.8946308136988701, "grad_norm": 0.5845492865162426, "learning_rate": 5.854014598540146e-06, "loss": 0.6331, "step": 30642 }, { "epoch": 0.8946600099267175, "grad_norm": 0.5576480800605454, "learning_rate": 5.852392538523925e-06, "loss": 0.6486, "step": 30643 }, { "epoch": 0.8946892061545648, "grad_norm": 0.5040599729953034, "learning_rate": 5.8507704785077046e-06, "loss": 0.5272, "step": 30644 }, { "epoch": 0.8947184023824122, "grad_norm": 0.5239640897227075, "learning_rate": 5.849148418491485e-06, "loss": 0.5687, "step": 30645 }, { "epoch": 0.8947475986102595, "grad_norm": 0.5014277299827721, "learning_rate": 5.847526358475264e-06, "loss": 0.5402, "step": 30646 }, { "epoch": 0.8947767948381069, "grad_norm": 0.521454022454019, "learning_rate": 5.845904298459044e-06, "loss": 0.5791, "step": 30647 }, { "epoch": 0.8948059910659543, "grad_norm": 0.5290676984175352, "learning_rate": 5.844282238442822e-06, "loss": 0.5238, "step": 30648 }, { "epoch": 0.8948351872938016, "grad_norm": 0.5161153908652596, "learning_rate": 5.8426601784266015e-06, "loss": 0.5898, "step": 30649 }, { "epoch": 0.894864383521649, "grad_norm": 0.5459641635642847, "learning_rate": 5.841038118410382e-06, "loss": 0.6374, "step": 30650 }, { "epoch": 0.8948935797494963, "grad_norm": 0.5618268901357946, "learning_rate": 5.839416058394161e-06, "loss": 0.6346, "step": 30651 }, { "epoch": 0.8949227759773437, "grad_norm": 0.5145846025390217, "learning_rate": 5.837793998377941e-06, "loss": 0.5705, "step": 30652 }, { "epoch": 0.8949519722051911, "grad_norm": 0.5653797168369692, "learning_rate": 5.836171938361719e-06, "loss": 0.6302, "step": 30653 }, { "epoch": 0.8949811684330384, "grad_norm": 0.5224848045824694, "learning_rate": 5.834549878345499e-06, "loss": 0.6174, "step": 30654 }, { "epoch": 0.8950103646608858, "grad_norm": 0.5427333936856792, "learning_rate": 5.8329278183292785e-06, "loss": 0.5458, "step": 30655 }, { "epoch": 0.8950395608887332, "grad_norm": 0.5213075256668387, "learning_rate": 5.831305758313058e-06, "loss": 0.573, "step": 30656 }, { "epoch": 0.8950687571165805, "grad_norm": 0.5644517521202534, "learning_rate": 5.829683698296837e-06, "loss": 0.7228, "step": 30657 }, { "epoch": 0.8950979533444279, "grad_norm": 0.5168475318741155, "learning_rate": 5.828061638280616e-06, "loss": 0.5622, "step": 30658 }, { "epoch": 0.8951271495722752, "grad_norm": 0.5143819924723935, "learning_rate": 5.826439578264396e-06, "loss": 0.5663, "step": 30659 }, { "epoch": 0.8951563458001226, "grad_norm": 0.5276551042059084, "learning_rate": 5.8248175182481755e-06, "loss": 0.5751, "step": 30660 }, { "epoch": 0.89518554202797, "grad_norm": 0.5534576151202576, "learning_rate": 5.8231954582319556e-06, "loss": 0.5664, "step": 30661 }, { "epoch": 0.8952147382558173, "grad_norm": 0.546041714568152, "learning_rate": 5.821573398215734e-06, "loss": 0.5909, "step": 30662 }, { "epoch": 0.8952439344836647, "grad_norm": 0.5183911606376548, "learning_rate": 5.819951338199513e-06, "loss": 0.5586, "step": 30663 }, { "epoch": 0.895273130711512, "grad_norm": 0.5468485803138055, "learning_rate": 5.818329278183293e-06, "loss": 0.5988, "step": 30664 }, { "epoch": 0.8953023269393594, "grad_norm": 0.5172370901117832, "learning_rate": 5.8167072181670724e-06, "loss": 0.562, "step": 30665 }, { "epoch": 0.8953315231672068, "grad_norm": 0.5424592114345668, "learning_rate": 5.8150851581508525e-06, "loss": 0.533, "step": 30666 }, { "epoch": 0.8953607193950541, "grad_norm": 0.5014579456975051, "learning_rate": 5.813463098134631e-06, "loss": 0.5107, "step": 30667 }, { "epoch": 0.8953899156229015, "grad_norm": 0.5236761850686754, "learning_rate": 5.81184103811841e-06, "loss": 0.5571, "step": 30668 }, { "epoch": 0.8954191118507489, "grad_norm": 0.5464113645508252, "learning_rate": 5.81021897810219e-06, "loss": 0.6295, "step": 30669 }, { "epoch": 0.8954483080785962, "grad_norm": 0.5275016705986425, "learning_rate": 5.808596918085969e-06, "loss": 0.5659, "step": 30670 }, { "epoch": 0.8954775043064436, "grad_norm": 0.5087481074668397, "learning_rate": 5.806974858069749e-06, "loss": 0.5435, "step": 30671 }, { "epoch": 0.8955067005342909, "grad_norm": 0.535007652324479, "learning_rate": 5.805352798053528e-06, "loss": 0.606, "step": 30672 }, { "epoch": 0.8955358967621383, "grad_norm": 0.5309838079266085, "learning_rate": 5.803730738037308e-06, "loss": 0.5642, "step": 30673 }, { "epoch": 0.8955650929899857, "grad_norm": 0.5614931103092193, "learning_rate": 5.802108678021087e-06, "loss": 0.6559, "step": 30674 }, { "epoch": 0.895594289217833, "grad_norm": 0.5297496652133813, "learning_rate": 5.800486618004866e-06, "loss": 0.5681, "step": 30675 }, { "epoch": 0.8956234854456804, "grad_norm": 0.5229673127392579, "learning_rate": 5.7988645579886456e-06, "loss": 0.5335, "step": 30676 }, { "epoch": 0.8956526816735277, "grad_norm": 0.4845302350424903, "learning_rate": 5.797242497972425e-06, "loss": 0.482, "step": 30677 }, { "epoch": 0.8956818779013751, "grad_norm": 0.539534836862927, "learning_rate": 5.795620437956205e-06, "loss": 0.6034, "step": 30678 }, { "epoch": 0.8957110741292225, "grad_norm": 0.48330703708023504, "learning_rate": 5.793998377939984e-06, "loss": 0.5162, "step": 30679 }, { "epoch": 0.8957402703570698, "grad_norm": 0.48488509441120253, "learning_rate": 5.792376317923764e-06, "loss": 0.4755, "step": 30680 }, { "epoch": 0.8957694665849172, "grad_norm": 0.49014236222607355, "learning_rate": 5.7907542579075425e-06, "loss": 0.5348, "step": 30681 }, { "epoch": 0.8957986628127645, "grad_norm": 0.5828209479605293, "learning_rate": 5.789132197891322e-06, "loss": 0.6244, "step": 30682 }, { "epoch": 0.8958278590406119, "grad_norm": 0.5085596202056347, "learning_rate": 5.787510137875102e-06, "loss": 0.5265, "step": 30683 }, { "epoch": 0.8958570552684593, "grad_norm": 0.5288996015121255, "learning_rate": 5.785888077858881e-06, "loss": 0.6113, "step": 30684 }, { "epoch": 0.8958862514963066, "grad_norm": 0.49366400349437783, "learning_rate": 5.78426601784266e-06, "loss": 0.49, "step": 30685 }, { "epoch": 0.895915447724154, "grad_norm": 0.5465241056396591, "learning_rate": 5.7826439578264395e-06, "loss": 0.6077, "step": 30686 }, { "epoch": 0.8959446439520014, "grad_norm": 0.5482352067827125, "learning_rate": 5.7810218978102195e-06, "loss": 0.5955, "step": 30687 }, { "epoch": 0.8959738401798487, "grad_norm": 0.4979878042542624, "learning_rate": 5.779399837793999e-06, "loss": 0.5508, "step": 30688 }, { "epoch": 0.8960030364076961, "grad_norm": 0.48687133118220643, "learning_rate": 5.777777777777778e-06, "loss": 0.5074, "step": 30689 }, { "epoch": 0.8960322326355434, "grad_norm": 0.5659630422838714, "learning_rate": 5.776155717761557e-06, "loss": 0.6705, "step": 30690 }, { "epoch": 0.8960614288633908, "grad_norm": 0.5578950932901094, "learning_rate": 5.7745336577453364e-06, "loss": 0.6686, "step": 30691 }, { "epoch": 0.8960906250912382, "grad_norm": 0.5986602468742636, "learning_rate": 5.7729115977291165e-06, "loss": 0.6571, "step": 30692 }, { "epoch": 0.8961198213190855, "grad_norm": 0.5297649744598084, "learning_rate": 5.771289537712896e-06, "loss": 0.5822, "step": 30693 }, { "epoch": 0.896149017546933, "grad_norm": 0.5595112361454067, "learning_rate": 5.769667477696676e-06, "loss": 0.6694, "step": 30694 }, { "epoch": 0.8961782137747804, "grad_norm": 0.5259118079814776, "learning_rate": 5.768045417680454e-06, "loss": 0.5818, "step": 30695 }, { "epoch": 0.8962074100026277, "grad_norm": 0.5415651128231291, "learning_rate": 5.766423357664233e-06, "loss": 0.6005, "step": 30696 }, { "epoch": 0.8962366062304751, "grad_norm": 0.540799016656355, "learning_rate": 5.7648012976480134e-06, "loss": 0.572, "step": 30697 }, { "epoch": 0.8962658024583224, "grad_norm": 0.5154797677762083, "learning_rate": 5.763179237631793e-06, "loss": 0.5568, "step": 30698 }, { "epoch": 0.8962949986861698, "grad_norm": 0.5416677351305057, "learning_rate": 5.761557177615573e-06, "loss": 0.6105, "step": 30699 }, { "epoch": 0.8963241949140172, "grad_norm": 0.49770316259095443, "learning_rate": 5.759935117599351e-06, "loss": 0.5272, "step": 30700 }, { "epoch": 0.8963533911418645, "grad_norm": 0.5527128165736519, "learning_rate": 5.75831305758313e-06, "loss": 0.5959, "step": 30701 }, { "epoch": 0.8963825873697119, "grad_norm": 0.5276192804281703, "learning_rate": 5.75669099756691e-06, "loss": 0.6023, "step": 30702 }, { "epoch": 0.8964117835975592, "grad_norm": 0.5028635386504217, "learning_rate": 5.75506893755069e-06, "loss": 0.5326, "step": 30703 }, { "epoch": 0.8964409798254066, "grad_norm": 0.557773160533712, "learning_rate": 5.753446877534469e-06, "loss": 0.6374, "step": 30704 }, { "epoch": 0.896470176053254, "grad_norm": 0.4925311277966195, "learning_rate": 5.751824817518248e-06, "loss": 0.4954, "step": 30705 }, { "epoch": 0.8964993722811013, "grad_norm": 0.5428809122589617, "learning_rate": 5.750202757502028e-06, "loss": 0.6278, "step": 30706 }, { "epoch": 0.8965285685089487, "grad_norm": 0.5299467663221742, "learning_rate": 5.748580697485807e-06, "loss": 0.579, "step": 30707 }, { "epoch": 0.896557764736796, "grad_norm": 0.5215473938554787, "learning_rate": 5.7469586374695866e-06, "loss": 0.5876, "step": 30708 }, { "epoch": 0.8965869609646434, "grad_norm": 0.5267057494538108, "learning_rate": 5.745336577453366e-06, "loss": 0.56, "step": 30709 }, { "epoch": 0.8966161571924908, "grad_norm": 0.5241155334322725, "learning_rate": 5.743714517437145e-06, "loss": 0.5802, "step": 30710 }, { "epoch": 0.8966453534203381, "grad_norm": 0.5317806488421725, "learning_rate": 5.742092457420925e-06, "loss": 0.5483, "step": 30711 }, { "epoch": 0.8966745496481855, "grad_norm": 0.4940573211171655, "learning_rate": 5.740470397404704e-06, "loss": 0.5319, "step": 30712 }, { "epoch": 0.8967037458760329, "grad_norm": 0.5375029917155371, "learning_rate": 5.738848337388484e-06, "loss": 0.568, "step": 30713 }, { "epoch": 0.8967329421038802, "grad_norm": 0.5124972824122529, "learning_rate": 5.737226277372263e-06, "loss": 0.5163, "step": 30714 }, { "epoch": 0.8967621383317276, "grad_norm": 0.529026133449172, "learning_rate": 5.735604217356042e-06, "loss": 0.5701, "step": 30715 }, { "epoch": 0.8967913345595749, "grad_norm": 0.575582851260696, "learning_rate": 5.733982157339822e-06, "loss": 0.66, "step": 30716 }, { "epoch": 0.8968205307874223, "grad_norm": 0.5895794033052342, "learning_rate": 5.732360097323601e-06, "loss": 0.6628, "step": 30717 }, { "epoch": 0.8968497270152697, "grad_norm": 0.5806101568099205, "learning_rate": 5.7307380373073805e-06, "loss": 0.6871, "step": 30718 }, { "epoch": 0.896878923243117, "grad_norm": 0.5172777423826758, "learning_rate": 5.72911597729116e-06, "loss": 0.5646, "step": 30719 }, { "epoch": 0.8969081194709644, "grad_norm": 0.5460685131083336, "learning_rate": 5.72749391727494e-06, "loss": 0.5799, "step": 30720 }, { "epoch": 0.8969373156988117, "grad_norm": 0.5598733558659055, "learning_rate": 5.725871857258719e-06, "loss": 0.6483, "step": 30721 }, { "epoch": 0.8969665119266591, "grad_norm": 0.5252563133534982, "learning_rate": 5.724249797242498e-06, "loss": 0.616, "step": 30722 }, { "epoch": 0.8969957081545065, "grad_norm": 0.5444085023916492, "learning_rate": 5.7226277372262774e-06, "loss": 0.5872, "step": 30723 }, { "epoch": 0.8970249043823538, "grad_norm": 0.484535304493271, "learning_rate": 5.721005677210057e-06, "loss": 0.5002, "step": 30724 }, { "epoch": 0.8970541006102012, "grad_norm": 0.5223318328199831, "learning_rate": 5.719383617193837e-06, "loss": 0.5628, "step": 30725 }, { "epoch": 0.8970832968380485, "grad_norm": 0.4949804466391009, "learning_rate": 5.717761557177616e-06, "loss": 0.5258, "step": 30726 }, { "epoch": 0.8971124930658959, "grad_norm": 0.5455848363422099, "learning_rate": 5.716139497161395e-06, "loss": 0.6093, "step": 30727 }, { "epoch": 0.8971416892937433, "grad_norm": 0.49141972718436355, "learning_rate": 5.714517437145174e-06, "loss": 0.5191, "step": 30728 }, { "epoch": 0.8971708855215906, "grad_norm": 0.5209895193542174, "learning_rate": 5.712895377128954e-06, "loss": 0.5831, "step": 30729 }, { "epoch": 0.897200081749438, "grad_norm": 0.525060343695118, "learning_rate": 5.711273317112734e-06, "loss": 0.5556, "step": 30730 }, { "epoch": 0.8972292779772854, "grad_norm": 0.5004646627435209, "learning_rate": 5.709651257096513e-06, "loss": 0.5601, "step": 30731 }, { "epoch": 0.8972584742051327, "grad_norm": 0.5164490202944149, "learning_rate": 5.708029197080292e-06, "loss": 0.5495, "step": 30732 }, { "epoch": 0.8972876704329801, "grad_norm": 0.5115765283080707, "learning_rate": 5.706407137064071e-06, "loss": 0.5231, "step": 30733 }, { "epoch": 0.8973168666608274, "grad_norm": 0.5185182849865196, "learning_rate": 5.7047850770478506e-06, "loss": 0.5797, "step": 30734 }, { "epoch": 0.8973460628886748, "grad_norm": 0.554679521100892, "learning_rate": 5.703163017031631e-06, "loss": 0.6471, "step": 30735 }, { "epoch": 0.8973752591165222, "grad_norm": 0.511137583434771, "learning_rate": 5.70154095701541e-06, "loss": 0.5733, "step": 30736 }, { "epoch": 0.8974044553443695, "grad_norm": 0.5409486613986858, "learning_rate": 5.699918896999189e-06, "loss": 0.6261, "step": 30737 }, { "epoch": 0.8974336515722169, "grad_norm": 0.5299814860812383, "learning_rate": 5.698296836982968e-06, "loss": 0.5743, "step": 30738 }, { "epoch": 0.8974628478000642, "grad_norm": 0.5271049920726699, "learning_rate": 5.696674776966748e-06, "loss": 0.5969, "step": 30739 }, { "epoch": 0.8974920440279116, "grad_norm": 0.5068851743321305, "learning_rate": 5.6950527169505276e-06, "loss": 0.5367, "step": 30740 }, { "epoch": 0.897521240255759, "grad_norm": 0.5136582906384185, "learning_rate": 5.693430656934307e-06, "loss": 0.5506, "step": 30741 }, { "epoch": 0.8975504364836063, "grad_norm": 0.5154812564695143, "learning_rate": 5.691808596918086e-06, "loss": 0.5603, "step": 30742 }, { "epoch": 0.8975796327114537, "grad_norm": 0.5384322186124689, "learning_rate": 5.690186536901865e-06, "loss": 0.5729, "step": 30743 }, { "epoch": 0.897608828939301, "grad_norm": 0.5180541759381228, "learning_rate": 5.688564476885645e-06, "loss": 0.5479, "step": 30744 }, { "epoch": 0.8976380251671484, "grad_norm": 0.52967835324471, "learning_rate": 5.6869424168694245e-06, "loss": 0.5903, "step": 30745 }, { "epoch": 0.8976672213949958, "grad_norm": 0.5246647168851731, "learning_rate": 5.685320356853204e-06, "loss": 0.5528, "step": 30746 }, { "epoch": 0.8976964176228431, "grad_norm": 0.528647753117374, "learning_rate": 5.683698296836983e-06, "loss": 0.5178, "step": 30747 }, { "epoch": 0.8977256138506905, "grad_norm": 0.5304641521675403, "learning_rate": 5.682076236820762e-06, "loss": 0.6004, "step": 30748 }, { "epoch": 0.8977548100785379, "grad_norm": 0.5317684120194918, "learning_rate": 5.680454176804542e-06, "loss": 0.5906, "step": 30749 }, { "epoch": 0.8977840063063852, "grad_norm": 0.49880831258568886, "learning_rate": 5.6788321167883215e-06, "loss": 0.5413, "step": 30750 }, { "epoch": 0.8978132025342326, "grad_norm": 0.563840341317418, "learning_rate": 5.677210056772101e-06, "loss": 0.6547, "step": 30751 }, { "epoch": 0.8978423987620799, "grad_norm": 0.5799451499063211, "learning_rate": 5.67558799675588e-06, "loss": 0.7245, "step": 30752 }, { "epoch": 0.8978715949899273, "grad_norm": 0.5610929303206227, "learning_rate": 5.67396593673966e-06, "loss": 0.5958, "step": 30753 }, { "epoch": 0.8979007912177747, "grad_norm": 0.5219919288136113, "learning_rate": 5.672343876723439e-06, "loss": 0.5406, "step": 30754 }, { "epoch": 0.897929987445622, "grad_norm": 0.5204503115956475, "learning_rate": 5.6707218167072184e-06, "loss": 0.5593, "step": 30755 }, { "epoch": 0.8979591836734694, "grad_norm": 0.5657337830017378, "learning_rate": 5.669099756690998e-06, "loss": 0.6573, "step": 30756 }, { "epoch": 0.8979883799013167, "grad_norm": 0.5708506716436981, "learning_rate": 5.667477696674777e-06, "loss": 0.6276, "step": 30757 }, { "epoch": 0.8980175761291641, "grad_norm": 0.5260008390259, "learning_rate": 5.665855636658557e-06, "loss": 0.585, "step": 30758 }, { "epoch": 0.8980467723570115, "grad_norm": 0.5588937946840877, "learning_rate": 5.664233576642336e-06, "loss": 0.6339, "step": 30759 }, { "epoch": 0.8980759685848588, "grad_norm": 0.5368970794355362, "learning_rate": 5.662611516626115e-06, "loss": 0.5994, "step": 30760 }, { "epoch": 0.8981051648127062, "grad_norm": 0.517512621248189, "learning_rate": 5.660989456609895e-06, "loss": 0.5211, "step": 30761 }, { "epoch": 0.8981343610405536, "grad_norm": 0.5003883879102741, "learning_rate": 5.659367396593674e-06, "loss": 0.5176, "step": 30762 }, { "epoch": 0.8981635572684009, "grad_norm": 0.5006058480537631, "learning_rate": 5.657745336577454e-06, "loss": 0.5298, "step": 30763 }, { "epoch": 0.8981927534962483, "grad_norm": 0.510431470551357, "learning_rate": 5.656123276561233e-06, "loss": 0.5668, "step": 30764 }, { "epoch": 0.8982219497240956, "grad_norm": 0.5041390424341261, "learning_rate": 5.654501216545012e-06, "loss": 0.5542, "step": 30765 }, { "epoch": 0.898251145951943, "grad_norm": 0.5118706937835231, "learning_rate": 5.6528791565287916e-06, "loss": 0.5547, "step": 30766 }, { "epoch": 0.8982803421797904, "grad_norm": 0.5482242831197541, "learning_rate": 5.651257096512571e-06, "loss": 0.6435, "step": 30767 }, { "epoch": 0.8983095384076377, "grad_norm": 0.5679648024269214, "learning_rate": 5.649635036496351e-06, "loss": 0.6614, "step": 30768 }, { "epoch": 0.8983387346354851, "grad_norm": 0.5601744497378578, "learning_rate": 5.64801297648013e-06, "loss": 0.6379, "step": 30769 }, { "epoch": 0.8983679308633324, "grad_norm": 0.5048210306327519, "learning_rate": 5.646390916463909e-06, "loss": 0.5268, "step": 30770 }, { "epoch": 0.8983971270911798, "grad_norm": 0.549698716105873, "learning_rate": 5.6447688564476885e-06, "loss": 0.6104, "step": 30771 }, { "epoch": 0.8984263233190272, "grad_norm": 0.5090972742166741, "learning_rate": 5.6431467964314686e-06, "loss": 0.5775, "step": 30772 }, { "epoch": 0.8984555195468745, "grad_norm": 0.5386389209301944, "learning_rate": 5.641524736415248e-06, "loss": 0.6003, "step": 30773 }, { "epoch": 0.8984847157747219, "grad_norm": 0.5648842179630261, "learning_rate": 5.639902676399027e-06, "loss": 0.5884, "step": 30774 }, { "epoch": 0.8985139120025692, "grad_norm": 0.5413604613569032, "learning_rate": 5.638280616382806e-06, "loss": 0.6132, "step": 30775 }, { "epoch": 0.8985431082304166, "grad_norm": 0.49684298409004746, "learning_rate": 5.6366585563665855e-06, "loss": 0.5424, "step": 30776 }, { "epoch": 0.898572304458264, "grad_norm": 0.5126169827044316, "learning_rate": 5.6350364963503655e-06, "loss": 0.5198, "step": 30777 }, { "epoch": 0.8986015006861113, "grad_norm": 0.51494818791142, "learning_rate": 5.633414436334145e-06, "loss": 0.5888, "step": 30778 }, { "epoch": 0.8986306969139587, "grad_norm": 0.539338019668787, "learning_rate": 5.631792376317924e-06, "loss": 0.614, "step": 30779 }, { "epoch": 0.898659893141806, "grad_norm": 0.5229739101503011, "learning_rate": 5.630170316301703e-06, "loss": 0.5807, "step": 30780 }, { "epoch": 0.8986890893696534, "grad_norm": 0.5227358801300055, "learning_rate": 5.628548256285482e-06, "loss": 0.5966, "step": 30781 }, { "epoch": 0.8987182855975008, "grad_norm": 0.5117562863879251, "learning_rate": 5.6269261962692625e-06, "loss": 0.5519, "step": 30782 }, { "epoch": 0.8987474818253481, "grad_norm": 0.5137237567127527, "learning_rate": 5.625304136253042e-06, "loss": 0.5306, "step": 30783 }, { "epoch": 0.8987766780531955, "grad_norm": 0.48612579987516025, "learning_rate": 5.623682076236821e-06, "loss": 0.528, "step": 30784 }, { "epoch": 0.8988058742810429, "grad_norm": 0.5494927816733688, "learning_rate": 5.6220600162206e-06, "loss": 0.5973, "step": 30785 }, { "epoch": 0.8988350705088902, "grad_norm": 0.5405128014668856, "learning_rate": 5.620437956204379e-06, "loss": 0.561, "step": 30786 }, { "epoch": 0.8988642667367376, "grad_norm": 0.5377092017148221, "learning_rate": 5.6188158961881594e-06, "loss": 0.609, "step": 30787 }, { "epoch": 0.8988934629645849, "grad_norm": 0.4482688790388077, "learning_rate": 5.617193836171939e-06, "loss": 0.444, "step": 30788 }, { "epoch": 0.8989226591924323, "grad_norm": 0.4952408378181968, "learning_rate": 5.615571776155718e-06, "loss": 0.5099, "step": 30789 }, { "epoch": 0.8989518554202797, "grad_norm": 0.590037584989551, "learning_rate": 5.613949716139497e-06, "loss": 0.6773, "step": 30790 }, { "epoch": 0.898981051648127, "grad_norm": 0.5256572439203879, "learning_rate": 5.612327656123277e-06, "loss": 0.602, "step": 30791 }, { "epoch": 0.8990102478759744, "grad_norm": 0.6231221121431564, "learning_rate": 5.610705596107056e-06, "loss": 0.7246, "step": 30792 }, { "epoch": 0.8990394441038217, "grad_norm": 0.49879843228385357, "learning_rate": 5.609083536090836e-06, "loss": 0.5018, "step": 30793 }, { "epoch": 0.8990686403316691, "grad_norm": 0.5193783011045207, "learning_rate": 5.607461476074615e-06, "loss": 0.5957, "step": 30794 }, { "epoch": 0.8990978365595165, "grad_norm": 0.4916714525208922, "learning_rate": 5.605839416058394e-06, "loss": 0.5019, "step": 30795 }, { "epoch": 0.8991270327873638, "grad_norm": 0.5365827759286712, "learning_rate": 5.604217356042174e-06, "loss": 0.6206, "step": 30796 }, { "epoch": 0.8991562290152112, "grad_norm": 0.5301346863091226, "learning_rate": 5.602595296025953e-06, "loss": 0.6053, "step": 30797 }, { "epoch": 0.8991854252430586, "grad_norm": 0.5661094281013807, "learning_rate": 5.6009732360097326e-06, "loss": 0.5694, "step": 30798 }, { "epoch": 0.8992146214709059, "grad_norm": 0.5296545507547226, "learning_rate": 5.599351175993512e-06, "loss": 0.5898, "step": 30799 }, { "epoch": 0.8992438176987533, "grad_norm": 0.5448215467621628, "learning_rate": 5.597729115977291e-06, "loss": 0.6248, "step": 30800 }, { "epoch": 0.8992730139266006, "grad_norm": 0.5660563668717215, "learning_rate": 5.596107055961071e-06, "loss": 0.6645, "step": 30801 }, { "epoch": 0.899302210154448, "grad_norm": 0.522067892340955, "learning_rate": 5.59448499594485e-06, "loss": 0.5491, "step": 30802 }, { "epoch": 0.8993314063822954, "grad_norm": 0.5540920115600854, "learning_rate": 5.5928629359286295e-06, "loss": 0.6006, "step": 30803 }, { "epoch": 0.8993606026101427, "grad_norm": 0.5847970016010926, "learning_rate": 5.591240875912409e-06, "loss": 0.6993, "step": 30804 }, { "epoch": 0.8993897988379901, "grad_norm": 0.5672764295225042, "learning_rate": 5.589618815896189e-06, "loss": 0.6346, "step": 30805 }, { "epoch": 0.8994189950658374, "grad_norm": 0.5448779400976895, "learning_rate": 5.587996755879968e-06, "loss": 0.5728, "step": 30806 }, { "epoch": 0.8994481912936848, "grad_norm": 0.5740054054521673, "learning_rate": 5.586374695863747e-06, "loss": 0.5776, "step": 30807 }, { "epoch": 0.8994773875215322, "grad_norm": 0.533409160710384, "learning_rate": 5.5847526358475265e-06, "loss": 0.6253, "step": 30808 }, { "epoch": 0.8995065837493795, "grad_norm": 0.5485276655835989, "learning_rate": 5.583130575831306e-06, "loss": 0.603, "step": 30809 }, { "epoch": 0.8995357799772269, "grad_norm": 0.5500362545879008, "learning_rate": 5.581508515815086e-06, "loss": 0.5975, "step": 30810 }, { "epoch": 0.8995649762050743, "grad_norm": 0.5176270670564325, "learning_rate": 5.579886455798865e-06, "loss": 0.5348, "step": 30811 }, { "epoch": 0.8995941724329216, "grad_norm": 0.5658359323396778, "learning_rate": 5.578264395782644e-06, "loss": 0.615, "step": 30812 }, { "epoch": 0.899623368660769, "grad_norm": 0.5232399943413663, "learning_rate": 5.576642335766423e-06, "loss": 0.5461, "step": 30813 }, { "epoch": 0.8996525648886164, "grad_norm": 0.530160303365887, "learning_rate": 5.575020275750203e-06, "loss": 0.5565, "step": 30814 }, { "epoch": 0.8996817611164638, "grad_norm": 0.568528395939671, "learning_rate": 5.573398215733983e-06, "loss": 0.6361, "step": 30815 }, { "epoch": 0.8997109573443112, "grad_norm": 0.5347614137732914, "learning_rate": 5.571776155717762e-06, "loss": 0.5878, "step": 30816 }, { "epoch": 0.8997401535721585, "grad_norm": 0.5293989600856979, "learning_rate": 5.570154095701541e-06, "loss": 0.5754, "step": 30817 }, { "epoch": 0.8997693498000059, "grad_norm": 0.534825492290067, "learning_rate": 5.56853203568532e-06, "loss": 0.5751, "step": 30818 }, { "epoch": 0.8997985460278533, "grad_norm": 0.5207082179345808, "learning_rate": 5.5669099756691e-06, "loss": 0.5371, "step": 30819 }, { "epoch": 0.8998277422557006, "grad_norm": 0.4880681698730198, "learning_rate": 5.56528791565288e-06, "loss": 0.4823, "step": 30820 }, { "epoch": 0.899856938483548, "grad_norm": 0.4895685235694285, "learning_rate": 5.563665855636659e-06, "loss": 0.4872, "step": 30821 }, { "epoch": 0.8998861347113953, "grad_norm": 0.5893233261165804, "learning_rate": 5.562043795620438e-06, "loss": 0.6322, "step": 30822 }, { "epoch": 0.8999153309392427, "grad_norm": 0.506771899998716, "learning_rate": 5.560421735604217e-06, "loss": 0.5868, "step": 30823 }, { "epoch": 0.8999445271670901, "grad_norm": 0.5224824663152557, "learning_rate": 5.558799675587997e-06, "loss": 0.586, "step": 30824 }, { "epoch": 0.8999737233949374, "grad_norm": 0.5566139259846697, "learning_rate": 5.557177615571777e-06, "loss": 0.5802, "step": 30825 }, { "epoch": 0.9000029196227848, "grad_norm": 0.5458242837299843, "learning_rate": 5.555555555555556e-06, "loss": 0.5937, "step": 30826 }, { "epoch": 0.9000321158506321, "grad_norm": 0.5605830601150348, "learning_rate": 5.553933495539335e-06, "loss": 0.6006, "step": 30827 }, { "epoch": 0.9000613120784795, "grad_norm": 0.5034398764579492, "learning_rate": 5.552311435523114e-06, "loss": 0.5126, "step": 30828 }, { "epoch": 0.9000905083063269, "grad_norm": 0.530379166326411, "learning_rate": 5.550689375506894e-06, "loss": 0.5701, "step": 30829 }, { "epoch": 0.9001197045341742, "grad_norm": 0.5356432396261848, "learning_rate": 5.5490673154906736e-06, "loss": 0.6265, "step": 30830 }, { "epoch": 0.9001489007620216, "grad_norm": 0.536877057722064, "learning_rate": 5.547445255474453e-06, "loss": 0.5995, "step": 30831 }, { "epoch": 0.900178096989869, "grad_norm": 0.5607264573174038, "learning_rate": 5.545823195458232e-06, "loss": 0.6445, "step": 30832 }, { "epoch": 0.9002072932177163, "grad_norm": 0.5636736989823612, "learning_rate": 5.544201135442011e-06, "loss": 0.6252, "step": 30833 }, { "epoch": 0.9002364894455637, "grad_norm": 0.5354846633972875, "learning_rate": 5.542579075425791e-06, "loss": 0.5829, "step": 30834 }, { "epoch": 0.900265685673411, "grad_norm": 0.5155109297553496, "learning_rate": 5.5409570154095705e-06, "loss": 0.5302, "step": 30835 }, { "epoch": 0.9002948819012584, "grad_norm": 0.5225759265919261, "learning_rate": 5.53933495539335e-06, "loss": 0.5908, "step": 30836 }, { "epoch": 0.9003240781291058, "grad_norm": 0.5508906678133141, "learning_rate": 5.537712895377129e-06, "loss": 0.6194, "step": 30837 }, { "epoch": 0.9003532743569531, "grad_norm": 0.48858886606656965, "learning_rate": 5.536090835360909e-06, "loss": 0.5377, "step": 30838 }, { "epoch": 0.9003824705848005, "grad_norm": 0.5304308753122264, "learning_rate": 5.534468775344688e-06, "loss": 0.6046, "step": 30839 }, { "epoch": 0.9004116668126478, "grad_norm": 0.5289178191803597, "learning_rate": 5.5328467153284675e-06, "loss": 0.549, "step": 30840 }, { "epoch": 0.9004408630404952, "grad_norm": 0.5178113752624633, "learning_rate": 5.531224655312247e-06, "loss": 0.5967, "step": 30841 }, { "epoch": 0.9004700592683426, "grad_norm": 0.5511902123978419, "learning_rate": 5.529602595296026e-06, "loss": 0.6094, "step": 30842 }, { "epoch": 0.9004992554961899, "grad_norm": 0.5242349697368185, "learning_rate": 5.527980535279806e-06, "loss": 0.5791, "step": 30843 }, { "epoch": 0.9005284517240373, "grad_norm": 0.533766103718234, "learning_rate": 5.526358475263585e-06, "loss": 0.5653, "step": 30844 }, { "epoch": 0.9005576479518846, "grad_norm": 0.5212205183355924, "learning_rate": 5.524736415247364e-06, "loss": 0.5346, "step": 30845 }, { "epoch": 0.900586844179732, "grad_norm": 0.5210585548585991, "learning_rate": 5.523114355231144e-06, "loss": 0.5631, "step": 30846 }, { "epoch": 0.9006160404075794, "grad_norm": 0.5331405071093199, "learning_rate": 5.521492295214923e-06, "loss": 0.598, "step": 30847 }, { "epoch": 0.9006452366354267, "grad_norm": 0.4856405655876352, "learning_rate": 5.519870235198703e-06, "loss": 0.4731, "step": 30848 }, { "epoch": 0.9006744328632741, "grad_norm": 0.5987261777251044, "learning_rate": 5.518248175182482e-06, "loss": 0.6641, "step": 30849 }, { "epoch": 0.9007036290911214, "grad_norm": 0.4908324913054601, "learning_rate": 5.516626115166261e-06, "loss": 0.5098, "step": 30850 }, { "epoch": 0.9007328253189688, "grad_norm": 0.5547467396572381, "learning_rate": 5.515004055150041e-06, "loss": 0.6702, "step": 30851 }, { "epoch": 0.9007620215468162, "grad_norm": 0.5550273403407018, "learning_rate": 5.51338199513382e-06, "loss": 0.5936, "step": 30852 }, { "epoch": 0.9007912177746635, "grad_norm": 0.5502479826663357, "learning_rate": 5.5117599351176e-06, "loss": 0.6308, "step": 30853 }, { "epoch": 0.9008204140025109, "grad_norm": 0.5010249789307298, "learning_rate": 5.510137875101379e-06, "loss": 0.5486, "step": 30854 }, { "epoch": 0.9008496102303583, "grad_norm": 0.5043302925560788, "learning_rate": 5.508515815085158e-06, "loss": 0.5407, "step": 30855 }, { "epoch": 0.9008788064582056, "grad_norm": 0.5229467726763117, "learning_rate": 5.5068937550689375e-06, "loss": 0.589, "step": 30856 }, { "epoch": 0.900908002686053, "grad_norm": 0.5132577593333946, "learning_rate": 5.505271695052718e-06, "loss": 0.5767, "step": 30857 }, { "epoch": 0.9009371989139003, "grad_norm": 0.5251869265752123, "learning_rate": 5.503649635036497e-06, "loss": 0.5754, "step": 30858 }, { "epoch": 0.9009663951417477, "grad_norm": 0.5427643732776236, "learning_rate": 5.502027575020276e-06, "loss": 0.5926, "step": 30859 }, { "epoch": 0.9009955913695951, "grad_norm": 0.5246272821782745, "learning_rate": 5.500405515004055e-06, "loss": 0.6009, "step": 30860 }, { "epoch": 0.9010247875974424, "grad_norm": 0.552350682994837, "learning_rate": 5.4987834549878345e-06, "loss": 0.6149, "step": 30861 }, { "epoch": 0.9010539838252898, "grad_norm": 0.4887967832794569, "learning_rate": 5.4971613949716146e-06, "loss": 0.4967, "step": 30862 }, { "epoch": 0.9010831800531371, "grad_norm": 0.5402725253047761, "learning_rate": 5.495539334955394e-06, "loss": 0.5964, "step": 30863 }, { "epoch": 0.9011123762809845, "grad_norm": 0.5387194933019539, "learning_rate": 5.493917274939173e-06, "loss": 0.5938, "step": 30864 }, { "epoch": 0.9011415725088319, "grad_norm": 0.5157509708353044, "learning_rate": 5.492295214922952e-06, "loss": 0.5843, "step": 30865 }, { "epoch": 0.9011707687366792, "grad_norm": 0.5245421824361687, "learning_rate": 5.4906731549067314e-06, "loss": 0.6083, "step": 30866 }, { "epoch": 0.9011999649645266, "grad_norm": 0.5472158995120642, "learning_rate": 5.4890510948905115e-06, "loss": 0.6051, "step": 30867 }, { "epoch": 0.901229161192374, "grad_norm": 0.5369196914651854, "learning_rate": 5.487429034874291e-06, "loss": 0.6092, "step": 30868 }, { "epoch": 0.9012583574202213, "grad_norm": 0.5544798495335099, "learning_rate": 5.48580697485807e-06, "loss": 0.6207, "step": 30869 }, { "epoch": 0.9012875536480687, "grad_norm": 0.5142389538976498, "learning_rate": 5.484184914841849e-06, "loss": 0.5332, "step": 30870 }, { "epoch": 0.901316749875916, "grad_norm": 0.5309091399068676, "learning_rate": 5.482562854825629e-06, "loss": 0.5664, "step": 30871 }, { "epoch": 0.9013459461037634, "grad_norm": 0.503089959197021, "learning_rate": 5.4809407948094085e-06, "loss": 0.5249, "step": 30872 }, { "epoch": 0.9013751423316108, "grad_norm": 0.5405151438128473, "learning_rate": 5.479318734793188e-06, "loss": 0.6384, "step": 30873 }, { "epoch": 0.9014043385594581, "grad_norm": 0.5502964846908813, "learning_rate": 5.477696674776967e-06, "loss": 0.5742, "step": 30874 }, { "epoch": 0.9014335347873055, "grad_norm": 0.5708860737927124, "learning_rate": 5.476074614760746e-06, "loss": 0.6181, "step": 30875 }, { "epoch": 0.9014627310151528, "grad_norm": 0.5067218566174598, "learning_rate": 5.474452554744526e-06, "loss": 0.5375, "step": 30876 }, { "epoch": 0.9014919272430002, "grad_norm": 0.515323400221513, "learning_rate": 5.472830494728305e-06, "loss": 0.5472, "step": 30877 }, { "epoch": 0.9015211234708476, "grad_norm": 0.48954904121654513, "learning_rate": 5.471208434712085e-06, "loss": 0.5191, "step": 30878 }, { "epoch": 0.9015503196986949, "grad_norm": 0.49415989053436166, "learning_rate": 5.469586374695864e-06, "loss": 0.4859, "step": 30879 }, { "epoch": 0.9015795159265423, "grad_norm": 0.5214775083223389, "learning_rate": 5.467964314679643e-06, "loss": 0.5645, "step": 30880 }, { "epoch": 0.9016087121543896, "grad_norm": 0.49883449129743396, "learning_rate": 5.466342254663423e-06, "loss": 0.5454, "step": 30881 }, { "epoch": 0.901637908382237, "grad_norm": 0.4942734699141236, "learning_rate": 5.464720194647202e-06, "loss": 0.5575, "step": 30882 }, { "epoch": 0.9016671046100844, "grad_norm": 0.539310829610124, "learning_rate": 5.463098134630982e-06, "loss": 0.6114, "step": 30883 }, { "epoch": 0.9016963008379317, "grad_norm": 0.5044864356600975, "learning_rate": 5.461476074614761e-06, "loss": 0.5422, "step": 30884 }, { "epoch": 0.9017254970657791, "grad_norm": 0.5548062143305573, "learning_rate": 5.45985401459854e-06, "loss": 0.6371, "step": 30885 }, { "epoch": 0.9017546932936265, "grad_norm": 0.5223269411203952, "learning_rate": 5.45823195458232e-06, "loss": 0.549, "step": 30886 }, { "epoch": 0.9017838895214738, "grad_norm": 0.5326763108131342, "learning_rate": 5.456609894566099e-06, "loss": 0.6088, "step": 30887 }, { "epoch": 0.9018130857493212, "grad_norm": 0.5174539243469223, "learning_rate": 5.4549878345498785e-06, "loss": 0.5533, "step": 30888 }, { "epoch": 0.9018422819771685, "grad_norm": 0.5459367917621032, "learning_rate": 5.453365774533658e-06, "loss": 0.6111, "step": 30889 }, { "epoch": 0.9018714782050159, "grad_norm": 0.5386129751501969, "learning_rate": 5.451743714517438e-06, "loss": 0.6116, "step": 30890 }, { "epoch": 0.9019006744328633, "grad_norm": 0.5560500055957217, "learning_rate": 5.450121654501217e-06, "loss": 0.6333, "step": 30891 }, { "epoch": 0.9019298706607106, "grad_norm": 0.5543523931608371, "learning_rate": 5.448499594484996e-06, "loss": 0.6308, "step": 30892 }, { "epoch": 0.901959066888558, "grad_norm": 0.4926066618787705, "learning_rate": 5.4468775344687755e-06, "loss": 0.4973, "step": 30893 }, { "epoch": 0.9019882631164053, "grad_norm": 0.5389156741222676, "learning_rate": 5.445255474452555e-06, "loss": 0.5818, "step": 30894 }, { "epoch": 0.9020174593442527, "grad_norm": 0.5334467687623141, "learning_rate": 5.443633414436335e-06, "loss": 0.5826, "step": 30895 }, { "epoch": 0.9020466555721001, "grad_norm": 0.4882610147414063, "learning_rate": 5.442011354420114e-06, "loss": 0.519, "step": 30896 }, { "epoch": 0.9020758517999474, "grad_norm": 0.5047625615081698, "learning_rate": 5.440389294403893e-06, "loss": 0.5474, "step": 30897 }, { "epoch": 0.9021050480277948, "grad_norm": 0.4812664619144281, "learning_rate": 5.4387672343876724e-06, "loss": 0.4955, "step": 30898 }, { "epoch": 0.9021342442556421, "grad_norm": 0.5556463933294465, "learning_rate": 5.437145174371452e-06, "loss": 0.6277, "step": 30899 }, { "epoch": 0.9021634404834895, "grad_norm": 0.5515169678150631, "learning_rate": 5.435523114355232e-06, "loss": 0.6151, "step": 30900 }, { "epoch": 0.9021926367113369, "grad_norm": 0.5544251513755695, "learning_rate": 5.433901054339011e-06, "loss": 0.66, "step": 30901 }, { "epoch": 0.9022218329391842, "grad_norm": 0.5249968529809051, "learning_rate": 5.43227899432279e-06, "loss": 0.5849, "step": 30902 }, { "epoch": 0.9022510291670316, "grad_norm": 0.5224402354105385, "learning_rate": 5.430656934306569e-06, "loss": 0.5532, "step": 30903 }, { "epoch": 0.902280225394879, "grad_norm": 0.5106411032782604, "learning_rate": 5.4290348742903495e-06, "loss": 0.5889, "step": 30904 }, { "epoch": 0.9023094216227263, "grad_norm": 0.4994704527177203, "learning_rate": 5.427412814274129e-06, "loss": 0.532, "step": 30905 }, { "epoch": 0.9023386178505737, "grad_norm": 0.4869760659737133, "learning_rate": 5.425790754257908e-06, "loss": 0.4829, "step": 30906 }, { "epoch": 0.902367814078421, "grad_norm": 0.555300625307516, "learning_rate": 5.424168694241687e-06, "loss": 0.6167, "step": 30907 }, { "epoch": 0.9023970103062684, "grad_norm": 0.5354466315959617, "learning_rate": 5.422546634225466e-06, "loss": 0.6305, "step": 30908 }, { "epoch": 0.9024262065341158, "grad_norm": 0.5262367269974826, "learning_rate": 5.420924574209246e-06, "loss": 0.6022, "step": 30909 }, { "epoch": 0.9024554027619631, "grad_norm": 0.5074435886777691, "learning_rate": 5.419302514193026e-06, "loss": 0.5652, "step": 30910 }, { "epoch": 0.9024845989898105, "grad_norm": 0.5708591012932999, "learning_rate": 5.417680454176805e-06, "loss": 0.6732, "step": 30911 }, { "epoch": 0.9025137952176578, "grad_norm": 0.5207850835467371, "learning_rate": 5.416058394160584e-06, "loss": 0.5379, "step": 30912 }, { "epoch": 0.9025429914455052, "grad_norm": 0.5047632529036964, "learning_rate": 5.414436334144363e-06, "loss": 0.5371, "step": 30913 }, { "epoch": 0.9025721876733526, "grad_norm": 0.5287164550388144, "learning_rate": 5.412814274128143e-06, "loss": 0.573, "step": 30914 }, { "epoch": 0.9026013839011999, "grad_norm": 0.5502199088714812, "learning_rate": 5.411192214111923e-06, "loss": 0.5401, "step": 30915 }, { "epoch": 0.9026305801290473, "grad_norm": 0.5198499636741044, "learning_rate": 5.409570154095702e-06, "loss": 0.5594, "step": 30916 }, { "epoch": 0.9026597763568946, "grad_norm": 0.5135976273233551, "learning_rate": 5.407948094079481e-06, "loss": 0.5679, "step": 30917 }, { "epoch": 0.902688972584742, "grad_norm": 0.5078293921001298, "learning_rate": 5.40632603406326e-06, "loss": 0.5425, "step": 30918 }, { "epoch": 0.9027181688125894, "grad_norm": 0.5230518046691833, "learning_rate": 5.40470397404704e-06, "loss": 0.5356, "step": 30919 }, { "epoch": 0.9027473650404367, "grad_norm": 0.5052298647386103, "learning_rate": 5.4030819140308195e-06, "loss": 0.5372, "step": 30920 }, { "epoch": 0.9027765612682841, "grad_norm": 0.5849884774588726, "learning_rate": 5.401459854014599e-06, "loss": 0.6724, "step": 30921 }, { "epoch": 0.9028057574961315, "grad_norm": 0.5228177829443722, "learning_rate": 5.399837793998378e-06, "loss": 0.5147, "step": 30922 }, { "epoch": 0.9028349537239788, "grad_norm": 0.5138171093750824, "learning_rate": 5.398215733982158e-06, "loss": 0.5303, "step": 30923 }, { "epoch": 0.9028641499518262, "grad_norm": 0.5243217226614455, "learning_rate": 5.396593673965937e-06, "loss": 0.5804, "step": 30924 }, { "epoch": 0.9028933461796735, "grad_norm": 0.5803228297139619, "learning_rate": 5.3949716139497165e-06, "loss": 0.6601, "step": 30925 }, { "epoch": 0.9029225424075209, "grad_norm": 0.5308432506674379, "learning_rate": 5.393349553933496e-06, "loss": 0.6018, "step": 30926 }, { "epoch": 0.9029517386353683, "grad_norm": 0.5071564203674748, "learning_rate": 5.391727493917275e-06, "loss": 0.4929, "step": 30927 }, { "epoch": 0.9029809348632156, "grad_norm": 0.4826389821172432, "learning_rate": 5.390105433901055e-06, "loss": 0.5023, "step": 30928 }, { "epoch": 0.903010131091063, "grad_norm": 0.5288967375494255, "learning_rate": 5.388483373884834e-06, "loss": 0.5643, "step": 30929 }, { "epoch": 0.9030393273189103, "grad_norm": 0.4873940364624682, "learning_rate": 5.3868613138686134e-06, "loss": 0.5016, "step": 30930 }, { "epoch": 0.9030685235467577, "grad_norm": 0.5433702106520144, "learning_rate": 5.385239253852393e-06, "loss": 0.6721, "step": 30931 }, { "epoch": 0.9030977197746051, "grad_norm": 0.5267327108686095, "learning_rate": 5.383617193836172e-06, "loss": 0.5793, "step": 30932 }, { "epoch": 0.9031269160024524, "grad_norm": 0.4869700861252381, "learning_rate": 5.381995133819952e-06, "loss": 0.5361, "step": 30933 }, { "epoch": 0.9031561122302998, "grad_norm": 0.5270902009965618, "learning_rate": 5.380373073803731e-06, "loss": 0.5563, "step": 30934 }, { "epoch": 0.9031853084581473, "grad_norm": 0.5307996842916116, "learning_rate": 5.37875101378751e-06, "loss": 0.5436, "step": 30935 }, { "epoch": 0.9032145046859946, "grad_norm": 0.49021861707404646, "learning_rate": 5.37712895377129e-06, "loss": 0.4897, "step": 30936 }, { "epoch": 0.903243700913842, "grad_norm": 0.552372850809355, "learning_rate": 5.375506893755069e-06, "loss": 0.6048, "step": 30937 }, { "epoch": 0.9032728971416893, "grad_norm": 0.537381205819892, "learning_rate": 5.373884833738849e-06, "loss": 0.5815, "step": 30938 }, { "epoch": 0.9033020933695367, "grad_norm": 0.5177222193747355, "learning_rate": 5.372262773722628e-06, "loss": 0.5254, "step": 30939 }, { "epoch": 0.9033312895973841, "grad_norm": 0.5174365704670771, "learning_rate": 5.370640713706407e-06, "loss": 0.5476, "step": 30940 }, { "epoch": 0.9033604858252314, "grad_norm": 0.5174227714702138, "learning_rate": 5.3690186536901866e-06, "loss": 0.5547, "step": 30941 }, { "epoch": 0.9033896820530788, "grad_norm": 0.5497984591502799, "learning_rate": 5.367396593673967e-06, "loss": 0.607, "step": 30942 }, { "epoch": 0.9034188782809262, "grad_norm": 0.5488147667329625, "learning_rate": 5.365774533657746e-06, "loss": 0.5989, "step": 30943 }, { "epoch": 0.9034480745087735, "grad_norm": 0.5009596380318422, "learning_rate": 5.364152473641525e-06, "loss": 0.5695, "step": 30944 }, { "epoch": 0.9034772707366209, "grad_norm": 0.5286133291187831, "learning_rate": 5.362530413625304e-06, "loss": 0.5694, "step": 30945 }, { "epoch": 0.9035064669644682, "grad_norm": 0.4898912288820527, "learning_rate": 5.3609083536090835e-06, "loss": 0.5238, "step": 30946 }, { "epoch": 0.9035356631923156, "grad_norm": 0.5201383462874082, "learning_rate": 5.359286293592864e-06, "loss": 0.5465, "step": 30947 }, { "epoch": 0.903564859420163, "grad_norm": 0.509129643025925, "learning_rate": 5.357664233576643e-06, "loss": 0.5868, "step": 30948 }, { "epoch": 0.9035940556480103, "grad_norm": 0.5227131876262234, "learning_rate": 5.356042173560422e-06, "loss": 0.5639, "step": 30949 }, { "epoch": 0.9036232518758577, "grad_norm": 0.5552843445261784, "learning_rate": 5.354420113544201e-06, "loss": 0.647, "step": 30950 }, { "epoch": 0.903652448103705, "grad_norm": 0.5038331909966938, "learning_rate": 5.3527980535279805e-06, "loss": 0.5605, "step": 30951 }, { "epoch": 0.9036816443315524, "grad_norm": 0.5259591494390906, "learning_rate": 5.3511759935117605e-06, "loss": 0.5234, "step": 30952 }, { "epoch": 0.9037108405593998, "grad_norm": 0.56557400771074, "learning_rate": 5.34955393349554e-06, "loss": 0.6607, "step": 30953 }, { "epoch": 0.9037400367872471, "grad_norm": 0.5248331021443936, "learning_rate": 5.347931873479319e-06, "loss": 0.5812, "step": 30954 }, { "epoch": 0.9037692330150945, "grad_norm": 0.508276579656453, "learning_rate": 5.346309813463098e-06, "loss": 0.5796, "step": 30955 }, { "epoch": 0.9037984292429418, "grad_norm": 0.5119529048886248, "learning_rate": 5.344687753446878e-06, "loss": 0.5933, "step": 30956 }, { "epoch": 0.9038276254707892, "grad_norm": 0.5130002015488433, "learning_rate": 5.3430656934306575e-06, "loss": 0.5524, "step": 30957 }, { "epoch": 0.9038568216986366, "grad_norm": 0.5202485690112508, "learning_rate": 5.341443633414437e-06, "loss": 0.5772, "step": 30958 }, { "epoch": 0.9038860179264839, "grad_norm": 0.5143477282297251, "learning_rate": 5.339821573398216e-06, "loss": 0.5521, "step": 30959 }, { "epoch": 0.9039152141543313, "grad_norm": 0.5445643662328221, "learning_rate": 5.338199513381995e-06, "loss": 0.6339, "step": 30960 }, { "epoch": 0.9039444103821787, "grad_norm": 0.48888950950189486, "learning_rate": 5.336577453365775e-06, "loss": 0.5014, "step": 30961 }, { "epoch": 0.903973606610026, "grad_norm": 0.5927554394108193, "learning_rate": 5.3349553933495544e-06, "loss": 0.6326, "step": 30962 }, { "epoch": 0.9040028028378734, "grad_norm": 0.5273495812737209, "learning_rate": 5.333333333333334e-06, "loss": 0.5611, "step": 30963 }, { "epoch": 0.9040319990657207, "grad_norm": 0.5150178761562184, "learning_rate": 5.331711273317113e-06, "loss": 0.5695, "step": 30964 }, { "epoch": 0.9040611952935681, "grad_norm": 0.5256866553887924, "learning_rate": 5.330089213300892e-06, "loss": 0.5699, "step": 30965 }, { "epoch": 0.9040903915214155, "grad_norm": 0.5121105985202021, "learning_rate": 5.328467153284672e-06, "loss": 0.4945, "step": 30966 }, { "epoch": 0.9041195877492628, "grad_norm": 0.5725056167471596, "learning_rate": 5.326845093268451e-06, "loss": 0.6363, "step": 30967 }, { "epoch": 0.9041487839771102, "grad_norm": 0.5071722039246443, "learning_rate": 5.325223033252231e-06, "loss": 0.574, "step": 30968 }, { "epoch": 0.9041779802049575, "grad_norm": 0.5325003238545781, "learning_rate": 5.32360097323601e-06, "loss": 0.5923, "step": 30969 }, { "epoch": 0.9042071764328049, "grad_norm": 0.5040019587970737, "learning_rate": 5.321978913219789e-06, "loss": 0.5368, "step": 30970 }, { "epoch": 0.9042363726606523, "grad_norm": 0.49828020062798506, "learning_rate": 5.320356853203569e-06, "loss": 0.5193, "step": 30971 }, { "epoch": 0.9042655688884996, "grad_norm": 0.5441827761858049, "learning_rate": 5.318734793187348e-06, "loss": 0.6167, "step": 30972 }, { "epoch": 0.904294765116347, "grad_norm": 0.565693294815579, "learning_rate": 5.3171127331711276e-06, "loss": 0.6524, "step": 30973 }, { "epoch": 0.9043239613441943, "grad_norm": 0.5267203096622518, "learning_rate": 5.315490673154907e-06, "loss": 0.5818, "step": 30974 }, { "epoch": 0.9043531575720417, "grad_norm": 0.5630116180385276, "learning_rate": 5.313868613138687e-06, "loss": 0.6469, "step": 30975 }, { "epoch": 0.9043823537998891, "grad_norm": 0.548635129656118, "learning_rate": 5.312246553122466e-06, "loss": 0.5578, "step": 30976 }, { "epoch": 0.9044115500277364, "grad_norm": 0.49817745809827485, "learning_rate": 5.310624493106245e-06, "loss": 0.5483, "step": 30977 }, { "epoch": 0.9044407462555838, "grad_norm": 0.559315862351836, "learning_rate": 5.3090024330900245e-06, "loss": 0.6032, "step": 30978 }, { "epoch": 0.9044699424834312, "grad_norm": 0.5552114702109343, "learning_rate": 5.307380373073804e-06, "loss": 0.6496, "step": 30979 }, { "epoch": 0.9044991387112785, "grad_norm": 0.5533351904708905, "learning_rate": 5.305758313057584e-06, "loss": 0.6447, "step": 30980 }, { "epoch": 0.9045283349391259, "grad_norm": 0.551960544098167, "learning_rate": 5.304136253041363e-06, "loss": 0.619, "step": 30981 }, { "epoch": 0.9045575311669732, "grad_norm": 0.518551391213655, "learning_rate": 5.302514193025142e-06, "loss": 0.5891, "step": 30982 }, { "epoch": 0.9045867273948206, "grad_norm": 0.5248637560330445, "learning_rate": 5.3008921330089215e-06, "loss": 0.5547, "step": 30983 }, { "epoch": 0.904615923622668, "grad_norm": 0.5097064652814615, "learning_rate": 5.299270072992701e-06, "loss": 0.5399, "step": 30984 }, { "epoch": 0.9046451198505153, "grad_norm": 0.5469387941177498, "learning_rate": 5.297648012976481e-06, "loss": 0.6128, "step": 30985 }, { "epoch": 0.9046743160783627, "grad_norm": 0.5442789878217531, "learning_rate": 5.29602595296026e-06, "loss": 0.5735, "step": 30986 }, { "epoch": 0.90470351230621, "grad_norm": 0.4905696592078078, "learning_rate": 5.294403892944039e-06, "loss": 0.5251, "step": 30987 }, { "epoch": 0.9047327085340574, "grad_norm": 0.537866076994446, "learning_rate": 5.292781832927818e-06, "loss": 0.5825, "step": 30988 }, { "epoch": 0.9047619047619048, "grad_norm": 0.5208413708959486, "learning_rate": 5.2911597729115985e-06, "loss": 0.5389, "step": 30989 }, { "epoch": 0.9047911009897521, "grad_norm": 0.5446299969335578, "learning_rate": 5.289537712895378e-06, "loss": 0.6333, "step": 30990 }, { "epoch": 0.9048202972175995, "grad_norm": 0.5953705044023717, "learning_rate": 5.287915652879157e-06, "loss": 0.6846, "step": 30991 }, { "epoch": 0.9048494934454469, "grad_norm": 0.5254897739234354, "learning_rate": 5.286293592862936e-06, "loss": 0.5379, "step": 30992 }, { "epoch": 0.9048786896732942, "grad_norm": 0.5277303814372318, "learning_rate": 5.284671532846715e-06, "loss": 0.543, "step": 30993 }, { "epoch": 0.9049078859011416, "grad_norm": 0.4730740253682771, "learning_rate": 5.2830494728304954e-06, "loss": 0.4692, "step": 30994 }, { "epoch": 0.9049370821289889, "grad_norm": 0.5435399670897869, "learning_rate": 5.281427412814275e-06, "loss": 0.5916, "step": 30995 }, { "epoch": 0.9049662783568363, "grad_norm": 0.5501662608411214, "learning_rate": 5.279805352798053e-06, "loss": 0.5895, "step": 30996 }, { "epoch": 0.9049954745846837, "grad_norm": 0.5346078181880121, "learning_rate": 5.278183292781833e-06, "loss": 0.5751, "step": 30997 }, { "epoch": 0.905024670812531, "grad_norm": 0.5174940887398257, "learning_rate": 5.276561232765612e-06, "loss": 0.5679, "step": 30998 }, { "epoch": 0.9050538670403784, "grad_norm": 0.49809330147544917, "learning_rate": 5.274939172749392e-06, "loss": 0.5354, "step": 30999 }, { "epoch": 0.9050830632682257, "grad_norm": 0.5558091437836991, "learning_rate": 5.273317112733172e-06, "loss": 0.6249, "step": 31000 }, { "epoch": 0.9051122594960731, "grad_norm": 0.5859612887645999, "learning_rate": 5.271695052716951e-06, "loss": 0.6069, "step": 31001 }, { "epoch": 0.9051414557239205, "grad_norm": 0.538709461897346, "learning_rate": 5.27007299270073e-06, "loss": 0.6119, "step": 31002 }, { "epoch": 0.9051706519517678, "grad_norm": 0.5360189753899954, "learning_rate": 5.268450932684509e-06, "loss": 0.5827, "step": 31003 }, { "epoch": 0.9051998481796152, "grad_norm": 0.5521790896905693, "learning_rate": 5.266828872668289e-06, "loss": 0.6255, "step": 31004 }, { "epoch": 0.9052290444074625, "grad_norm": 0.5118608162574255, "learning_rate": 5.2652068126520686e-06, "loss": 0.5843, "step": 31005 }, { "epoch": 0.9052582406353099, "grad_norm": 0.5299600583122168, "learning_rate": 5.263584752635848e-06, "loss": 0.6313, "step": 31006 }, { "epoch": 0.9052874368631573, "grad_norm": 0.5485723858699499, "learning_rate": 5.261962692619627e-06, "loss": 0.5857, "step": 31007 }, { "epoch": 0.9053166330910046, "grad_norm": 0.5184124380161826, "learning_rate": 5.260340632603407e-06, "loss": 0.5493, "step": 31008 }, { "epoch": 0.905345829318852, "grad_norm": 0.5142680498937129, "learning_rate": 5.258718572587186e-06, "loss": 0.5727, "step": 31009 }, { "epoch": 0.9053750255466994, "grad_norm": 0.5763984881044419, "learning_rate": 5.257096512570965e-06, "loss": 0.615, "step": 31010 }, { "epoch": 0.9054042217745467, "grad_norm": 0.5607860198077161, "learning_rate": 5.255474452554745e-06, "loss": 0.5885, "step": 31011 }, { "epoch": 0.9054334180023941, "grad_norm": 0.5702829751996104, "learning_rate": 5.253852392538524e-06, "loss": 0.6558, "step": 31012 }, { "epoch": 0.9054626142302414, "grad_norm": 0.5176099540734064, "learning_rate": 5.252230332522304e-06, "loss": 0.5485, "step": 31013 }, { "epoch": 0.9054918104580888, "grad_norm": 0.5743738804061326, "learning_rate": 5.250608272506083e-06, "loss": 0.649, "step": 31014 }, { "epoch": 0.9055210066859362, "grad_norm": 0.5570392620614514, "learning_rate": 5.2489862124898625e-06, "loss": 0.6724, "step": 31015 }, { "epoch": 0.9055502029137835, "grad_norm": 0.5094545390554747, "learning_rate": 5.247364152473642e-06, "loss": 0.5582, "step": 31016 }, { "epoch": 0.9055793991416309, "grad_norm": 0.5235144339302897, "learning_rate": 5.245742092457421e-06, "loss": 0.5802, "step": 31017 }, { "epoch": 0.9056085953694782, "grad_norm": 0.5256679660115179, "learning_rate": 5.244120032441201e-06, "loss": 0.572, "step": 31018 }, { "epoch": 0.9056377915973256, "grad_norm": 0.5439474535338319, "learning_rate": 5.24249797242498e-06, "loss": 0.5974, "step": 31019 }, { "epoch": 0.905666987825173, "grad_norm": 0.493682403625726, "learning_rate": 5.240875912408759e-06, "loss": 0.4923, "step": 31020 }, { "epoch": 0.9056961840530203, "grad_norm": 0.5347650673797529, "learning_rate": 5.239253852392539e-06, "loss": 0.5953, "step": 31021 }, { "epoch": 0.9057253802808677, "grad_norm": 0.5576537300738079, "learning_rate": 5.237631792376319e-06, "loss": 0.6524, "step": 31022 }, { "epoch": 0.905754576508715, "grad_norm": 0.5459940904615503, "learning_rate": 5.236009732360098e-06, "loss": 0.63, "step": 31023 }, { "epoch": 0.9057837727365624, "grad_norm": 0.4927713175699445, "learning_rate": 5.234387672343877e-06, "loss": 0.5485, "step": 31024 }, { "epoch": 0.9058129689644098, "grad_norm": 0.5750301866774065, "learning_rate": 5.232765612327656e-06, "loss": 0.708, "step": 31025 }, { "epoch": 0.9058421651922571, "grad_norm": 0.4927842964966022, "learning_rate": 5.231143552311436e-06, "loss": 0.5065, "step": 31026 }, { "epoch": 0.9058713614201045, "grad_norm": 0.4777420379565805, "learning_rate": 5.229521492295216e-06, "loss": 0.5123, "step": 31027 }, { "epoch": 0.9059005576479519, "grad_norm": 0.5273903025455811, "learning_rate": 5.227899432278995e-06, "loss": 0.5843, "step": 31028 }, { "epoch": 0.9059297538757992, "grad_norm": 0.5509493439495299, "learning_rate": 5.226277372262773e-06, "loss": 0.5792, "step": 31029 }, { "epoch": 0.9059589501036466, "grad_norm": 0.5442471268795775, "learning_rate": 5.224655312246553e-06, "loss": 0.6473, "step": 31030 }, { "epoch": 0.9059881463314939, "grad_norm": 0.535484817062813, "learning_rate": 5.2230332522303325e-06, "loss": 0.6239, "step": 31031 }, { "epoch": 0.9060173425593413, "grad_norm": 0.5263917279343702, "learning_rate": 5.221411192214113e-06, "loss": 0.6201, "step": 31032 }, { "epoch": 0.9060465387871887, "grad_norm": 0.5170381341928054, "learning_rate": 5.219789132197892e-06, "loss": 0.5762, "step": 31033 }, { "epoch": 0.906075735015036, "grad_norm": 0.5228576393940989, "learning_rate": 5.218167072181671e-06, "loss": 0.6157, "step": 31034 }, { "epoch": 0.9061049312428834, "grad_norm": 0.546837453596286, "learning_rate": 5.21654501216545e-06, "loss": 0.5894, "step": 31035 }, { "epoch": 0.9061341274707307, "grad_norm": 0.5014888304270478, "learning_rate": 5.2149229521492295e-06, "loss": 0.5007, "step": 31036 }, { "epoch": 0.9061633236985781, "grad_norm": 0.5109280365826095, "learning_rate": 5.2133008921330096e-06, "loss": 0.5768, "step": 31037 }, { "epoch": 0.9061925199264255, "grad_norm": 0.5044731109566862, "learning_rate": 5.211678832116789e-06, "loss": 0.5275, "step": 31038 }, { "epoch": 0.9062217161542728, "grad_norm": 0.5314521285609938, "learning_rate": 5.210056772100568e-06, "loss": 0.5874, "step": 31039 }, { "epoch": 0.9062509123821202, "grad_norm": 0.5469650058009062, "learning_rate": 5.208434712084347e-06, "loss": 0.6345, "step": 31040 }, { "epoch": 0.9062801086099675, "grad_norm": 0.5379041624648567, "learning_rate": 5.206812652068127e-06, "loss": 0.6099, "step": 31041 }, { "epoch": 0.9063093048378149, "grad_norm": 0.48084426210890896, "learning_rate": 5.2051905920519065e-06, "loss": 0.5317, "step": 31042 }, { "epoch": 0.9063385010656623, "grad_norm": 0.5237406059058038, "learning_rate": 5.203568532035685e-06, "loss": 0.5436, "step": 31043 }, { "epoch": 0.9063676972935096, "grad_norm": 0.4832940216362824, "learning_rate": 5.201946472019465e-06, "loss": 0.5006, "step": 31044 }, { "epoch": 0.906396893521357, "grad_norm": 0.4907116106025984, "learning_rate": 5.200324412003244e-06, "loss": 0.4871, "step": 31045 }, { "epoch": 0.9064260897492044, "grad_norm": 0.5093751616154936, "learning_rate": 5.198702351987024e-06, "loss": 0.5564, "step": 31046 }, { "epoch": 0.9064552859770517, "grad_norm": 0.531556661125916, "learning_rate": 5.1970802919708035e-06, "loss": 0.5786, "step": 31047 }, { "epoch": 0.9064844822048991, "grad_norm": 0.4938764888636803, "learning_rate": 5.195458231954583e-06, "loss": 0.527, "step": 31048 }, { "epoch": 0.9065136784327464, "grad_norm": 0.5475971842684817, "learning_rate": 5.193836171938362e-06, "loss": 0.5965, "step": 31049 }, { "epoch": 0.9065428746605938, "grad_norm": 0.5195880165951297, "learning_rate": 5.192214111922141e-06, "loss": 0.5594, "step": 31050 }, { "epoch": 0.9065720708884412, "grad_norm": 0.5124724206996546, "learning_rate": 5.190592051905921e-06, "loss": 0.5757, "step": 31051 }, { "epoch": 0.9066012671162885, "grad_norm": 0.519503892380261, "learning_rate": 5.1889699918897e-06, "loss": 0.5663, "step": 31052 }, { "epoch": 0.9066304633441359, "grad_norm": 0.5456790516268356, "learning_rate": 5.18734793187348e-06, "loss": 0.6274, "step": 31053 }, { "epoch": 0.9066596595719832, "grad_norm": 0.4924765113287789, "learning_rate": 5.185725871857259e-06, "loss": 0.5073, "step": 31054 }, { "epoch": 0.9066888557998306, "grad_norm": 0.5625383379812148, "learning_rate": 5.184103811841038e-06, "loss": 0.6519, "step": 31055 }, { "epoch": 0.9067180520276781, "grad_norm": 0.5825311656870656, "learning_rate": 5.182481751824818e-06, "loss": 0.5978, "step": 31056 }, { "epoch": 0.9067472482555254, "grad_norm": 0.5780762065676492, "learning_rate": 5.1808596918085965e-06, "loss": 0.6588, "step": 31057 }, { "epoch": 0.9067764444833728, "grad_norm": 0.5317551970892064, "learning_rate": 5.179237631792377e-06, "loss": 0.5756, "step": 31058 }, { "epoch": 0.9068056407112202, "grad_norm": 0.4839152664472497, "learning_rate": 5.177615571776156e-06, "loss": 0.529, "step": 31059 }, { "epoch": 0.9068348369390675, "grad_norm": 0.515674756703452, "learning_rate": 5.175993511759936e-06, "loss": 0.5434, "step": 31060 }, { "epoch": 0.9068640331669149, "grad_norm": 0.5209247403318991, "learning_rate": 5.174371451743715e-06, "loss": 0.5126, "step": 31061 }, { "epoch": 0.9068932293947622, "grad_norm": 0.5364946111455322, "learning_rate": 5.1727493917274935e-06, "loss": 0.549, "step": 31062 }, { "epoch": 0.9069224256226096, "grad_norm": 0.5291073009769759, "learning_rate": 5.1711273317112735e-06, "loss": 0.5908, "step": 31063 }, { "epoch": 0.906951621850457, "grad_norm": 0.5261617696690557, "learning_rate": 5.169505271695053e-06, "loss": 0.5865, "step": 31064 }, { "epoch": 0.9069808180783043, "grad_norm": 0.5184285844249595, "learning_rate": 5.167883211678833e-06, "loss": 0.5834, "step": 31065 }, { "epoch": 0.9070100143061517, "grad_norm": 0.546932049926223, "learning_rate": 5.166261151662612e-06, "loss": 0.6172, "step": 31066 }, { "epoch": 0.907039210533999, "grad_norm": 0.553002685524977, "learning_rate": 5.164639091646391e-06, "loss": 0.6505, "step": 31067 }, { "epoch": 0.9070684067618464, "grad_norm": 0.5236096172892841, "learning_rate": 5.1630170316301705e-06, "loss": 0.5727, "step": 31068 }, { "epoch": 0.9070976029896938, "grad_norm": 0.5711672905987142, "learning_rate": 5.16139497161395e-06, "loss": 0.687, "step": 31069 }, { "epoch": 0.9071267992175411, "grad_norm": 0.5338643688254794, "learning_rate": 5.15977291159773e-06, "loss": 0.5776, "step": 31070 }, { "epoch": 0.9071559954453885, "grad_norm": 0.5095807234263665, "learning_rate": 5.158150851581508e-06, "loss": 0.537, "step": 31071 }, { "epoch": 0.9071851916732359, "grad_norm": 0.47427079380608783, "learning_rate": 5.156528791565288e-06, "loss": 0.4795, "step": 31072 }, { "epoch": 0.9072143879010832, "grad_norm": 0.5206464438850127, "learning_rate": 5.1549067315490674e-06, "loss": 0.5522, "step": 31073 }, { "epoch": 0.9072435841289306, "grad_norm": 0.5496787903328981, "learning_rate": 5.1532846715328475e-06, "loss": 0.5966, "step": 31074 }, { "epoch": 0.9072727803567779, "grad_norm": 0.49840369991320094, "learning_rate": 5.151662611516627e-06, "loss": 0.5069, "step": 31075 }, { "epoch": 0.9073019765846253, "grad_norm": 0.5191709336876598, "learning_rate": 5.150040551500405e-06, "loss": 0.5633, "step": 31076 }, { "epoch": 0.9073311728124727, "grad_norm": 0.524834018803551, "learning_rate": 5.148418491484185e-06, "loss": 0.5357, "step": 31077 }, { "epoch": 0.90736036904032, "grad_norm": 0.5252011727921817, "learning_rate": 5.146796431467964e-06, "loss": 0.5543, "step": 31078 }, { "epoch": 0.9073895652681674, "grad_norm": 0.5043201788887545, "learning_rate": 5.1451743714517445e-06, "loss": 0.4953, "step": 31079 }, { "epoch": 0.9074187614960147, "grad_norm": 0.4879020541228366, "learning_rate": 5.143552311435524e-06, "loss": 0.5044, "step": 31080 }, { "epoch": 0.9074479577238621, "grad_norm": 0.502776974667948, "learning_rate": 5.141930251419303e-06, "loss": 0.5159, "step": 31081 }, { "epoch": 0.9074771539517095, "grad_norm": 0.5135374034804016, "learning_rate": 5.140308191403082e-06, "loss": 0.5742, "step": 31082 }, { "epoch": 0.9075063501795568, "grad_norm": 0.5459629802393431, "learning_rate": 5.138686131386861e-06, "loss": 0.5643, "step": 31083 }, { "epoch": 0.9075355464074042, "grad_norm": 0.518875051254717, "learning_rate": 5.137064071370641e-06, "loss": 0.5832, "step": 31084 }, { "epoch": 0.9075647426352516, "grad_norm": 0.5249856921449132, "learning_rate": 5.135442011354421e-06, "loss": 0.5763, "step": 31085 }, { "epoch": 0.9075939388630989, "grad_norm": 0.5429672092899964, "learning_rate": 5.1338199513382e-06, "loss": 0.6179, "step": 31086 }, { "epoch": 0.9076231350909463, "grad_norm": 0.6040124694353379, "learning_rate": 5.132197891321979e-06, "loss": 0.6857, "step": 31087 }, { "epoch": 0.9076523313187936, "grad_norm": 0.4626480870115304, "learning_rate": 5.130575831305758e-06, "loss": 0.424, "step": 31088 }, { "epoch": 0.907681527546641, "grad_norm": 0.5417295478716688, "learning_rate": 5.128953771289538e-06, "loss": 0.5742, "step": 31089 }, { "epoch": 0.9077107237744884, "grad_norm": 0.528065713599114, "learning_rate": 5.127331711273317e-06, "loss": 0.5934, "step": 31090 }, { "epoch": 0.9077399200023357, "grad_norm": 0.5465354654564679, "learning_rate": 5.125709651257097e-06, "loss": 0.5551, "step": 31091 }, { "epoch": 0.9077691162301831, "grad_norm": 0.49125522048264514, "learning_rate": 5.124087591240876e-06, "loss": 0.4935, "step": 31092 }, { "epoch": 0.9077983124580304, "grad_norm": 0.5089188737390777, "learning_rate": 5.122465531224656e-06, "loss": 0.5708, "step": 31093 }, { "epoch": 0.9078275086858778, "grad_norm": 0.5236649373647205, "learning_rate": 5.120843471208435e-06, "loss": 0.5275, "step": 31094 }, { "epoch": 0.9078567049137252, "grad_norm": 0.5318246499193439, "learning_rate": 5.119221411192214e-06, "loss": 0.6238, "step": 31095 }, { "epoch": 0.9078859011415725, "grad_norm": 0.4820785691726856, "learning_rate": 5.117599351175994e-06, "loss": 0.4918, "step": 31096 }, { "epoch": 0.9079150973694199, "grad_norm": 0.4970219858361754, "learning_rate": 5.115977291159773e-06, "loss": 0.4914, "step": 31097 }, { "epoch": 0.9079442935972672, "grad_norm": 0.5303627581385455, "learning_rate": 5.114355231143553e-06, "loss": 0.5286, "step": 31098 }, { "epoch": 0.9079734898251146, "grad_norm": 0.5245155923091175, "learning_rate": 5.112733171127332e-06, "loss": 0.564, "step": 31099 }, { "epoch": 0.908002686052962, "grad_norm": 0.5093741154535096, "learning_rate": 5.1111111111111115e-06, "loss": 0.5696, "step": 31100 }, { "epoch": 0.9080318822808093, "grad_norm": 0.553327101807513, "learning_rate": 5.109489051094891e-06, "loss": 0.6361, "step": 31101 }, { "epoch": 0.9080610785086567, "grad_norm": 0.5626569596596054, "learning_rate": 5.10786699107867e-06, "loss": 0.6302, "step": 31102 }, { "epoch": 0.908090274736504, "grad_norm": 0.5675540619212196, "learning_rate": 5.10624493106245e-06, "loss": 0.6285, "step": 31103 }, { "epoch": 0.9081194709643514, "grad_norm": 0.5334826131336796, "learning_rate": 5.104622871046228e-06, "loss": 0.6156, "step": 31104 }, { "epoch": 0.9081486671921988, "grad_norm": 0.5461173923807557, "learning_rate": 5.1030008110300084e-06, "loss": 0.5827, "step": 31105 }, { "epoch": 0.9081778634200461, "grad_norm": 0.5069978567009977, "learning_rate": 5.101378751013788e-06, "loss": 0.5535, "step": 31106 }, { "epoch": 0.9082070596478935, "grad_norm": 0.5353418262492293, "learning_rate": 5.099756690997568e-06, "loss": 0.5715, "step": 31107 }, { "epoch": 0.9082362558757409, "grad_norm": 0.5269969406653636, "learning_rate": 5.098134630981347e-06, "loss": 0.5992, "step": 31108 }, { "epoch": 0.9082654521035882, "grad_norm": 0.5387398871780584, "learning_rate": 5.096512570965125e-06, "loss": 0.619, "step": 31109 }, { "epoch": 0.9082946483314356, "grad_norm": 0.5435985914951722, "learning_rate": 5.094890510948905e-06, "loss": 0.5847, "step": 31110 }, { "epoch": 0.908323844559283, "grad_norm": 0.5201218408814511, "learning_rate": 5.093268450932685e-06, "loss": 0.5491, "step": 31111 }, { "epoch": 0.9083530407871303, "grad_norm": 0.5121274893660372, "learning_rate": 5.091646390916465e-06, "loss": 0.5345, "step": 31112 }, { "epoch": 0.9083822370149777, "grad_norm": 0.5652648985999614, "learning_rate": 5.090024330900244e-06, "loss": 0.5732, "step": 31113 }, { "epoch": 0.908411433242825, "grad_norm": 0.5878327129149803, "learning_rate": 5.088402270884023e-06, "loss": 0.7015, "step": 31114 }, { "epoch": 0.9084406294706724, "grad_norm": 0.5392795207393444, "learning_rate": 5.086780210867802e-06, "loss": 0.6156, "step": 31115 }, { "epoch": 0.9084698256985198, "grad_norm": 0.5169129172401292, "learning_rate": 5.0851581508515816e-06, "loss": 0.5831, "step": 31116 }, { "epoch": 0.9084990219263671, "grad_norm": 0.5351050141080043, "learning_rate": 5.083536090835362e-06, "loss": 0.5809, "step": 31117 }, { "epoch": 0.9085282181542145, "grad_norm": 0.515750985979861, "learning_rate": 5.08191403081914e-06, "loss": 0.5851, "step": 31118 }, { "epoch": 0.9085574143820618, "grad_norm": 0.5080082784303539, "learning_rate": 5.08029197080292e-06, "loss": 0.52, "step": 31119 }, { "epoch": 0.9085866106099092, "grad_norm": 0.5264407773634953, "learning_rate": 5.078669910786699e-06, "loss": 0.5665, "step": 31120 }, { "epoch": 0.9086158068377566, "grad_norm": 0.4904173528068369, "learning_rate": 5.0770478507704785e-06, "loss": 0.4891, "step": 31121 }, { "epoch": 0.9086450030656039, "grad_norm": 0.5358327966175433, "learning_rate": 5.075425790754259e-06, "loss": 0.5501, "step": 31122 }, { "epoch": 0.9086741992934513, "grad_norm": 0.47501841057504973, "learning_rate": 5.073803730738037e-06, "loss": 0.5049, "step": 31123 }, { "epoch": 0.9087033955212986, "grad_norm": 0.5412488739529837, "learning_rate": 5.072181670721817e-06, "loss": 0.5786, "step": 31124 }, { "epoch": 0.908732591749146, "grad_norm": 0.5477040692293059, "learning_rate": 5.070559610705596e-06, "loss": 0.5812, "step": 31125 }, { "epoch": 0.9087617879769934, "grad_norm": 0.5022291784911839, "learning_rate": 5.068937550689376e-06, "loss": 0.5616, "step": 31126 }, { "epoch": 0.9087909842048407, "grad_norm": 0.5458228853031885, "learning_rate": 5.0673154906731555e-06, "loss": 0.5867, "step": 31127 }, { "epoch": 0.9088201804326881, "grad_norm": 0.566880627116037, "learning_rate": 5.065693430656934e-06, "loss": 0.6102, "step": 31128 }, { "epoch": 0.9088493766605354, "grad_norm": 0.49121762754002246, "learning_rate": 5.064071370640714e-06, "loss": 0.5072, "step": 31129 }, { "epoch": 0.9088785728883828, "grad_norm": 0.5316847392213695, "learning_rate": 5.062449310624493e-06, "loss": 0.5665, "step": 31130 }, { "epoch": 0.9089077691162302, "grad_norm": 0.5425430123755699, "learning_rate": 5.060827250608273e-06, "loss": 0.6179, "step": 31131 }, { "epoch": 0.9089369653440775, "grad_norm": 0.5069644003021958, "learning_rate": 5.059205190592052e-06, "loss": 0.5344, "step": 31132 }, { "epoch": 0.9089661615719249, "grad_norm": 0.5065851864084, "learning_rate": 5.057583130575832e-06, "loss": 0.5247, "step": 31133 }, { "epoch": 0.9089953577997723, "grad_norm": 0.5134078845120833, "learning_rate": 5.055961070559611e-06, "loss": 0.5494, "step": 31134 }, { "epoch": 0.9090245540276196, "grad_norm": 0.5180819780595062, "learning_rate": 5.05433901054339e-06, "loss": 0.5165, "step": 31135 }, { "epoch": 0.909053750255467, "grad_norm": 0.5305773264940489, "learning_rate": 5.05271695052717e-06, "loss": 0.5742, "step": 31136 }, { "epoch": 0.9090829464833143, "grad_norm": 0.5548379043233183, "learning_rate": 5.051094890510949e-06, "loss": 0.648, "step": 31137 }, { "epoch": 0.9091121427111617, "grad_norm": 0.5109149365760796, "learning_rate": 5.049472830494729e-06, "loss": 0.5641, "step": 31138 }, { "epoch": 0.9091413389390091, "grad_norm": 0.5566377194443181, "learning_rate": 5.047850770478508e-06, "loss": 0.6767, "step": 31139 }, { "epoch": 0.9091705351668564, "grad_norm": 0.49799782421935207, "learning_rate": 5.046228710462288e-06, "loss": 0.5336, "step": 31140 }, { "epoch": 0.9091997313947038, "grad_norm": 0.5313560311488463, "learning_rate": 5.044606650446067e-06, "loss": 0.6272, "step": 31141 }, { "epoch": 0.9092289276225511, "grad_norm": 0.5521224184662623, "learning_rate": 5.0429845904298455e-06, "loss": 0.6349, "step": 31142 }, { "epoch": 0.9092581238503985, "grad_norm": 0.5542855364569843, "learning_rate": 5.041362530413626e-06, "loss": 0.6128, "step": 31143 }, { "epoch": 0.9092873200782459, "grad_norm": 0.5532655187879487, "learning_rate": 5.039740470397405e-06, "loss": 0.5923, "step": 31144 }, { "epoch": 0.9093165163060932, "grad_norm": 0.5001927882150436, "learning_rate": 5.038118410381185e-06, "loss": 0.5173, "step": 31145 }, { "epoch": 0.9093457125339406, "grad_norm": 0.5239499532060424, "learning_rate": 5.036496350364963e-06, "loss": 0.5575, "step": 31146 }, { "epoch": 0.909374908761788, "grad_norm": 0.505578962171602, "learning_rate": 5.0348742903487425e-06, "loss": 0.5174, "step": 31147 }, { "epoch": 0.9094041049896353, "grad_norm": 0.5330026314376779, "learning_rate": 5.0332522303325226e-06, "loss": 0.5852, "step": 31148 }, { "epoch": 0.9094333012174827, "grad_norm": 0.53742946385273, "learning_rate": 5.031630170316302e-06, "loss": 0.6255, "step": 31149 }, { "epoch": 0.90946249744533, "grad_norm": 0.5257943615424258, "learning_rate": 5.030008110300082e-06, "loss": 0.5752, "step": 31150 }, { "epoch": 0.9094916936731774, "grad_norm": 0.4899678914442348, "learning_rate": 5.02838605028386e-06, "loss": 0.472, "step": 31151 }, { "epoch": 0.9095208899010248, "grad_norm": 0.5581863116797714, "learning_rate": 5.02676399026764e-06, "loss": 0.6383, "step": 31152 }, { "epoch": 0.9095500861288721, "grad_norm": 0.5409898651370114, "learning_rate": 5.0251419302514195e-06, "loss": 0.6208, "step": 31153 }, { "epoch": 0.9095792823567195, "grad_norm": 0.48250022236772877, "learning_rate": 5.023519870235199e-06, "loss": 0.4983, "step": 31154 }, { "epoch": 0.9096084785845668, "grad_norm": 0.5055469337594535, "learning_rate": 5.021897810218979e-06, "loss": 0.5458, "step": 31155 }, { "epoch": 0.9096376748124142, "grad_norm": 0.5086175835586517, "learning_rate": 5.020275750202757e-06, "loss": 0.5461, "step": 31156 }, { "epoch": 0.9096668710402616, "grad_norm": 0.5613610711925143, "learning_rate": 5.018653690186537e-06, "loss": 0.6519, "step": 31157 }, { "epoch": 0.9096960672681089, "grad_norm": 0.5427421826725383, "learning_rate": 5.0170316301703165e-06, "loss": 0.6053, "step": 31158 }, { "epoch": 0.9097252634959563, "grad_norm": 0.534870187647986, "learning_rate": 5.0154095701540965e-06, "loss": 0.5857, "step": 31159 }, { "epoch": 0.9097544597238036, "grad_norm": 0.539535238855133, "learning_rate": 5.013787510137876e-06, "loss": 0.6004, "step": 31160 }, { "epoch": 0.909783655951651, "grad_norm": 0.4856592212679198, "learning_rate": 5.012165450121654e-06, "loss": 0.5281, "step": 31161 }, { "epoch": 0.9098128521794984, "grad_norm": 0.5571902636043544, "learning_rate": 5.010543390105434e-06, "loss": 0.665, "step": 31162 }, { "epoch": 0.9098420484073457, "grad_norm": 0.6154044486950597, "learning_rate": 5.008921330089213e-06, "loss": 0.721, "step": 31163 }, { "epoch": 0.9098712446351931, "grad_norm": 0.48978938457015, "learning_rate": 5.0072992700729935e-06, "loss": 0.5212, "step": 31164 }, { "epoch": 0.9099004408630404, "grad_norm": 0.4917563502178818, "learning_rate": 5.005677210056772e-06, "loss": 0.5002, "step": 31165 }, { "epoch": 0.9099296370908878, "grad_norm": 0.5139047111617199, "learning_rate": 5.004055150040552e-06, "loss": 0.5426, "step": 31166 }, { "epoch": 0.9099588333187352, "grad_norm": 0.5578573269023237, "learning_rate": 5.002433090024331e-06, "loss": 0.6071, "step": 31167 }, { "epoch": 0.9099880295465825, "grad_norm": 0.555897118743572, "learning_rate": 5.00081103000811e-06, "loss": 0.6236, "step": 31168 }, { "epoch": 0.9100172257744299, "grad_norm": 0.5371871979355051, "learning_rate": 4.9991889699918904e-06, "loss": 0.5749, "step": 31169 }, { "epoch": 0.9100464220022773, "grad_norm": 0.5486852289056896, "learning_rate": 4.997566909975669e-06, "loss": 0.6152, "step": 31170 }, { "epoch": 0.9100756182301246, "grad_norm": 0.5165827076837007, "learning_rate": 4.995944849959449e-06, "loss": 0.5262, "step": 31171 }, { "epoch": 0.910104814457972, "grad_norm": 0.5387012258670337, "learning_rate": 4.994322789943228e-06, "loss": 0.6144, "step": 31172 }, { "epoch": 0.9101340106858193, "grad_norm": 0.5144938590198433, "learning_rate": 4.992700729927008e-06, "loss": 0.5657, "step": 31173 }, { "epoch": 0.9101632069136667, "grad_norm": 0.5275018093896563, "learning_rate": 4.991078669910787e-06, "loss": 0.5915, "step": 31174 }, { "epoch": 0.9101924031415141, "grad_norm": 0.5585532140921021, "learning_rate": 4.989456609894566e-06, "loss": 0.6183, "step": 31175 }, { "epoch": 0.9102215993693615, "grad_norm": 0.5243824312396381, "learning_rate": 4.987834549878346e-06, "loss": 0.5901, "step": 31176 }, { "epoch": 0.9102507955972089, "grad_norm": 0.5413675780948296, "learning_rate": 4.986212489862125e-06, "loss": 0.5932, "step": 31177 }, { "epoch": 0.9102799918250563, "grad_norm": 0.542160078285166, "learning_rate": 4.984590429845905e-06, "loss": 0.6333, "step": 31178 }, { "epoch": 0.9103091880529036, "grad_norm": 0.5309652285201623, "learning_rate": 4.9829683698296835e-06, "loss": 0.5795, "step": 31179 }, { "epoch": 0.910338384280751, "grad_norm": 0.5199169647653539, "learning_rate": 4.981346309813463e-06, "loss": 0.5626, "step": 31180 }, { "epoch": 0.9103675805085983, "grad_norm": 0.5265489879256502, "learning_rate": 4.979724249797243e-06, "loss": 0.5746, "step": 31181 }, { "epoch": 0.9103967767364457, "grad_norm": 0.5139298143362612, "learning_rate": 4.978102189781022e-06, "loss": 0.5114, "step": 31182 }, { "epoch": 0.9104259729642931, "grad_norm": 0.5156390037353183, "learning_rate": 4.976480129764802e-06, "loss": 0.5568, "step": 31183 }, { "epoch": 0.9104551691921404, "grad_norm": 0.527239998807325, "learning_rate": 4.9748580697485805e-06, "loss": 0.5332, "step": 31184 }, { "epoch": 0.9104843654199878, "grad_norm": 0.5259198137873506, "learning_rate": 4.9732360097323605e-06, "loss": 0.5823, "step": 31185 }, { "epoch": 0.9105135616478351, "grad_norm": 0.5450544078743649, "learning_rate": 4.97161394971614e-06, "loss": 0.616, "step": 31186 }, { "epoch": 0.9105427578756825, "grad_norm": 0.5290461927680397, "learning_rate": 4.969991889699919e-06, "loss": 0.5791, "step": 31187 }, { "epoch": 0.9105719541035299, "grad_norm": 0.5198244227280704, "learning_rate": 4.968369829683699e-06, "loss": 0.5915, "step": 31188 }, { "epoch": 0.9106011503313772, "grad_norm": 0.5503537837205832, "learning_rate": 4.966747769667477e-06, "loss": 0.6389, "step": 31189 }, { "epoch": 0.9106303465592246, "grad_norm": 0.5271707649471795, "learning_rate": 4.9651257096512575e-06, "loss": 0.5664, "step": 31190 }, { "epoch": 0.910659542787072, "grad_norm": 0.47067407054960797, "learning_rate": 4.963503649635037e-06, "loss": 0.4667, "step": 31191 }, { "epoch": 0.9106887390149193, "grad_norm": 0.5776048892141644, "learning_rate": 4.961881589618817e-06, "loss": 0.6507, "step": 31192 }, { "epoch": 0.9107179352427667, "grad_norm": 0.5134897429731303, "learning_rate": 4.960259529602595e-06, "loss": 0.6004, "step": 31193 }, { "epoch": 0.910747131470614, "grad_norm": 0.5274029799089336, "learning_rate": 4.958637469586374e-06, "loss": 0.5756, "step": 31194 }, { "epoch": 0.9107763276984614, "grad_norm": 0.5122732573908149, "learning_rate": 4.957015409570154e-06, "loss": 0.5558, "step": 31195 }, { "epoch": 0.9108055239263088, "grad_norm": 0.5703021303055262, "learning_rate": 4.955393349553934e-06, "loss": 0.6748, "step": 31196 }, { "epoch": 0.9108347201541561, "grad_norm": 0.5155963172428846, "learning_rate": 4.953771289537714e-06, "loss": 0.5568, "step": 31197 }, { "epoch": 0.9108639163820035, "grad_norm": 0.5471019881196434, "learning_rate": 4.952149229521492e-06, "loss": 0.5809, "step": 31198 }, { "epoch": 0.9108931126098508, "grad_norm": 0.5033373318110784, "learning_rate": 4.950527169505272e-06, "loss": 0.5717, "step": 31199 }, { "epoch": 0.9109223088376982, "grad_norm": 0.5519912491138962, "learning_rate": 4.948905109489051e-06, "loss": 0.5572, "step": 31200 }, { "epoch": 0.9109515050655456, "grad_norm": 0.5422484145549403, "learning_rate": 4.947283049472831e-06, "loss": 0.6132, "step": 31201 }, { "epoch": 0.9109807012933929, "grad_norm": 0.5692757526000005, "learning_rate": 4.945660989456611e-06, "loss": 0.6363, "step": 31202 }, { "epoch": 0.9110098975212403, "grad_norm": 0.5890127464747801, "learning_rate": 4.944038929440389e-06, "loss": 0.66, "step": 31203 }, { "epoch": 0.9110390937490876, "grad_norm": 0.5301456441971569, "learning_rate": 4.942416869424169e-06, "loss": 0.6139, "step": 31204 }, { "epoch": 0.911068289976935, "grad_norm": 0.5336885435138096, "learning_rate": 4.940794809407948e-06, "loss": 0.5612, "step": 31205 }, { "epoch": 0.9110974862047824, "grad_norm": 0.49583768649253374, "learning_rate": 4.9391727493917275e-06, "loss": 0.5036, "step": 31206 }, { "epoch": 0.9111266824326297, "grad_norm": 0.5087221074930797, "learning_rate": 4.937550689375507e-06, "loss": 0.5158, "step": 31207 }, { "epoch": 0.9111558786604771, "grad_norm": 0.5896384809162692, "learning_rate": 4.935928629359286e-06, "loss": 0.6509, "step": 31208 }, { "epoch": 0.9111850748883245, "grad_norm": 0.5209580648046286, "learning_rate": 4.934306569343066e-06, "loss": 0.5532, "step": 31209 }, { "epoch": 0.9112142711161718, "grad_norm": 0.4978933222466633, "learning_rate": 4.932684509326845e-06, "loss": 0.5323, "step": 31210 }, { "epoch": 0.9112434673440192, "grad_norm": 0.5393209862685624, "learning_rate": 4.931062449310625e-06, "loss": 0.6022, "step": 31211 }, { "epoch": 0.9112726635718665, "grad_norm": 0.5722752877344953, "learning_rate": 4.929440389294404e-06, "loss": 0.6398, "step": 31212 }, { "epoch": 0.9113018597997139, "grad_norm": 0.5495360376948124, "learning_rate": 4.927818329278183e-06, "loss": 0.5972, "step": 31213 }, { "epoch": 0.9113310560275613, "grad_norm": 0.5596573777722088, "learning_rate": 4.926196269261963e-06, "loss": 0.6356, "step": 31214 }, { "epoch": 0.9113602522554086, "grad_norm": 0.5301179271068102, "learning_rate": 4.924574209245742e-06, "loss": 0.5696, "step": 31215 }, { "epoch": 0.911389448483256, "grad_norm": 0.5700678723937881, "learning_rate": 4.922952149229522e-06, "loss": 0.6578, "step": 31216 }, { "epoch": 0.9114186447111033, "grad_norm": 0.5041305402494034, "learning_rate": 4.921330089213301e-06, "loss": 0.5489, "step": 31217 }, { "epoch": 0.9114478409389507, "grad_norm": 0.585806595335538, "learning_rate": 4.919708029197081e-06, "loss": 0.6856, "step": 31218 }, { "epoch": 0.9114770371667981, "grad_norm": 0.5085736013813424, "learning_rate": 4.91808596918086e-06, "loss": 0.5339, "step": 31219 }, { "epoch": 0.9115062333946454, "grad_norm": 0.5142175391605536, "learning_rate": 4.916463909164639e-06, "loss": 0.539, "step": 31220 }, { "epoch": 0.9115354296224928, "grad_norm": 0.5207311171788409, "learning_rate": 4.914841849148419e-06, "loss": 0.5348, "step": 31221 }, { "epoch": 0.9115646258503401, "grad_norm": 0.4861548872923477, "learning_rate": 4.913219789132198e-06, "loss": 0.5392, "step": 31222 }, { "epoch": 0.9115938220781875, "grad_norm": 0.5328812698903637, "learning_rate": 4.911597729115978e-06, "loss": 0.6203, "step": 31223 }, { "epoch": 0.9116230183060349, "grad_norm": 0.5207431703836358, "learning_rate": 4.909975669099757e-06, "loss": 0.5841, "step": 31224 }, { "epoch": 0.9116522145338822, "grad_norm": 0.5325107834978858, "learning_rate": 4.908353609083537e-06, "loss": 0.5862, "step": 31225 }, { "epoch": 0.9116814107617296, "grad_norm": 0.5684647226645577, "learning_rate": 4.906731549067315e-06, "loss": 0.6483, "step": 31226 }, { "epoch": 0.911710606989577, "grad_norm": 0.503628378660404, "learning_rate": 4.9051094890510946e-06, "loss": 0.5165, "step": 31227 }, { "epoch": 0.9117398032174243, "grad_norm": 0.5387150513900736, "learning_rate": 4.903487429034875e-06, "loss": 0.6079, "step": 31228 }, { "epoch": 0.9117689994452717, "grad_norm": 0.4883279357091763, "learning_rate": 4.901865369018654e-06, "loss": 0.5103, "step": 31229 }, { "epoch": 0.911798195673119, "grad_norm": 0.48870850240829783, "learning_rate": 4.900243309002434e-06, "loss": 0.4808, "step": 31230 }, { "epoch": 0.9118273919009664, "grad_norm": 0.5356494325667391, "learning_rate": 4.898621248986212e-06, "loss": 0.6036, "step": 31231 }, { "epoch": 0.9118565881288138, "grad_norm": 0.5189344329676574, "learning_rate": 4.896999188969992e-06, "loss": 0.5404, "step": 31232 }, { "epoch": 0.9118857843566611, "grad_norm": 0.5210079144781472, "learning_rate": 4.895377128953772e-06, "loss": 0.5472, "step": 31233 }, { "epoch": 0.9119149805845085, "grad_norm": 0.5465463387239511, "learning_rate": 4.893755068937551e-06, "loss": 0.5744, "step": 31234 }, { "epoch": 0.9119441768123558, "grad_norm": 0.5074689892600124, "learning_rate": 4.892133008921331e-06, "loss": 0.5168, "step": 31235 }, { "epoch": 0.9119733730402032, "grad_norm": 0.5123813059942777, "learning_rate": 4.890510948905109e-06, "loss": 0.5453, "step": 31236 }, { "epoch": 0.9120025692680506, "grad_norm": 0.6090856015953671, "learning_rate": 4.888888888888889e-06, "loss": 0.6446, "step": 31237 }, { "epoch": 0.9120317654958979, "grad_norm": 0.5694406321490284, "learning_rate": 4.8872668288726685e-06, "loss": 0.6116, "step": 31238 }, { "epoch": 0.9120609617237453, "grad_norm": 0.519146111673426, "learning_rate": 4.885644768856448e-06, "loss": 0.5824, "step": 31239 }, { "epoch": 0.9120901579515927, "grad_norm": 0.5431298581653786, "learning_rate": 4.884022708840227e-06, "loss": 0.5816, "step": 31240 }, { "epoch": 0.91211935417944, "grad_norm": 0.520492507081064, "learning_rate": 4.882400648824006e-06, "loss": 0.6041, "step": 31241 }, { "epoch": 0.9121485504072874, "grad_norm": 0.502714842915816, "learning_rate": 4.880778588807786e-06, "loss": 0.5297, "step": 31242 }, { "epoch": 0.9121777466351347, "grad_norm": 0.5671384136036013, "learning_rate": 4.8791565287915655e-06, "loss": 0.6765, "step": 31243 }, { "epoch": 0.9122069428629821, "grad_norm": 0.5161952486781786, "learning_rate": 4.8775344687753456e-06, "loss": 0.4995, "step": 31244 }, { "epoch": 0.9122361390908295, "grad_norm": 0.4695526295360134, "learning_rate": 4.875912408759124e-06, "loss": 0.4672, "step": 31245 }, { "epoch": 0.9122653353186768, "grad_norm": 0.5024939064061744, "learning_rate": 4.874290348742903e-06, "loss": 0.5614, "step": 31246 }, { "epoch": 0.9122945315465242, "grad_norm": 0.532889169537553, "learning_rate": 4.872668288726683e-06, "loss": 0.5877, "step": 31247 }, { "epoch": 0.9123237277743715, "grad_norm": 0.5048131187480827, "learning_rate": 4.8710462287104625e-06, "loss": 0.5487, "step": 31248 }, { "epoch": 0.9123529240022189, "grad_norm": 0.5348482215900778, "learning_rate": 4.8694241686942425e-06, "loss": 0.5863, "step": 31249 }, { "epoch": 0.9123821202300663, "grad_norm": 0.5432118681119891, "learning_rate": 4.867802108678021e-06, "loss": 0.5604, "step": 31250 }, { "epoch": 0.9124113164579136, "grad_norm": 0.5559380335103201, "learning_rate": 4.866180048661801e-06, "loss": 0.6241, "step": 31251 }, { "epoch": 0.912440512685761, "grad_norm": 0.5186957286428954, "learning_rate": 4.86455798864558e-06, "loss": 0.5594, "step": 31252 }, { "epoch": 0.9124697089136083, "grad_norm": 0.559864137852744, "learning_rate": 4.862935928629359e-06, "loss": 0.621, "step": 31253 }, { "epoch": 0.9124989051414557, "grad_norm": 0.5414313144165959, "learning_rate": 4.861313868613139e-06, "loss": 0.6074, "step": 31254 }, { "epoch": 0.9125281013693031, "grad_norm": 0.5285018029217748, "learning_rate": 4.859691808596918e-06, "loss": 0.5553, "step": 31255 }, { "epoch": 0.9125572975971504, "grad_norm": 0.5434467938337939, "learning_rate": 4.858069748580698e-06, "loss": 0.6355, "step": 31256 }, { "epoch": 0.9125864938249978, "grad_norm": 0.5386606699438911, "learning_rate": 4.856447688564477e-06, "loss": 0.5912, "step": 31257 }, { "epoch": 0.9126156900528452, "grad_norm": 0.5312973481582286, "learning_rate": 4.854825628548257e-06, "loss": 0.574, "step": 31258 }, { "epoch": 0.9126448862806925, "grad_norm": 0.544922974737466, "learning_rate": 4.8532035685320356e-06, "loss": 0.591, "step": 31259 }, { "epoch": 0.9126740825085399, "grad_norm": 0.46282746354098336, "learning_rate": 4.851581508515815e-06, "loss": 0.4681, "step": 31260 }, { "epoch": 0.9127032787363872, "grad_norm": 0.5178258691980893, "learning_rate": 4.849959448499595e-06, "loss": 0.562, "step": 31261 }, { "epoch": 0.9127324749642346, "grad_norm": 0.5599416716063771, "learning_rate": 4.848337388483374e-06, "loss": 0.6194, "step": 31262 }, { "epoch": 0.912761671192082, "grad_norm": 0.5484386497499607, "learning_rate": 4.846715328467154e-06, "loss": 0.6827, "step": 31263 }, { "epoch": 0.9127908674199293, "grad_norm": 0.5313303189437825, "learning_rate": 4.8450932684509325e-06, "loss": 0.5779, "step": 31264 }, { "epoch": 0.9128200636477767, "grad_norm": 0.5730025424915127, "learning_rate": 4.843471208434712e-06, "loss": 0.6368, "step": 31265 }, { "epoch": 0.912849259875624, "grad_norm": 0.5245439262503595, "learning_rate": 4.841849148418492e-06, "loss": 0.599, "step": 31266 }, { "epoch": 0.9128784561034714, "grad_norm": 0.5278332001049647, "learning_rate": 4.840227088402271e-06, "loss": 0.5571, "step": 31267 }, { "epoch": 0.9129076523313188, "grad_norm": 0.5504302672140285, "learning_rate": 4.83860502838605e-06, "loss": 0.5878, "step": 31268 }, { "epoch": 0.9129368485591661, "grad_norm": 0.5283128958236855, "learning_rate": 4.8369829683698295e-06, "loss": 0.5988, "step": 31269 }, { "epoch": 0.9129660447870135, "grad_norm": 0.48771063012729515, "learning_rate": 4.8353609083536095e-06, "loss": 0.4848, "step": 31270 }, { "epoch": 0.9129952410148608, "grad_norm": 0.4909757956089898, "learning_rate": 4.833738848337389e-06, "loss": 0.4964, "step": 31271 }, { "epoch": 0.9130244372427082, "grad_norm": 0.5013998445910639, "learning_rate": 4.832116788321168e-06, "loss": 0.5706, "step": 31272 }, { "epoch": 0.9130536334705556, "grad_norm": 0.5419992318865017, "learning_rate": 4.830494728304947e-06, "loss": 0.5998, "step": 31273 }, { "epoch": 0.9130828296984029, "grad_norm": 0.4772766249455191, "learning_rate": 4.8288726682887264e-06, "loss": 0.4981, "step": 31274 }, { "epoch": 0.9131120259262503, "grad_norm": 0.526281784855252, "learning_rate": 4.8272506082725065e-06, "loss": 0.5591, "step": 31275 }, { "epoch": 0.9131412221540977, "grad_norm": 0.5212764772880474, "learning_rate": 4.825628548256286e-06, "loss": 0.5645, "step": 31276 }, { "epoch": 0.913170418381945, "grad_norm": 0.5206472599972012, "learning_rate": 4.824006488240066e-06, "loss": 0.5415, "step": 31277 }, { "epoch": 0.9131996146097924, "grad_norm": 0.5267727071187628, "learning_rate": 4.822384428223844e-06, "loss": 0.5801, "step": 31278 }, { "epoch": 0.9132288108376397, "grad_norm": 0.5133588893813658, "learning_rate": 4.820762368207623e-06, "loss": 0.552, "step": 31279 }, { "epoch": 0.9132580070654871, "grad_norm": 0.5372637826036533, "learning_rate": 4.8191403081914035e-06, "loss": 0.5871, "step": 31280 }, { "epoch": 0.9132872032933345, "grad_norm": 0.4732111718689866, "learning_rate": 4.817518248175183e-06, "loss": 0.4662, "step": 31281 }, { "epoch": 0.9133163995211818, "grad_norm": 0.5382502256684272, "learning_rate": 4.815896188158963e-06, "loss": 0.5871, "step": 31282 }, { "epoch": 0.9133455957490292, "grad_norm": 0.5173445287334553, "learning_rate": 4.814274128142741e-06, "loss": 0.5619, "step": 31283 }, { "epoch": 0.9133747919768765, "grad_norm": 0.5122376556834051, "learning_rate": 4.812652068126521e-06, "loss": 0.5446, "step": 31284 }, { "epoch": 0.9134039882047239, "grad_norm": 0.4922398890356889, "learning_rate": 4.8110300081103e-06, "loss": 0.5284, "step": 31285 }, { "epoch": 0.9134331844325713, "grad_norm": 0.5064762642649886, "learning_rate": 4.80940794809408e-06, "loss": 0.5696, "step": 31286 }, { "epoch": 0.9134623806604186, "grad_norm": 0.4759160578278785, "learning_rate": 4.807785888077859e-06, "loss": 0.5038, "step": 31287 }, { "epoch": 0.913491576888266, "grad_norm": 0.5499566559634855, "learning_rate": 4.806163828061638e-06, "loss": 0.6186, "step": 31288 }, { "epoch": 0.9135207731161133, "grad_norm": 0.49961961816517597, "learning_rate": 4.804541768045418e-06, "loss": 0.5209, "step": 31289 }, { "epoch": 0.9135499693439607, "grad_norm": 0.5112466046383414, "learning_rate": 4.802919708029197e-06, "loss": 0.549, "step": 31290 }, { "epoch": 0.9135791655718081, "grad_norm": 0.5760983093813049, "learning_rate": 4.801297648012977e-06, "loss": 0.6197, "step": 31291 }, { "epoch": 0.9136083617996554, "grad_norm": 0.5657098516238919, "learning_rate": 4.799675587996756e-06, "loss": 0.615, "step": 31292 }, { "epoch": 0.9136375580275028, "grad_norm": 0.5038715449499782, "learning_rate": 4.798053527980535e-06, "loss": 0.5363, "step": 31293 }, { "epoch": 0.9136667542553502, "grad_norm": 0.5279959622265806, "learning_rate": 4.796431467964315e-06, "loss": 0.5531, "step": 31294 }, { "epoch": 0.9136959504831975, "grad_norm": 0.48842460913805413, "learning_rate": 4.794809407948094e-06, "loss": 0.4861, "step": 31295 }, { "epoch": 0.9137251467110449, "grad_norm": 0.5042709555400998, "learning_rate": 4.793187347931874e-06, "loss": 0.5131, "step": 31296 }, { "epoch": 0.9137543429388923, "grad_norm": 0.5332105847940766, "learning_rate": 4.791565287915653e-06, "loss": 0.5907, "step": 31297 }, { "epoch": 0.9137835391667397, "grad_norm": 0.5410034237400672, "learning_rate": 4.789943227899432e-06, "loss": 0.6068, "step": 31298 }, { "epoch": 0.9138127353945871, "grad_norm": 0.5600495151001271, "learning_rate": 4.788321167883212e-06, "loss": 0.6635, "step": 31299 }, { "epoch": 0.9138419316224344, "grad_norm": 0.5477943439431955, "learning_rate": 4.786699107866991e-06, "loss": 0.584, "step": 31300 }, { "epoch": 0.9138711278502818, "grad_norm": 0.4880722128682208, "learning_rate": 4.7850770478507705e-06, "loss": 0.5097, "step": 31301 }, { "epoch": 0.9139003240781292, "grad_norm": 0.5663603590093977, "learning_rate": 4.78345498783455e-06, "loss": 0.6751, "step": 31302 }, { "epoch": 0.9139295203059765, "grad_norm": 0.5433658238463662, "learning_rate": 4.78183292781833e-06, "loss": 0.606, "step": 31303 }, { "epoch": 0.9139587165338239, "grad_norm": 0.508559295306333, "learning_rate": 4.780210867802109e-06, "loss": 0.5399, "step": 31304 }, { "epoch": 0.9139879127616712, "grad_norm": 0.5446116575008677, "learning_rate": 4.778588807785888e-06, "loss": 0.5795, "step": 31305 }, { "epoch": 0.9140171089895186, "grad_norm": 0.49987243044105123, "learning_rate": 4.7769667477696674e-06, "loss": 0.5239, "step": 31306 }, { "epoch": 0.914046305217366, "grad_norm": 0.5600391339798415, "learning_rate": 4.775344687753447e-06, "loss": 0.6285, "step": 31307 }, { "epoch": 0.9140755014452133, "grad_norm": 0.5003484306055497, "learning_rate": 4.773722627737227e-06, "loss": 0.5415, "step": 31308 }, { "epoch": 0.9141046976730607, "grad_norm": 0.569811331594051, "learning_rate": 4.772100567721006e-06, "loss": 0.618, "step": 31309 }, { "epoch": 0.914133893900908, "grad_norm": 0.5417988442823161, "learning_rate": 4.770478507704786e-06, "loss": 0.5786, "step": 31310 }, { "epoch": 0.9141630901287554, "grad_norm": 0.5320570290097213, "learning_rate": 4.768856447688564e-06, "loss": 0.6023, "step": 31311 }, { "epoch": 0.9141922863566028, "grad_norm": 0.5162814738837637, "learning_rate": 4.767234387672344e-06, "loss": 0.5884, "step": 31312 }, { "epoch": 0.9142214825844501, "grad_norm": 0.5147386775624403, "learning_rate": 4.765612327656124e-06, "loss": 0.5466, "step": 31313 }, { "epoch": 0.9142506788122975, "grad_norm": 0.5578039051019971, "learning_rate": 4.763990267639903e-06, "loss": 0.6486, "step": 31314 }, { "epoch": 0.9142798750401449, "grad_norm": 0.5557134225412136, "learning_rate": 4.762368207623682e-06, "loss": 0.5828, "step": 31315 }, { "epoch": 0.9143090712679922, "grad_norm": 0.5471522715472562, "learning_rate": 4.760746147607461e-06, "loss": 0.6146, "step": 31316 }, { "epoch": 0.9143382674958396, "grad_norm": 0.5183666833471353, "learning_rate": 4.759124087591241e-06, "loss": 0.5571, "step": 31317 }, { "epoch": 0.9143674637236869, "grad_norm": 0.4914328690512434, "learning_rate": 4.757502027575021e-06, "loss": 0.4889, "step": 31318 }, { "epoch": 0.9143966599515343, "grad_norm": 0.525542074987405, "learning_rate": 4.7558799675588e-06, "loss": 0.5847, "step": 31319 }, { "epoch": 0.9144258561793817, "grad_norm": 0.5054754287300934, "learning_rate": 4.754257907542579e-06, "loss": 0.5639, "step": 31320 }, { "epoch": 0.914455052407229, "grad_norm": 0.5270353193641207, "learning_rate": 4.752635847526358e-06, "loss": 0.6038, "step": 31321 }, { "epoch": 0.9144842486350764, "grad_norm": 0.5165180447849415, "learning_rate": 4.751013787510138e-06, "loss": 0.526, "step": 31322 }, { "epoch": 0.9145134448629237, "grad_norm": 0.5493179108044491, "learning_rate": 4.7493917274939176e-06, "loss": 0.5724, "step": 31323 }, { "epoch": 0.9145426410907711, "grad_norm": 0.5174851501900427, "learning_rate": 4.747769667477698e-06, "loss": 0.5965, "step": 31324 }, { "epoch": 0.9145718373186185, "grad_norm": 0.5310540025026147, "learning_rate": 4.746147607461476e-06, "loss": 0.5451, "step": 31325 }, { "epoch": 0.9146010335464658, "grad_norm": 0.5370050980663381, "learning_rate": 4.744525547445255e-06, "loss": 0.561, "step": 31326 }, { "epoch": 0.9146302297743132, "grad_norm": 0.5286595160975125, "learning_rate": 4.742903487429035e-06, "loss": 0.6032, "step": 31327 }, { "epoch": 0.9146594260021605, "grad_norm": 0.477488489739402, "learning_rate": 4.7412814274128145e-06, "loss": 0.4789, "step": 31328 }, { "epoch": 0.9146886222300079, "grad_norm": 0.5410459901161981, "learning_rate": 4.739659367396594e-06, "loss": 0.5882, "step": 31329 }, { "epoch": 0.9147178184578553, "grad_norm": 0.5553143777632539, "learning_rate": 4.738037307380373e-06, "loss": 0.6088, "step": 31330 }, { "epoch": 0.9147470146857026, "grad_norm": 0.49729543820165667, "learning_rate": 4.736415247364152e-06, "loss": 0.5217, "step": 31331 }, { "epoch": 0.91477621091355, "grad_norm": 0.5720956030245349, "learning_rate": 4.734793187347932e-06, "loss": 0.6187, "step": 31332 }, { "epoch": 0.9148054071413974, "grad_norm": 0.5167281403282216, "learning_rate": 4.7331711273317115e-06, "loss": 0.5704, "step": 31333 }, { "epoch": 0.9148346033692447, "grad_norm": 0.4882474993533892, "learning_rate": 4.731549067315491e-06, "loss": 0.5107, "step": 31334 }, { "epoch": 0.9148637995970921, "grad_norm": 0.5727679614918274, "learning_rate": 4.72992700729927e-06, "loss": 0.6572, "step": 31335 }, { "epoch": 0.9148929958249394, "grad_norm": 0.4932641826660412, "learning_rate": 4.72830494728305e-06, "loss": 0.5437, "step": 31336 }, { "epoch": 0.9149221920527868, "grad_norm": 0.510502126630772, "learning_rate": 4.726682887266829e-06, "loss": 0.5453, "step": 31337 }, { "epoch": 0.9149513882806342, "grad_norm": 0.5160764227245042, "learning_rate": 4.7250608272506084e-06, "loss": 0.527, "step": 31338 }, { "epoch": 0.9149805845084815, "grad_norm": 0.5287395253574486, "learning_rate": 4.723438767234388e-06, "loss": 0.591, "step": 31339 }, { "epoch": 0.9150097807363289, "grad_norm": 0.562521380532516, "learning_rate": 4.721816707218167e-06, "loss": 0.6694, "step": 31340 }, { "epoch": 0.9150389769641762, "grad_norm": 0.5196107362095377, "learning_rate": 4.720194647201947e-06, "loss": 0.5602, "step": 31341 }, { "epoch": 0.9150681731920236, "grad_norm": 0.5555268585007352, "learning_rate": 4.718572587185726e-06, "loss": 0.6333, "step": 31342 }, { "epoch": 0.915097369419871, "grad_norm": 0.5674873371906284, "learning_rate": 4.716950527169506e-06, "loss": 0.637, "step": 31343 }, { "epoch": 0.9151265656477183, "grad_norm": 0.589741749026339, "learning_rate": 4.715328467153285e-06, "loss": 0.6198, "step": 31344 }, { "epoch": 0.9151557618755657, "grad_norm": 0.5145038013910499, "learning_rate": 4.713706407137064e-06, "loss": 0.5591, "step": 31345 }, { "epoch": 0.915184958103413, "grad_norm": 0.5238113122625733, "learning_rate": 4.712084347120844e-06, "loss": 0.5681, "step": 31346 }, { "epoch": 0.9152141543312604, "grad_norm": 0.5093133960208186, "learning_rate": 4.710462287104623e-06, "loss": 0.5612, "step": 31347 }, { "epoch": 0.9152433505591078, "grad_norm": 0.5227217992931102, "learning_rate": 4.708840227088402e-06, "loss": 0.5576, "step": 31348 }, { "epoch": 0.9152725467869551, "grad_norm": 0.5703609773766262, "learning_rate": 4.7072181670721816e-06, "loss": 0.6135, "step": 31349 }, { "epoch": 0.9153017430148025, "grad_norm": 0.4847943862479957, "learning_rate": 4.705596107055962e-06, "loss": 0.4661, "step": 31350 }, { "epoch": 0.9153309392426499, "grad_norm": 0.5102158029245053, "learning_rate": 4.703974047039741e-06, "loss": 0.5497, "step": 31351 }, { "epoch": 0.9153601354704972, "grad_norm": 0.5122015352134632, "learning_rate": 4.70235198702352e-06, "loss": 0.5174, "step": 31352 }, { "epoch": 0.9153893316983446, "grad_norm": 0.5060034189549192, "learning_rate": 4.700729927007299e-06, "loss": 0.4972, "step": 31353 }, { "epoch": 0.9154185279261919, "grad_norm": 0.5126384645496918, "learning_rate": 4.6991078669910785e-06, "loss": 0.5458, "step": 31354 }, { "epoch": 0.9154477241540393, "grad_norm": 0.4905429987739708, "learning_rate": 4.6974858069748586e-06, "loss": 0.4996, "step": 31355 }, { "epoch": 0.9154769203818867, "grad_norm": 0.49623937877301894, "learning_rate": 4.695863746958638e-06, "loss": 0.4727, "step": 31356 }, { "epoch": 0.915506116609734, "grad_norm": 0.5728344554627798, "learning_rate": 4.694241686942417e-06, "loss": 0.6971, "step": 31357 }, { "epoch": 0.9155353128375814, "grad_norm": 0.5482144360350565, "learning_rate": 4.692619626926196e-06, "loss": 0.6074, "step": 31358 }, { "epoch": 0.9155645090654287, "grad_norm": 0.5276963655374428, "learning_rate": 4.6909975669099755e-06, "loss": 0.6013, "step": 31359 }, { "epoch": 0.9155937052932761, "grad_norm": 0.5556759448718548, "learning_rate": 4.6893755068937555e-06, "loss": 0.5653, "step": 31360 }, { "epoch": 0.9156229015211235, "grad_norm": 0.5174527801263833, "learning_rate": 4.687753446877535e-06, "loss": 0.5316, "step": 31361 }, { "epoch": 0.9156520977489708, "grad_norm": 0.5427708098013679, "learning_rate": 4.686131386861314e-06, "loss": 0.6112, "step": 31362 }, { "epoch": 0.9156812939768182, "grad_norm": 0.5838875266028539, "learning_rate": 4.684509326845093e-06, "loss": 0.6819, "step": 31363 }, { "epoch": 0.9157104902046656, "grad_norm": 0.5336109248464104, "learning_rate": 4.682887266828872e-06, "loss": 0.566, "step": 31364 }, { "epoch": 0.9157396864325129, "grad_norm": 0.595855414086335, "learning_rate": 4.6812652068126525e-06, "loss": 0.6932, "step": 31365 }, { "epoch": 0.9157688826603603, "grad_norm": 0.4765711359632099, "learning_rate": 4.679643146796432e-06, "loss": 0.4558, "step": 31366 }, { "epoch": 0.9157980788882076, "grad_norm": 0.5467527881343001, "learning_rate": 4.678021086780211e-06, "loss": 0.6267, "step": 31367 }, { "epoch": 0.915827275116055, "grad_norm": 0.5518693737304429, "learning_rate": 4.67639902676399e-06, "loss": 0.6448, "step": 31368 }, { "epoch": 0.9158564713439024, "grad_norm": 0.5000993300382092, "learning_rate": 4.67477696674777e-06, "loss": 0.5412, "step": 31369 }, { "epoch": 0.9158856675717497, "grad_norm": 0.47689489156538223, "learning_rate": 4.6731549067315494e-06, "loss": 0.4862, "step": 31370 }, { "epoch": 0.9159148637995971, "grad_norm": 0.536943123487394, "learning_rate": 4.671532846715329e-06, "loss": 0.6121, "step": 31371 }, { "epoch": 0.9159440600274444, "grad_norm": 0.5207007927138455, "learning_rate": 4.669910786699108e-06, "loss": 0.5756, "step": 31372 }, { "epoch": 0.9159732562552918, "grad_norm": 0.5197828182994233, "learning_rate": 4.668288726682887e-06, "loss": 0.5969, "step": 31373 }, { "epoch": 0.9160024524831392, "grad_norm": 0.48516266624513205, "learning_rate": 4.666666666666667e-06, "loss": 0.5057, "step": 31374 }, { "epoch": 0.9160316487109865, "grad_norm": 0.5009245097933824, "learning_rate": 4.665044606650446e-06, "loss": 0.5345, "step": 31375 }, { "epoch": 0.9160608449388339, "grad_norm": 0.5549882749410796, "learning_rate": 4.663422546634226e-06, "loss": 0.624, "step": 31376 }, { "epoch": 0.9160900411666812, "grad_norm": 0.4983984308839849, "learning_rate": 4.661800486618005e-06, "loss": 0.5299, "step": 31377 }, { "epoch": 0.9161192373945286, "grad_norm": 0.5492018540653106, "learning_rate": 4.660178426601784e-06, "loss": 0.638, "step": 31378 }, { "epoch": 0.916148433622376, "grad_norm": 0.5295088268677474, "learning_rate": 4.658556366585564e-06, "loss": 0.5768, "step": 31379 }, { "epoch": 0.9161776298502233, "grad_norm": 0.5180851467590424, "learning_rate": 4.656934306569343e-06, "loss": 0.582, "step": 31380 }, { "epoch": 0.9162068260780707, "grad_norm": 0.5350592231677029, "learning_rate": 4.6553122465531226e-06, "loss": 0.5835, "step": 31381 }, { "epoch": 0.916236022305918, "grad_norm": 0.5516484540672375, "learning_rate": 4.653690186536902e-06, "loss": 0.5817, "step": 31382 }, { "epoch": 0.9162652185337654, "grad_norm": 0.5014895356683206, "learning_rate": 4.652068126520682e-06, "loss": 0.5616, "step": 31383 }, { "epoch": 0.9162944147616128, "grad_norm": 0.5283086044698523, "learning_rate": 4.650446066504461e-06, "loss": 0.5765, "step": 31384 }, { "epoch": 0.9163236109894601, "grad_norm": 0.5223471564076475, "learning_rate": 4.64882400648824e-06, "loss": 0.5643, "step": 31385 }, { "epoch": 0.9163528072173075, "grad_norm": 0.5538213312991865, "learning_rate": 4.6472019464720195e-06, "loss": 0.6298, "step": 31386 }, { "epoch": 0.9163820034451549, "grad_norm": 0.5146123099996971, "learning_rate": 4.645579886455799e-06, "loss": 0.5311, "step": 31387 }, { "epoch": 0.9164111996730022, "grad_norm": 0.4970586572945237, "learning_rate": 4.643957826439579e-06, "loss": 0.5245, "step": 31388 }, { "epoch": 0.9164403959008496, "grad_norm": 0.5509149627646505, "learning_rate": 4.642335766423358e-06, "loss": 0.6265, "step": 31389 }, { "epoch": 0.9164695921286969, "grad_norm": 0.5256279306838794, "learning_rate": 4.640713706407137e-06, "loss": 0.556, "step": 31390 }, { "epoch": 0.9164987883565443, "grad_norm": 0.5132970174322178, "learning_rate": 4.6390916463909165e-06, "loss": 0.5392, "step": 31391 }, { "epoch": 0.9165279845843917, "grad_norm": 0.5657050077078456, "learning_rate": 4.637469586374696e-06, "loss": 0.6078, "step": 31392 }, { "epoch": 0.916557180812239, "grad_norm": 0.4947039165145941, "learning_rate": 4.635847526358476e-06, "loss": 0.5063, "step": 31393 }, { "epoch": 0.9165863770400864, "grad_norm": 0.537164811967099, "learning_rate": 4.634225466342255e-06, "loss": 0.5869, "step": 31394 }, { "epoch": 0.9166155732679337, "grad_norm": 0.533135170159747, "learning_rate": 4.632603406326034e-06, "loss": 0.587, "step": 31395 }, { "epoch": 0.9166447694957811, "grad_norm": 0.5325147928617494, "learning_rate": 4.630981346309813e-06, "loss": 0.585, "step": 31396 }, { "epoch": 0.9166739657236285, "grad_norm": 0.5298447495277275, "learning_rate": 4.629359286293593e-06, "loss": 0.611, "step": 31397 }, { "epoch": 0.9167031619514758, "grad_norm": 0.5264337430061258, "learning_rate": 4.627737226277373e-06, "loss": 0.5636, "step": 31398 }, { "epoch": 0.9167323581793232, "grad_norm": 0.5408298826867498, "learning_rate": 4.626115166261152e-06, "loss": 0.6078, "step": 31399 }, { "epoch": 0.9167615544071706, "grad_norm": 0.49752735287241917, "learning_rate": 4.624493106244931e-06, "loss": 0.498, "step": 31400 }, { "epoch": 0.9167907506350179, "grad_norm": 0.5275267017568313, "learning_rate": 4.62287104622871e-06, "loss": 0.6113, "step": 31401 }, { "epoch": 0.9168199468628653, "grad_norm": 0.5132237682948518, "learning_rate": 4.6212489862124904e-06, "loss": 0.5831, "step": 31402 }, { "epoch": 0.9168491430907126, "grad_norm": 0.5145887787596981, "learning_rate": 4.61962692619627e-06, "loss": 0.5549, "step": 31403 }, { "epoch": 0.91687833931856, "grad_norm": 0.5635856570341224, "learning_rate": 4.618004866180049e-06, "loss": 0.6616, "step": 31404 }, { "epoch": 0.9169075355464074, "grad_norm": 0.5702893112323251, "learning_rate": 4.616382806163828e-06, "loss": 0.5841, "step": 31405 }, { "epoch": 0.9169367317742547, "grad_norm": 0.5801708192201147, "learning_rate": 4.614760746147607e-06, "loss": 0.6857, "step": 31406 }, { "epoch": 0.9169659280021021, "grad_norm": 0.506963183069285, "learning_rate": 4.613138686131387e-06, "loss": 0.5451, "step": 31407 }, { "epoch": 0.9169951242299494, "grad_norm": 0.5536865772906404, "learning_rate": 4.611516626115167e-06, "loss": 0.6138, "step": 31408 }, { "epoch": 0.9170243204577968, "grad_norm": 0.48968007340649133, "learning_rate": 4.609894566098946e-06, "loss": 0.4681, "step": 31409 }, { "epoch": 0.9170535166856442, "grad_norm": 0.5004618641616577, "learning_rate": 4.608272506082725e-06, "loss": 0.5099, "step": 31410 }, { "epoch": 0.9170827129134915, "grad_norm": 0.5025593732179132, "learning_rate": 4.606650446066504e-06, "loss": 0.5258, "step": 31411 }, { "epoch": 0.9171119091413389, "grad_norm": 0.5283642679815499, "learning_rate": 4.605028386050284e-06, "loss": 0.5901, "step": 31412 }, { "epoch": 0.9171411053691862, "grad_norm": 0.5464811707762607, "learning_rate": 4.6034063260340636e-06, "loss": 0.5794, "step": 31413 }, { "epoch": 0.9171703015970336, "grad_norm": 0.5262408757746397, "learning_rate": 4.601784266017843e-06, "loss": 0.5805, "step": 31414 }, { "epoch": 0.917199497824881, "grad_norm": 0.5299821626885837, "learning_rate": 4.600162206001622e-06, "loss": 0.5708, "step": 31415 }, { "epoch": 0.9172286940527283, "grad_norm": 0.5155152930679342, "learning_rate": 4.598540145985401e-06, "loss": 0.55, "step": 31416 }, { "epoch": 0.9172578902805758, "grad_norm": 0.5567573313929058, "learning_rate": 4.596918085969181e-06, "loss": 0.6047, "step": 31417 }, { "epoch": 0.9172870865084232, "grad_norm": 0.532873902078, "learning_rate": 4.5952960259529605e-06, "loss": 0.5369, "step": 31418 }, { "epoch": 0.9173162827362705, "grad_norm": 0.5554693020623338, "learning_rate": 4.59367396593674e-06, "loss": 0.6637, "step": 31419 }, { "epoch": 0.9173454789641179, "grad_norm": 0.4601617117219078, "learning_rate": 4.592051905920519e-06, "loss": 0.4418, "step": 31420 }, { "epoch": 0.9173746751919652, "grad_norm": 0.5184212953004569, "learning_rate": 4.590429845904299e-06, "loss": 0.5674, "step": 31421 }, { "epoch": 0.9174038714198126, "grad_norm": 0.4996605271230535, "learning_rate": 4.588807785888078e-06, "loss": 0.5422, "step": 31422 }, { "epoch": 0.91743306764766, "grad_norm": 0.5537835494459695, "learning_rate": 4.5871857258718575e-06, "loss": 0.6479, "step": 31423 }, { "epoch": 0.9174622638755073, "grad_norm": 0.5466794074504561, "learning_rate": 4.585563665855637e-06, "loss": 0.6229, "step": 31424 }, { "epoch": 0.9174914601033547, "grad_norm": 0.5199485759480207, "learning_rate": 4.583941605839416e-06, "loss": 0.5679, "step": 31425 }, { "epoch": 0.9175206563312021, "grad_norm": 0.5952564377777704, "learning_rate": 4.582319545823196e-06, "loss": 0.6521, "step": 31426 }, { "epoch": 0.9175498525590494, "grad_norm": 0.5512103276522214, "learning_rate": 4.580697485806975e-06, "loss": 0.5966, "step": 31427 }, { "epoch": 0.9175790487868968, "grad_norm": 0.5303743802952349, "learning_rate": 4.579075425790754e-06, "loss": 0.591, "step": 31428 }, { "epoch": 0.9176082450147441, "grad_norm": 0.5029668790697758, "learning_rate": 4.577453365774534e-06, "loss": 0.5292, "step": 31429 }, { "epoch": 0.9176374412425915, "grad_norm": 0.601877358156137, "learning_rate": 4.575831305758313e-06, "loss": 0.622, "step": 31430 }, { "epoch": 0.9176666374704389, "grad_norm": 0.4867187320626645, "learning_rate": 4.574209245742093e-06, "loss": 0.5252, "step": 31431 }, { "epoch": 0.9176958336982862, "grad_norm": 0.4717166552643621, "learning_rate": 4.572587185725872e-06, "loss": 0.508, "step": 31432 }, { "epoch": 0.9177250299261336, "grad_norm": 0.5432471431981624, "learning_rate": 4.570965125709651e-06, "loss": 0.6137, "step": 31433 }, { "epoch": 0.917754226153981, "grad_norm": 0.5260440689398722, "learning_rate": 4.569343065693431e-06, "loss": 0.5501, "step": 31434 }, { "epoch": 0.9177834223818283, "grad_norm": 0.4919214674352539, "learning_rate": 4.567721005677211e-06, "loss": 0.5029, "step": 31435 }, { "epoch": 0.9178126186096757, "grad_norm": 0.5157195221936917, "learning_rate": 4.56609894566099e-06, "loss": 0.5736, "step": 31436 }, { "epoch": 0.917841814837523, "grad_norm": 0.5085618056503816, "learning_rate": 4.564476885644769e-06, "loss": 0.5543, "step": 31437 }, { "epoch": 0.9178710110653704, "grad_norm": 0.5028966712313475, "learning_rate": 4.562854825628548e-06, "loss": 0.5538, "step": 31438 }, { "epoch": 0.9179002072932178, "grad_norm": 0.5172404256819357, "learning_rate": 4.5612327656123275e-06, "loss": 0.5716, "step": 31439 }, { "epoch": 0.9179294035210651, "grad_norm": 0.4955235084789533, "learning_rate": 4.559610705596108e-06, "loss": 0.5063, "step": 31440 }, { "epoch": 0.9179585997489125, "grad_norm": 0.5389513969096031, "learning_rate": 4.557988645579887e-06, "loss": 0.6362, "step": 31441 }, { "epoch": 0.9179877959767598, "grad_norm": 0.5401821927599177, "learning_rate": 4.556366585563666e-06, "loss": 0.6106, "step": 31442 }, { "epoch": 0.9180169922046072, "grad_norm": 0.5610437546114174, "learning_rate": 4.554744525547445e-06, "loss": 0.6198, "step": 31443 }, { "epoch": 0.9180461884324546, "grad_norm": 0.5597942481467496, "learning_rate": 4.5531224655312245e-06, "loss": 0.58, "step": 31444 }, { "epoch": 0.9180753846603019, "grad_norm": 0.5400539941338758, "learning_rate": 4.5515004055150046e-06, "loss": 0.5854, "step": 31445 }, { "epoch": 0.9181045808881493, "grad_norm": 0.5003271772234354, "learning_rate": 4.549878345498784e-06, "loss": 0.5301, "step": 31446 }, { "epoch": 0.9181337771159966, "grad_norm": 0.5434455804454081, "learning_rate": 4.548256285482563e-06, "loss": 0.5879, "step": 31447 }, { "epoch": 0.918162973343844, "grad_norm": 0.5279080202823482, "learning_rate": 4.546634225466342e-06, "loss": 0.605, "step": 31448 }, { "epoch": 0.9181921695716914, "grad_norm": 0.4933893491395887, "learning_rate": 4.5450121654501214e-06, "loss": 0.5078, "step": 31449 }, { "epoch": 0.9182213657995387, "grad_norm": 0.5521302108522949, "learning_rate": 4.5433901054339015e-06, "loss": 0.5779, "step": 31450 }, { "epoch": 0.9182505620273861, "grad_norm": 0.48225856321580773, "learning_rate": 4.541768045417681e-06, "loss": 0.4795, "step": 31451 }, { "epoch": 0.9182797582552334, "grad_norm": 0.5132257151999161, "learning_rate": 4.54014598540146e-06, "loss": 0.5583, "step": 31452 }, { "epoch": 0.9183089544830808, "grad_norm": 0.5674382350160474, "learning_rate": 4.538523925385239e-06, "loss": 0.6738, "step": 31453 }, { "epoch": 0.9183381507109282, "grad_norm": 0.5385291720917373, "learning_rate": 4.536901865369019e-06, "loss": 0.6251, "step": 31454 }, { "epoch": 0.9183673469387755, "grad_norm": 0.5761311409862948, "learning_rate": 4.5352798053527985e-06, "loss": 0.5642, "step": 31455 }, { "epoch": 0.9183965431666229, "grad_norm": 0.4819639138424616, "learning_rate": 4.533657745336578e-06, "loss": 0.4528, "step": 31456 }, { "epoch": 0.9184257393944703, "grad_norm": 0.5846700730651954, "learning_rate": 4.532035685320357e-06, "loss": 0.6817, "step": 31457 }, { "epoch": 0.9184549356223176, "grad_norm": 0.6170649700564449, "learning_rate": 4.530413625304136e-06, "loss": 0.6959, "step": 31458 }, { "epoch": 0.918484131850165, "grad_norm": 0.5631722335923515, "learning_rate": 4.528791565287916e-06, "loss": 0.5545, "step": 31459 }, { "epoch": 0.9185133280780123, "grad_norm": 0.5634533069829256, "learning_rate": 4.527169505271695e-06, "loss": 0.5914, "step": 31460 }, { "epoch": 0.9185425243058597, "grad_norm": 0.5439733952490374, "learning_rate": 4.525547445255475e-06, "loss": 0.595, "step": 31461 }, { "epoch": 0.9185717205337071, "grad_norm": 0.5171480653632902, "learning_rate": 4.523925385239254e-06, "loss": 0.5589, "step": 31462 }, { "epoch": 0.9186009167615544, "grad_norm": 0.555093870630203, "learning_rate": 4.522303325223033e-06, "loss": 0.6381, "step": 31463 }, { "epoch": 0.9186301129894018, "grad_norm": 0.49884224060707505, "learning_rate": 4.520681265206813e-06, "loss": 0.5045, "step": 31464 }, { "epoch": 0.9186593092172491, "grad_norm": 0.517055410005063, "learning_rate": 4.519059205190592e-06, "loss": 0.5518, "step": 31465 }, { "epoch": 0.9186885054450965, "grad_norm": 0.567153978784685, "learning_rate": 4.517437145174372e-06, "loss": 0.596, "step": 31466 }, { "epoch": 0.9187177016729439, "grad_norm": 0.5234091650165931, "learning_rate": 4.515815085158151e-06, "loss": 0.5683, "step": 31467 }, { "epoch": 0.9187468979007912, "grad_norm": 0.5161627000500382, "learning_rate": 4.514193025141931e-06, "loss": 0.5072, "step": 31468 }, { "epoch": 0.9187760941286386, "grad_norm": 0.5510751812147892, "learning_rate": 4.51257096512571e-06, "loss": 0.6048, "step": 31469 }, { "epoch": 0.918805290356486, "grad_norm": 0.5270831347893293, "learning_rate": 4.510948905109489e-06, "loss": 0.5825, "step": 31470 }, { "epoch": 0.9188344865843333, "grad_norm": 0.5047552594246004, "learning_rate": 4.5093268450932685e-06, "loss": 0.5249, "step": 31471 }, { "epoch": 0.9188636828121807, "grad_norm": 0.5097759146081303, "learning_rate": 4.507704785077048e-06, "loss": 0.5816, "step": 31472 }, { "epoch": 0.918892879040028, "grad_norm": 0.5584916240103166, "learning_rate": 4.506082725060828e-06, "loss": 0.6469, "step": 31473 }, { "epoch": 0.9189220752678754, "grad_norm": 0.5217569329383058, "learning_rate": 4.504460665044607e-06, "loss": 0.5608, "step": 31474 }, { "epoch": 0.9189512714957228, "grad_norm": 0.5121570459657152, "learning_rate": 4.502838605028386e-06, "loss": 0.5603, "step": 31475 }, { "epoch": 0.9189804677235701, "grad_norm": 0.530807068726421, "learning_rate": 4.5012165450121655e-06, "loss": 0.611, "step": 31476 }, { "epoch": 0.9190096639514175, "grad_norm": 0.5760623328577662, "learning_rate": 4.499594484995945e-06, "loss": 0.5961, "step": 31477 }, { "epoch": 0.9190388601792648, "grad_norm": 0.4904242460013539, "learning_rate": 4.497972424979725e-06, "loss": 0.5219, "step": 31478 }, { "epoch": 0.9190680564071122, "grad_norm": 0.5167393366946659, "learning_rate": 4.496350364963504e-06, "loss": 0.522, "step": 31479 }, { "epoch": 0.9190972526349596, "grad_norm": 0.5131698490198552, "learning_rate": 4.494728304947283e-06, "loss": 0.548, "step": 31480 }, { "epoch": 0.9191264488628069, "grad_norm": 0.537179739310336, "learning_rate": 4.4931062449310624e-06, "loss": 0.5737, "step": 31481 }, { "epoch": 0.9191556450906543, "grad_norm": 0.4856995713722517, "learning_rate": 4.491484184914842e-06, "loss": 0.4986, "step": 31482 }, { "epoch": 0.9191848413185016, "grad_norm": 0.5706646862581489, "learning_rate": 4.489862124898622e-06, "loss": 0.6505, "step": 31483 }, { "epoch": 0.919214037546349, "grad_norm": 0.5285884680702803, "learning_rate": 4.488240064882401e-06, "loss": 0.5637, "step": 31484 }, { "epoch": 0.9192432337741964, "grad_norm": 0.5480980487501482, "learning_rate": 4.48661800486618e-06, "loss": 0.5997, "step": 31485 }, { "epoch": 0.9192724300020437, "grad_norm": 0.5543715751960656, "learning_rate": 4.484995944849959e-06, "loss": 0.6341, "step": 31486 }, { "epoch": 0.9193016262298911, "grad_norm": 0.5014588687864373, "learning_rate": 4.4833738848337395e-06, "loss": 0.4959, "step": 31487 }, { "epoch": 0.9193308224577385, "grad_norm": 0.5312861120815058, "learning_rate": 4.481751824817519e-06, "loss": 0.5716, "step": 31488 }, { "epoch": 0.9193600186855858, "grad_norm": 0.5578716812461209, "learning_rate": 4.480129764801298e-06, "loss": 0.6237, "step": 31489 }, { "epoch": 0.9193892149134332, "grad_norm": 0.5601461915626651, "learning_rate": 4.478507704785077e-06, "loss": 0.637, "step": 31490 }, { "epoch": 0.9194184111412805, "grad_norm": 0.5253805746497975, "learning_rate": 4.476885644768856e-06, "loss": 0.5744, "step": 31491 }, { "epoch": 0.9194476073691279, "grad_norm": 0.49295124866290435, "learning_rate": 4.475263584752636e-06, "loss": 0.5016, "step": 31492 }, { "epoch": 0.9194768035969753, "grad_norm": 0.5288601799399755, "learning_rate": 4.473641524736416e-06, "loss": 0.5266, "step": 31493 }, { "epoch": 0.9195059998248226, "grad_norm": 0.49572093237131926, "learning_rate": 4.472019464720195e-06, "loss": 0.5274, "step": 31494 }, { "epoch": 0.91953519605267, "grad_norm": 0.5346034944546786, "learning_rate": 4.470397404703974e-06, "loss": 0.5591, "step": 31495 }, { "epoch": 0.9195643922805173, "grad_norm": 0.5401007738584681, "learning_rate": 4.468775344687753e-06, "loss": 0.6384, "step": 31496 }, { "epoch": 0.9195935885083647, "grad_norm": 0.5800662774786286, "learning_rate": 4.467153284671533e-06, "loss": 0.6767, "step": 31497 }, { "epoch": 0.9196227847362121, "grad_norm": 0.5271644603890628, "learning_rate": 4.465531224655313e-06, "loss": 0.5772, "step": 31498 }, { "epoch": 0.9196519809640594, "grad_norm": 0.5181126687748994, "learning_rate": 4.463909164639092e-06, "loss": 0.5602, "step": 31499 }, { "epoch": 0.9196811771919068, "grad_norm": 0.551177255863959, "learning_rate": 4.462287104622871e-06, "loss": 0.6054, "step": 31500 }, { "epoch": 0.9197103734197541, "grad_norm": 0.5198223402373288, "learning_rate": 4.460665044606651e-06, "loss": 0.5494, "step": 31501 }, { "epoch": 0.9197395696476015, "grad_norm": 0.565946249017307, "learning_rate": 4.45904298459043e-06, "loss": 0.6671, "step": 31502 }, { "epoch": 0.9197687658754489, "grad_norm": 0.5396093476864372, "learning_rate": 4.4574209245742095e-06, "loss": 0.5903, "step": 31503 }, { "epoch": 0.9197979621032962, "grad_norm": 0.5713399201782065, "learning_rate": 4.455798864557989e-06, "loss": 0.6608, "step": 31504 }, { "epoch": 0.9198271583311436, "grad_norm": 0.5154079592435497, "learning_rate": 4.454176804541768e-06, "loss": 0.5314, "step": 31505 }, { "epoch": 0.919856354558991, "grad_norm": 0.5613577731585299, "learning_rate": 4.452554744525548e-06, "loss": 0.6223, "step": 31506 }, { "epoch": 0.9198855507868383, "grad_norm": 0.535040983493608, "learning_rate": 4.450932684509327e-06, "loss": 0.5813, "step": 31507 }, { "epoch": 0.9199147470146857, "grad_norm": 0.5098592795434925, "learning_rate": 4.4493106244931065e-06, "loss": 0.5337, "step": 31508 }, { "epoch": 0.919943943242533, "grad_norm": 0.5620323379322205, "learning_rate": 4.447688564476886e-06, "loss": 0.6485, "step": 31509 }, { "epoch": 0.9199731394703804, "grad_norm": 0.6031965336384101, "learning_rate": 4.446066504460665e-06, "loss": 0.538, "step": 31510 }, { "epoch": 0.9200023356982278, "grad_norm": 0.513473353295254, "learning_rate": 4.444444444444445e-06, "loss": 0.5479, "step": 31511 }, { "epoch": 0.9200315319260751, "grad_norm": 0.5096127629608016, "learning_rate": 4.442822384428224e-06, "loss": 0.5418, "step": 31512 }, { "epoch": 0.9200607281539225, "grad_norm": 0.5060631077589715, "learning_rate": 4.4412003244120034e-06, "loss": 0.517, "step": 31513 }, { "epoch": 0.9200899243817698, "grad_norm": 0.5698702950562194, "learning_rate": 4.439578264395783e-06, "loss": 0.6134, "step": 31514 }, { "epoch": 0.9201191206096172, "grad_norm": 0.5166423301252425, "learning_rate": 4.437956204379562e-06, "loss": 0.5821, "step": 31515 }, { "epoch": 0.9201483168374646, "grad_norm": 0.5232350783312237, "learning_rate": 4.436334144363342e-06, "loss": 0.5788, "step": 31516 }, { "epoch": 0.9201775130653119, "grad_norm": 0.5263451903498197, "learning_rate": 4.434712084347121e-06, "loss": 0.56, "step": 31517 }, { "epoch": 0.9202067092931593, "grad_norm": 0.5331505022149494, "learning_rate": 4.4330900243309e-06, "loss": 0.5796, "step": 31518 }, { "epoch": 0.9202359055210066, "grad_norm": 0.4950538778297439, "learning_rate": 4.43146796431468e-06, "loss": 0.4843, "step": 31519 }, { "epoch": 0.920265101748854, "grad_norm": 0.5291810506157916, "learning_rate": 4.42984590429846e-06, "loss": 0.5648, "step": 31520 }, { "epoch": 0.9202942979767014, "grad_norm": 0.555273337580525, "learning_rate": 4.428223844282239e-06, "loss": 0.6117, "step": 31521 }, { "epoch": 0.9203234942045487, "grad_norm": 0.5679748264352518, "learning_rate": 4.426601784266018e-06, "loss": 0.6365, "step": 31522 }, { "epoch": 0.9203526904323961, "grad_norm": 0.5095443144110585, "learning_rate": 4.424979724249797e-06, "loss": 0.5139, "step": 31523 }, { "epoch": 0.9203818866602435, "grad_norm": 0.46691926892529756, "learning_rate": 4.4233576642335766e-06, "loss": 0.5006, "step": 31524 }, { "epoch": 0.9204110828880908, "grad_norm": 0.4956921239681744, "learning_rate": 4.421735604217357e-06, "loss": 0.4804, "step": 31525 }, { "epoch": 0.9204402791159382, "grad_norm": 0.5492908507444777, "learning_rate": 4.420113544201136e-06, "loss": 0.5669, "step": 31526 }, { "epoch": 0.9204694753437855, "grad_norm": 0.5165153596169771, "learning_rate": 4.418491484184915e-06, "loss": 0.5673, "step": 31527 }, { "epoch": 0.9204986715716329, "grad_norm": 0.5198883526261959, "learning_rate": 4.416869424168694e-06, "loss": 0.5878, "step": 31528 }, { "epoch": 0.9205278677994803, "grad_norm": 0.4921477395450885, "learning_rate": 4.4152473641524735e-06, "loss": 0.508, "step": 31529 }, { "epoch": 0.9205570640273276, "grad_norm": 0.5349341416187265, "learning_rate": 4.413625304136254e-06, "loss": 0.5993, "step": 31530 }, { "epoch": 0.920586260255175, "grad_norm": 0.5551955805364678, "learning_rate": 4.412003244120033e-06, "loss": 0.594, "step": 31531 }, { "epoch": 0.9206154564830223, "grad_norm": 0.5198782050452562, "learning_rate": 4.410381184103812e-06, "loss": 0.5602, "step": 31532 }, { "epoch": 0.9206446527108697, "grad_norm": 0.549770118021792, "learning_rate": 4.408759124087591e-06, "loss": 0.6012, "step": 31533 }, { "epoch": 0.9206738489387171, "grad_norm": 0.545144526427075, "learning_rate": 4.407137064071371e-06, "loss": 0.6528, "step": 31534 }, { "epoch": 0.9207030451665644, "grad_norm": 0.557150973805102, "learning_rate": 4.4055150040551505e-06, "loss": 0.5835, "step": 31535 }, { "epoch": 0.9207322413944118, "grad_norm": 0.5507505934588567, "learning_rate": 4.40389294403893e-06, "loss": 0.6067, "step": 31536 }, { "epoch": 0.9207614376222591, "grad_norm": 0.4921910252399184, "learning_rate": 4.402270884022709e-06, "loss": 0.5196, "step": 31537 }, { "epoch": 0.9207906338501066, "grad_norm": 0.4880454248019859, "learning_rate": 4.400648824006488e-06, "loss": 0.5296, "step": 31538 }, { "epoch": 0.920819830077954, "grad_norm": 0.5321474091769693, "learning_rate": 4.399026763990268e-06, "loss": 0.5552, "step": 31539 }, { "epoch": 0.9208490263058013, "grad_norm": 0.4987894730348901, "learning_rate": 4.3974047039740475e-06, "loss": 0.5392, "step": 31540 }, { "epoch": 0.9208782225336487, "grad_norm": 0.60137102336995, "learning_rate": 4.395782643957827e-06, "loss": 0.5847, "step": 31541 }, { "epoch": 0.9209074187614961, "grad_norm": 0.5290433535526928, "learning_rate": 4.394160583941606e-06, "loss": 0.6024, "step": 31542 }, { "epoch": 0.9209366149893434, "grad_norm": 0.5324971461688529, "learning_rate": 4.392538523925385e-06, "loss": 0.5733, "step": 31543 }, { "epoch": 0.9209658112171908, "grad_norm": 0.5114957371341937, "learning_rate": 4.390916463909165e-06, "loss": 0.5335, "step": 31544 }, { "epoch": 0.9209950074450381, "grad_norm": 0.5250553376319663, "learning_rate": 4.3892944038929444e-06, "loss": 0.5948, "step": 31545 }, { "epoch": 0.9210242036728855, "grad_norm": 0.5680546708034758, "learning_rate": 4.387672343876724e-06, "loss": 0.6426, "step": 31546 }, { "epoch": 0.9210533999007329, "grad_norm": 0.5749057434312896, "learning_rate": 4.386050283860503e-06, "loss": 0.6084, "step": 31547 }, { "epoch": 0.9210825961285802, "grad_norm": 0.5869483271370849, "learning_rate": 4.384428223844282e-06, "loss": 0.6847, "step": 31548 }, { "epoch": 0.9211117923564276, "grad_norm": 0.5691609826500713, "learning_rate": 4.382806163828062e-06, "loss": 0.6736, "step": 31549 }, { "epoch": 0.921140988584275, "grad_norm": 0.5399084366659849, "learning_rate": 4.381184103811841e-06, "loss": 0.6089, "step": 31550 }, { "epoch": 0.9211701848121223, "grad_norm": 0.533971402895024, "learning_rate": 4.379562043795621e-06, "loss": 0.5639, "step": 31551 }, { "epoch": 0.9211993810399697, "grad_norm": 0.5008131646876256, "learning_rate": 4.3779399837794e-06, "loss": 0.5466, "step": 31552 }, { "epoch": 0.921228577267817, "grad_norm": 0.5220267995284557, "learning_rate": 4.37631792376318e-06, "loss": 0.5886, "step": 31553 }, { "epoch": 0.9212577734956644, "grad_norm": 0.5414872286829362, "learning_rate": 4.374695863746959e-06, "loss": 0.5268, "step": 31554 }, { "epoch": 0.9212869697235118, "grad_norm": 0.530686971686488, "learning_rate": 4.373073803730738e-06, "loss": 0.5492, "step": 31555 }, { "epoch": 0.9213161659513591, "grad_norm": 0.5723295119822783, "learning_rate": 4.3714517437145176e-06, "loss": 0.6272, "step": 31556 }, { "epoch": 0.9213453621792065, "grad_norm": 0.5091233672790101, "learning_rate": 4.369829683698297e-06, "loss": 0.5589, "step": 31557 }, { "epoch": 0.9213745584070538, "grad_norm": 0.5631259395857819, "learning_rate": 4.368207623682077e-06, "loss": 0.6675, "step": 31558 }, { "epoch": 0.9214037546349012, "grad_norm": 0.5572238037625689, "learning_rate": 4.366585563665856e-06, "loss": 0.6223, "step": 31559 }, { "epoch": 0.9214329508627486, "grad_norm": 0.5180776771361238, "learning_rate": 4.364963503649635e-06, "loss": 0.5687, "step": 31560 }, { "epoch": 0.9214621470905959, "grad_norm": 0.5443500874558483, "learning_rate": 4.3633414436334145e-06, "loss": 0.6083, "step": 31561 }, { "epoch": 0.9214913433184433, "grad_norm": 0.5436141885394191, "learning_rate": 4.361719383617194e-06, "loss": 0.6348, "step": 31562 }, { "epoch": 0.9215205395462907, "grad_norm": 0.5057550467361227, "learning_rate": 4.360097323600974e-06, "loss": 0.5435, "step": 31563 }, { "epoch": 0.921549735774138, "grad_norm": 0.49094269091640824, "learning_rate": 4.358475263584753e-06, "loss": 0.4882, "step": 31564 }, { "epoch": 0.9215789320019854, "grad_norm": 0.5058545790701576, "learning_rate": 4.356853203568532e-06, "loss": 0.5209, "step": 31565 }, { "epoch": 0.9216081282298327, "grad_norm": 0.5193519730023552, "learning_rate": 4.3552311435523115e-06, "loss": 0.5666, "step": 31566 }, { "epoch": 0.9216373244576801, "grad_norm": 0.520896204884855, "learning_rate": 4.353609083536091e-06, "loss": 0.5575, "step": 31567 }, { "epoch": 0.9216665206855275, "grad_norm": 0.5370809730237359, "learning_rate": 4.351987023519871e-06, "loss": 0.6146, "step": 31568 }, { "epoch": 0.9216957169133748, "grad_norm": 0.5302689191896983, "learning_rate": 4.35036496350365e-06, "loss": 0.5761, "step": 31569 }, { "epoch": 0.9217249131412222, "grad_norm": 0.5424943266000762, "learning_rate": 4.348742903487429e-06, "loss": 0.6066, "step": 31570 }, { "epoch": 0.9217541093690695, "grad_norm": 0.5780914614734536, "learning_rate": 4.347120843471208e-06, "loss": 0.6067, "step": 31571 }, { "epoch": 0.9217833055969169, "grad_norm": 0.4912295134731601, "learning_rate": 4.3454987834549885e-06, "loss": 0.4892, "step": 31572 }, { "epoch": 0.9218125018247643, "grad_norm": 0.5350586619197003, "learning_rate": 4.343876723438768e-06, "loss": 0.5811, "step": 31573 }, { "epoch": 0.9218416980526116, "grad_norm": 0.49765734166447384, "learning_rate": 4.342254663422547e-06, "loss": 0.5038, "step": 31574 }, { "epoch": 0.921870894280459, "grad_norm": 0.4934400891855032, "learning_rate": 4.340632603406326e-06, "loss": 0.5243, "step": 31575 }, { "epoch": 0.9219000905083063, "grad_norm": 0.5190068911232097, "learning_rate": 4.339010543390105e-06, "loss": 0.5333, "step": 31576 }, { "epoch": 0.9219292867361537, "grad_norm": 0.571103244177564, "learning_rate": 4.3373884833738854e-06, "loss": 0.6285, "step": 31577 }, { "epoch": 0.9219584829640011, "grad_norm": 0.49236786265658933, "learning_rate": 4.335766423357665e-06, "loss": 0.5086, "step": 31578 }, { "epoch": 0.9219876791918484, "grad_norm": 0.5618327601932434, "learning_rate": 4.334144363341444e-06, "loss": 0.6126, "step": 31579 }, { "epoch": 0.9220168754196958, "grad_norm": 0.5589352552535394, "learning_rate": 4.332522303325223e-06, "loss": 0.6408, "step": 31580 }, { "epoch": 0.9220460716475432, "grad_norm": 0.5420307394654816, "learning_rate": 4.330900243309002e-06, "loss": 0.5982, "step": 31581 }, { "epoch": 0.9220752678753905, "grad_norm": 0.5295711405547872, "learning_rate": 4.329278183292782e-06, "loss": 0.597, "step": 31582 }, { "epoch": 0.9221044641032379, "grad_norm": 0.5581004412684857, "learning_rate": 4.327656123276562e-06, "loss": 0.5576, "step": 31583 }, { "epoch": 0.9221336603310852, "grad_norm": 0.5437143154420264, "learning_rate": 4.326034063260341e-06, "loss": 0.5952, "step": 31584 }, { "epoch": 0.9221628565589326, "grad_norm": 0.517941698915388, "learning_rate": 4.32441200324412e-06, "loss": 0.5793, "step": 31585 }, { "epoch": 0.92219205278678, "grad_norm": 0.5281113212296392, "learning_rate": 4.3227899432279e-06, "loss": 0.5526, "step": 31586 }, { "epoch": 0.9222212490146273, "grad_norm": 0.5186614481856223, "learning_rate": 4.321167883211679e-06, "loss": 0.558, "step": 31587 }, { "epoch": 0.9222504452424747, "grad_norm": 0.5450551558680978, "learning_rate": 4.3195458231954586e-06, "loss": 0.6162, "step": 31588 }, { "epoch": 0.922279641470322, "grad_norm": 0.5031221825761053, "learning_rate": 4.317923763179238e-06, "loss": 0.5275, "step": 31589 }, { "epoch": 0.9223088376981694, "grad_norm": 0.49762744469444614, "learning_rate": 4.316301703163017e-06, "loss": 0.536, "step": 31590 }, { "epoch": 0.9223380339260168, "grad_norm": 0.5469126207167733, "learning_rate": 4.314679643146797e-06, "loss": 0.571, "step": 31591 }, { "epoch": 0.9223672301538641, "grad_norm": 0.5268913609149201, "learning_rate": 4.313057583130576e-06, "loss": 0.5781, "step": 31592 }, { "epoch": 0.9223964263817115, "grad_norm": 0.5280692643291862, "learning_rate": 4.3114355231143555e-06, "loss": 0.5131, "step": 31593 }, { "epoch": 0.9224256226095588, "grad_norm": 0.49249526141384137, "learning_rate": 4.309813463098135e-06, "loss": 0.5222, "step": 31594 }, { "epoch": 0.9224548188374062, "grad_norm": 0.5484398372829915, "learning_rate": 4.308191403081914e-06, "loss": 0.6005, "step": 31595 }, { "epoch": 0.9224840150652536, "grad_norm": 0.5137608194183865, "learning_rate": 4.306569343065694e-06, "loss": 0.5312, "step": 31596 }, { "epoch": 0.9225132112931009, "grad_norm": 0.515769024350481, "learning_rate": 4.304947283049473e-06, "loss": 0.5152, "step": 31597 }, { "epoch": 0.9225424075209483, "grad_norm": 0.5587123222693664, "learning_rate": 4.3033252230332525e-06, "loss": 0.6308, "step": 31598 }, { "epoch": 0.9225716037487957, "grad_norm": 0.5228515884314627, "learning_rate": 4.301703163017032e-06, "loss": 0.5384, "step": 31599 }, { "epoch": 0.922600799976643, "grad_norm": 0.541195606693936, "learning_rate": 4.300081103000811e-06, "loss": 0.5843, "step": 31600 }, { "epoch": 0.9226299962044904, "grad_norm": 0.5153797645667267, "learning_rate": 4.298459042984591e-06, "loss": 0.5482, "step": 31601 }, { "epoch": 0.9226591924323377, "grad_norm": 0.5494054213213653, "learning_rate": 4.29683698296837e-06, "loss": 0.6056, "step": 31602 }, { "epoch": 0.9226883886601851, "grad_norm": 0.5360227339198453, "learning_rate": 4.295214922952149e-06, "loss": 0.5741, "step": 31603 }, { "epoch": 0.9227175848880325, "grad_norm": 0.5329392146860268, "learning_rate": 4.293592862935929e-06, "loss": 0.6141, "step": 31604 }, { "epoch": 0.9227467811158798, "grad_norm": 0.532615494363049, "learning_rate": 4.291970802919709e-06, "loss": 0.5459, "step": 31605 }, { "epoch": 0.9227759773437272, "grad_norm": 0.5295546573605017, "learning_rate": 4.290348742903488e-06, "loss": 0.5204, "step": 31606 }, { "epoch": 0.9228051735715745, "grad_norm": 0.569813767704949, "learning_rate": 4.288726682887267e-06, "loss": 0.6673, "step": 31607 }, { "epoch": 0.9228343697994219, "grad_norm": 0.5044792564749505, "learning_rate": 4.287104622871046e-06, "loss": 0.4861, "step": 31608 }, { "epoch": 0.9228635660272693, "grad_norm": 0.5143132280822913, "learning_rate": 4.285482562854826e-06, "loss": 0.5311, "step": 31609 }, { "epoch": 0.9228927622551166, "grad_norm": 0.5029600065265954, "learning_rate": 4.283860502838606e-06, "loss": 0.515, "step": 31610 }, { "epoch": 0.922921958482964, "grad_norm": 0.541370609938592, "learning_rate": 4.282238442822385e-06, "loss": 0.5728, "step": 31611 }, { "epoch": 0.9229511547108114, "grad_norm": 0.533820279956406, "learning_rate": 4.280616382806164e-06, "loss": 0.6051, "step": 31612 }, { "epoch": 0.9229803509386587, "grad_norm": 0.5300298057048995, "learning_rate": 4.278994322789943e-06, "loss": 0.5799, "step": 31613 }, { "epoch": 0.9230095471665061, "grad_norm": 0.5208746044911824, "learning_rate": 4.2773722627737225e-06, "loss": 0.534, "step": 31614 }, { "epoch": 0.9230387433943534, "grad_norm": 0.5119582007384048, "learning_rate": 4.275750202757503e-06, "loss": 0.5512, "step": 31615 }, { "epoch": 0.9230679396222008, "grad_norm": 0.5154571115858466, "learning_rate": 4.274128142741282e-06, "loss": 0.5736, "step": 31616 }, { "epoch": 0.9230971358500482, "grad_norm": 0.6110643154763786, "learning_rate": 4.272506082725061e-06, "loss": 0.5441, "step": 31617 }, { "epoch": 0.9231263320778955, "grad_norm": 0.5606210358258253, "learning_rate": 4.27088402270884e-06, "loss": 0.6275, "step": 31618 }, { "epoch": 0.9231555283057429, "grad_norm": 0.5702248931690681, "learning_rate": 4.26926196269262e-06, "loss": 0.6682, "step": 31619 }, { "epoch": 0.9231847245335902, "grad_norm": 0.5248132485249132, "learning_rate": 4.2676399026763996e-06, "loss": 0.5752, "step": 31620 }, { "epoch": 0.9232139207614376, "grad_norm": 0.5289023000631159, "learning_rate": 4.266017842660179e-06, "loss": 0.5611, "step": 31621 }, { "epoch": 0.923243116989285, "grad_norm": 0.5493662753676398, "learning_rate": 4.264395782643958e-06, "loss": 0.6165, "step": 31622 }, { "epoch": 0.9232723132171323, "grad_norm": 0.5584459369180012, "learning_rate": 4.262773722627737e-06, "loss": 0.6223, "step": 31623 }, { "epoch": 0.9233015094449797, "grad_norm": 0.49411098478157206, "learning_rate": 4.261151662611517e-06, "loss": 0.5083, "step": 31624 }, { "epoch": 0.923330705672827, "grad_norm": 0.5138417044242218, "learning_rate": 4.2595296025952965e-06, "loss": 0.555, "step": 31625 }, { "epoch": 0.9233599019006744, "grad_norm": 0.5384211405349354, "learning_rate": 4.257907542579075e-06, "loss": 0.6263, "step": 31626 }, { "epoch": 0.9233890981285218, "grad_norm": 0.5202271926684504, "learning_rate": 4.256285482562855e-06, "loss": 0.5693, "step": 31627 }, { "epoch": 0.9234182943563691, "grad_norm": 0.5143139035194548, "learning_rate": 4.254663422546634e-06, "loss": 0.5778, "step": 31628 }, { "epoch": 0.9234474905842165, "grad_norm": 0.5250044888254742, "learning_rate": 4.253041362530414e-06, "loss": 0.561, "step": 31629 }, { "epoch": 0.9234766868120639, "grad_norm": 0.5380326523138352, "learning_rate": 4.2514193025141935e-06, "loss": 0.6212, "step": 31630 }, { "epoch": 0.9235058830399112, "grad_norm": 0.5332873582718479, "learning_rate": 4.249797242497973e-06, "loss": 0.6039, "step": 31631 }, { "epoch": 0.9235350792677586, "grad_norm": 0.5259861303560062, "learning_rate": 4.248175182481752e-06, "loss": 0.5549, "step": 31632 }, { "epoch": 0.9235642754956059, "grad_norm": 0.5416689261670976, "learning_rate": 4.246553122465531e-06, "loss": 0.5873, "step": 31633 }, { "epoch": 0.9235934717234533, "grad_norm": 0.527118886274611, "learning_rate": 4.244931062449311e-06, "loss": 0.5577, "step": 31634 }, { "epoch": 0.9236226679513007, "grad_norm": 0.530974506358107, "learning_rate": 4.24330900243309e-06, "loss": 0.6047, "step": 31635 }, { "epoch": 0.923651864179148, "grad_norm": 0.5114848470248781, "learning_rate": 4.24168694241687e-06, "loss": 0.5472, "step": 31636 }, { "epoch": 0.9236810604069954, "grad_norm": 0.5737406448967717, "learning_rate": 4.240064882400649e-06, "loss": 0.6678, "step": 31637 }, { "epoch": 0.9237102566348427, "grad_norm": 0.530519230348059, "learning_rate": 4.238442822384429e-06, "loss": 0.5925, "step": 31638 }, { "epoch": 0.9237394528626901, "grad_norm": 0.5384660303140044, "learning_rate": 4.236820762368208e-06, "loss": 0.562, "step": 31639 }, { "epoch": 0.9237686490905375, "grad_norm": 0.5302209305092982, "learning_rate": 4.2351987023519865e-06, "loss": 0.5872, "step": 31640 }, { "epoch": 0.9237978453183848, "grad_norm": 0.5151871088770154, "learning_rate": 4.233576642335767e-06, "loss": 0.5236, "step": 31641 }, { "epoch": 0.9238270415462322, "grad_norm": 0.5278639050251861, "learning_rate": 4.231954582319546e-06, "loss": 0.5551, "step": 31642 }, { "epoch": 0.9238562377740795, "grad_norm": 0.5358797520398131, "learning_rate": 4.230332522303326e-06, "loss": 0.5943, "step": 31643 }, { "epoch": 0.9238854340019269, "grad_norm": 0.5139563746238641, "learning_rate": 4.228710462287105e-06, "loss": 0.5647, "step": 31644 }, { "epoch": 0.9239146302297743, "grad_norm": 0.560530857396863, "learning_rate": 4.227088402270884e-06, "loss": 0.6023, "step": 31645 }, { "epoch": 0.9239438264576216, "grad_norm": 0.490529485963123, "learning_rate": 4.2254663422546635e-06, "loss": 0.4994, "step": 31646 }, { "epoch": 0.923973022685469, "grad_norm": 0.5510653001104472, "learning_rate": 4.223844282238443e-06, "loss": 0.621, "step": 31647 }, { "epoch": 0.9240022189133164, "grad_norm": 0.5246616033561324, "learning_rate": 4.222222222222223e-06, "loss": 0.5541, "step": 31648 }, { "epoch": 0.9240314151411637, "grad_norm": 0.4937777564515202, "learning_rate": 4.220600162206002e-06, "loss": 0.4996, "step": 31649 }, { "epoch": 0.9240606113690111, "grad_norm": 0.5249596483995618, "learning_rate": 4.218978102189781e-06, "loss": 0.5384, "step": 31650 }, { "epoch": 0.9240898075968584, "grad_norm": 0.5944517897340064, "learning_rate": 4.2173560421735605e-06, "loss": 0.651, "step": 31651 }, { "epoch": 0.9241190038247058, "grad_norm": 0.5439504600361688, "learning_rate": 4.2157339821573406e-06, "loss": 0.5422, "step": 31652 }, { "epoch": 0.9241482000525532, "grad_norm": 0.52206837312459, "learning_rate": 4.21411192214112e-06, "loss": 0.557, "step": 31653 }, { "epoch": 0.9241773962804005, "grad_norm": 0.48824871885466115, "learning_rate": 4.212489862124898e-06, "loss": 0.5172, "step": 31654 }, { "epoch": 0.9242065925082479, "grad_norm": 0.5394658749089071, "learning_rate": 4.210867802108678e-06, "loss": 0.6249, "step": 31655 }, { "epoch": 0.9242357887360952, "grad_norm": 0.5077373055474473, "learning_rate": 4.2092457420924574e-06, "loss": 0.5183, "step": 31656 }, { "epoch": 0.9242649849639426, "grad_norm": 0.4909063700925808, "learning_rate": 4.2076236820762375e-06, "loss": 0.5323, "step": 31657 }, { "epoch": 0.92429418119179, "grad_norm": 0.5668042568955266, "learning_rate": 4.206001622060017e-06, "loss": 0.6363, "step": 31658 }, { "epoch": 0.9243233774196374, "grad_norm": 0.514799940924515, "learning_rate": 4.204379562043795e-06, "loss": 0.5904, "step": 31659 }, { "epoch": 0.9243525736474848, "grad_norm": 0.556788268047894, "learning_rate": 4.202757502027575e-06, "loss": 0.5998, "step": 31660 }, { "epoch": 0.9243817698753322, "grad_norm": 0.48679531141847554, "learning_rate": 4.201135442011354e-06, "loss": 0.5212, "step": 31661 }, { "epoch": 0.9244109661031795, "grad_norm": 0.6025687446740651, "learning_rate": 4.1995133819951345e-06, "loss": 0.7017, "step": 31662 }, { "epoch": 0.9244401623310269, "grad_norm": 0.5467558932175975, "learning_rate": 4.197891321978914e-06, "loss": 0.5791, "step": 31663 }, { "epoch": 0.9244693585588742, "grad_norm": 0.5298302799597795, "learning_rate": 4.196269261962693e-06, "loss": 0.5838, "step": 31664 }, { "epoch": 0.9244985547867216, "grad_norm": 0.5297367122590647, "learning_rate": 4.194647201946472e-06, "loss": 0.5421, "step": 31665 }, { "epoch": 0.924527751014569, "grad_norm": 0.5343973139398518, "learning_rate": 4.193025141930251e-06, "loss": 0.5647, "step": 31666 }, { "epoch": 0.9245569472424163, "grad_norm": 0.48493126865908104, "learning_rate": 4.191403081914031e-06, "loss": 0.5, "step": 31667 }, { "epoch": 0.9245861434702637, "grad_norm": 0.5544345194202457, "learning_rate": 4.189781021897811e-06, "loss": 0.577, "step": 31668 }, { "epoch": 0.924615339698111, "grad_norm": 0.5589102139536157, "learning_rate": 4.18815896188159e-06, "loss": 0.5463, "step": 31669 }, { "epoch": 0.9246445359259584, "grad_norm": 0.5544134478128567, "learning_rate": 4.186536901865369e-06, "loss": 0.6439, "step": 31670 }, { "epoch": 0.9246737321538058, "grad_norm": 0.5252839496642434, "learning_rate": 4.184914841849149e-06, "loss": 0.6097, "step": 31671 }, { "epoch": 0.9247029283816531, "grad_norm": 0.5284038602329558, "learning_rate": 4.183292781832928e-06, "loss": 0.5686, "step": 31672 }, { "epoch": 0.9247321246095005, "grad_norm": 0.5099910373421942, "learning_rate": 4.181670721816707e-06, "loss": 0.5395, "step": 31673 }, { "epoch": 0.9247613208373479, "grad_norm": 0.5280528458333184, "learning_rate": 4.180048661800487e-06, "loss": 0.5694, "step": 31674 }, { "epoch": 0.9247905170651952, "grad_norm": 0.4847786249699805, "learning_rate": 4.178426601784266e-06, "loss": 0.4865, "step": 31675 }, { "epoch": 0.9248197132930426, "grad_norm": 0.5461583139774984, "learning_rate": 4.176804541768046e-06, "loss": 0.5851, "step": 31676 }, { "epoch": 0.9248489095208899, "grad_norm": 0.5498299478813253, "learning_rate": 4.175182481751825e-06, "loss": 0.6328, "step": 31677 }, { "epoch": 0.9248781057487373, "grad_norm": 0.5262321558341001, "learning_rate": 4.1735604217356045e-06, "loss": 0.5754, "step": 31678 }, { "epoch": 0.9249073019765847, "grad_norm": 0.5537579412101841, "learning_rate": 4.171938361719384e-06, "loss": 0.6194, "step": 31679 }, { "epoch": 0.924936498204432, "grad_norm": 0.557676114791061, "learning_rate": 4.170316301703163e-06, "loss": 0.6001, "step": 31680 }, { "epoch": 0.9249656944322794, "grad_norm": 0.5388123452400501, "learning_rate": 4.168694241686943e-06, "loss": 0.6007, "step": 31681 }, { "epoch": 0.9249948906601267, "grad_norm": 0.550797345905352, "learning_rate": 4.167072181670722e-06, "loss": 0.5938, "step": 31682 }, { "epoch": 0.9250240868879741, "grad_norm": 0.5101637920569293, "learning_rate": 4.1654501216545015e-06, "loss": 0.565, "step": 31683 }, { "epoch": 0.9250532831158215, "grad_norm": 0.49943151318547535, "learning_rate": 4.163828061638281e-06, "loss": 0.5143, "step": 31684 }, { "epoch": 0.9250824793436688, "grad_norm": 0.5058902789786079, "learning_rate": 4.16220600162206e-06, "loss": 0.5258, "step": 31685 }, { "epoch": 0.9251116755715162, "grad_norm": 0.5472614818896671, "learning_rate": 4.16058394160584e-06, "loss": 0.6104, "step": 31686 }, { "epoch": 0.9251408717993636, "grad_norm": 0.5576947758942612, "learning_rate": 4.158961881589618e-06, "loss": 0.6097, "step": 31687 }, { "epoch": 0.9251700680272109, "grad_norm": 0.5204608027074566, "learning_rate": 4.1573398215733984e-06, "loss": 0.5416, "step": 31688 }, { "epoch": 0.9251992642550583, "grad_norm": 0.5202740327125583, "learning_rate": 4.155717761557178e-06, "loss": 0.5589, "step": 31689 }, { "epoch": 0.9252284604829056, "grad_norm": 0.5167913640107891, "learning_rate": 4.154095701540958e-06, "loss": 0.577, "step": 31690 }, { "epoch": 0.925257656710753, "grad_norm": 0.542580025967738, "learning_rate": 4.152473641524737e-06, "loss": 0.5714, "step": 31691 }, { "epoch": 0.9252868529386004, "grad_norm": 0.5203086559966557, "learning_rate": 4.150851581508515e-06, "loss": 0.5864, "step": 31692 }, { "epoch": 0.9253160491664477, "grad_norm": 0.5206992749102279, "learning_rate": 4.149229521492295e-06, "loss": 0.5514, "step": 31693 }, { "epoch": 0.9253452453942951, "grad_norm": 0.4996893775361606, "learning_rate": 4.147607461476075e-06, "loss": 0.5175, "step": 31694 }, { "epoch": 0.9253744416221424, "grad_norm": 0.5000710886871403, "learning_rate": 4.145985401459855e-06, "loss": 0.5195, "step": 31695 }, { "epoch": 0.9254036378499898, "grad_norm": 0.5260227630510738, "learning_rate": 4.144363341443634e-06, "loss": 0.5702, "step": 31696 }, { "epoch": 0.9254328340778372, "grad_norm": 0.5603345741696324, "learning_rate": 4.142741281427413e-06, "loss": 0.6474, "step": 31697 }, { "epoch": 0.9254620303056845, "grad_norm": 0.5008231554813611, "learning_rate": 4.141119221411192e-06, "loss": 0.5166, "step": 31698 }, { "epoch": 0.9254912265335319, "grad_norm": 0.5455945608673524, "learning_rate": 4.1394971613949716e-06, "loss": 0.5941, "step": 31699 }, { "epoch": 0.9255204227613792, "grad_norm": 0.5652140593897585, "learning_rate": 4.137875101378752e-06, "loss": 0.617, "step": 31700 }, { "epoch": 0.9255496189892266, "grad_norm": 0.524098260817676, "learning_rate": 4.13625304136253e-06, "loss": 0.5793, "step": 31701 }, { "epoch": 0.925578815217074, "grad_norm": 0.519428493729089, "learning_rate": 4.13463098134631e-06, "loss": 0.549, "step": 31702 }, { "epoch": 0.9256080114449213, "grad_norm": 0.5262043357503445, "learning_rate": 4.133008921330089e-06, "loss": 0.5786, "step": 31703 }, { "epoch": 0.9256372076727687, "grad_norm": 0.53046888409467, "learning_rate": 4.131386861313869e-06, "loss": 0.5758, "step": 31704 }, { "epoch": 0.925666403900616, "grad_norm": 0.548707959531736, "learning_rate": 4.129764801297649e-06, "loss": 0.5865, "step": 31705 }, { "epoch": 0.9256956001284634, "grad_norm": 0.539789642249686, "learning_rate": 4.128142741281427e-06, "loss": 0.6371, "step": 31706 }, { "epoch": 0.9257247963563108, "grad_norm": 0.539595529866407, "learning_rate": 4.126520681265207e-06, "loss": 0.5948, "step": 31707 }, { "epoch": 0.9257539925841581, "grad_norm": 0.6010941729176569, "learning_rate": 4.124898621248986e-06, "loss": 0.72, "step": 31708 }, { "epoch": 0.9257831888120055, "grad_norm": 0.555223040936138, "learning_rate": 4.123276561232766e-06, "loss": 0.5844, "step": 31709 }, { "epoch": 0.9258123850398529, "grad_norm": 0.54666096237715, "learning_rate": 4.1216545012165455e-06, "loss": 0.6166, "step": 31710 }, { "epoch": 0.9258415812677002, "grad_norm": 0.5554286207110298, "learning_rate": 4.120032441200325e-06, "loss": 0.613, "step": 31711 }, { "epoch": 0.9258707774955476, "grad_norm": 0.544839008022223, "learning_rate": 4.118410381184104e-06, "loss": 0.5543, "step": 31712 }, { "epoch": 0.9258999737233949, "grad_norm": 0.5192607405194857, "learning_rate": 4.116788321167883e-06, "loss": 0.5514, "step": 31713 }, { "epoch": 0.9259291699512423, "grad_norm": 0.5261198755890069, "learning_rate": 4.115166261151663e-06, "loss": 0.5784, "step": 31714 }, { "epoch": 0.9259583661790897, "grad_norm": 0.5312136976592587, "learning_rate": 4.113544201135442e-06, "loss": 0.5734, "step": 31715 }, { "epoch": 0.925987562406937, "grad_norm": 0.5194458877778876, "learning_rate": 4.111922141119222e-06, "loss": 0.5405, "step": 31716 }, { "epoch": 0.9260167586347844, "grad_norm": 0.4997774214524179, "learning_rate": 4.110300081103001e-06, "loss": 0.5225, "step": 31717 }, { "epoch": 0.9260459548626317, "grad_norm": 0.5856058364953296, "learning_rate": 4.10867802108678e-06, "loss": 0.6624, "step": 31718 }, { "epoch": 0.9260751510904791, "grad_norm": 0.5403470662137895, "learning_rate": 4.10705596107056e-06, "loss": 0.5933, "step": 31719 }, { "epoch": 0.9261043473183265, "grad_norm": 0.5269853255099808, "learning_rate": 4.105433901054339e-06, "loss": 0.5309, "step": 31720 }, { "epoch": 0.9261335435461738, "grad_norm": 0.5344081183618624, "learning_rate": 4.103811841038119e-06, "loss": 0.5939, "step": 31721 }, { "epoch": 0.9261627397740212, "grad_norm": 0.5060789384935875, "learning_rate": 4.102189781021898e-06, "loss": 0.5438, "step": 31722 }, { "epoch": 0.9261919360018686, "grad_norm": 0.5321187530215853, "learning_rate": 4.100567721005678e-06, "loss": 0.6284, "step": 31723 }, { "epoch": 0.9262211322297159, "grad_norm": 0.5412559603959498, "learning_rate": 4.098945660989457e-06, "loss": 0.6082, "step": 31724 }, { "epoch": 0.9262503284575633, "grad_norm": 0.5638270613482137, "learning_rate": 4.0973236009732356e-06, "loss": 0.6341, "step": 31725 }, { "epoch": 0.9262795246854106, "grad_norm": 0.5238640974349875, "learning_rate": 4.095701540957016e-06, "loss": 0.5673, "step": 31726 }, { "epoch": 0.926308720913258, "grad_norm": 0.4963023328851725, "learning_rate": 4.094079480940795e-06, "loss": 0.5166, "step": 31727 }, { "epoch": 0.9263379171411054, "grad_norm": 0.553032076485655, "learning_rate": 4.092457420924575e-06, "loss": 0.6094, "step": 31728 }, { "epoch": 0.9263671133689527, "grad_norm": 0.5401640044713525, "learning_rate": 4.090835360908353e-06, "loss": 0.6001, "step": 31729 }, { "epoch": 0.9263963095968001, "grad_norm": 0.5587639980108693, "learning_rate": 4.089213300892133e-06, "loss": 0.665, "step": 31730 }, { "epoch": 0.9264255058246474, "grad_norm": 0.49856824087393814, "learning_rate": 4.0875912408759126e-06, "loss": 0.5478, "step": 31731 }, { "epoch": 0.9264547020524948, "grad_norm": 0.5455498413523353, "learning_rate": 4.085969180859692e-06, "loss": 0.585, "step": 31732 }, { "epoch": 0.9264838982803422, "grad_norm": 0.5317716130620527, "learning_rate": 4.084347120843472e-06, "loss": 0.5626, "step": 31733 }, { "epoch": 0.9265130945081895, "grad_norm": 0.5979508636001245, "learning_rate": 4.08272506082725e-06, "loss": 0.6824, "step": 31734 }, { "epoch": 0.9265422907360369, "grad_norm": 0.5253886713921703, "learning_rate": 4.08110300081103e-06, "loss": 0.5251, "step": 31735 }, { "epoch": 0.9265714869638842, "grad_norm": 0.509530740765872, "learning_rate": 4.0794809407948095e-06, "loss": 0.5203, "step": 31736 }, { "epoch": 0.9266006831917316, "grad_norm": 0.4977113728356556, "learning_rate": 4.07785888077859e-06, "loss": 0.5279, "step": 31737 }, { "epoch": 0.926629879419579, "grad_norm": 0.508413099624523, "learning_rate": 4.076236820762369e-06, "loss": 0.5624, "step": 31738 }, { "epoch": 0.9266590756474263, "grad_norm": 0.5568323670096685, "learning_rate": 4.074614760746147e-06, "loss": 0.6124, "step": 31739 }, { "epoch": 0.9266882718752737, "grad_norm": 0.5689849016654125, "learning_rate": 4.072992700729927e-06, "loss": 0.6571, "step": 31740 }, { "epoch": 0.9267174681031211, "grad_norm": 0.5454681412463998, "learning_rate": 4.0713706407137065e-06, "loss": 0.6369, "step": 31741 }, { "epoch": 0.9267466643309684, "grad_norm": 0.5051605334864882, "learning_rate": 4.0697485806974865e-06, "loss": 0.5139, "step": 31742 }, { "epoch": 0.9267758605588158, "grad_norm": 0.5132190985875738, "learning_rate": 4.068126520681266e-06, "loss": 0.5255, "step": 31743 }, { "epoch": 0.9268050567866631, "grad_norm": 0.5191660184687094, "learning_rate": 4.066504460665045e-06, "loss": 0.5488, "step": 31744 }, { "epoch": 0.9268342530145105, "grad_norm": 0.5466448355232947, "learning_rate": 4.064882400648824e-06, "loss": 0.6113, "step": 31745 }, { "epoch": 0.9268634492423579, "grad_norm": 0.5312677658251074, "learning_rate": 4.0632603406326034e-06, "loss": 0.5728, "step": 31746 }, { "epoch": 0.9268926454702052, "grad_norm": 0.5395460968724758, "learning_rate": 4.0616382806163835e-06, "loss": 0.5462, "step": 31747 }, { "epoch": 0.9269218416980526, "grad_norm": 0.5256157713924506, "learning_rate": 4.060016220600162e-06, "loss": 0.582, "step": 31748 }, { "epoch": 0.9269510379259, "grad_norm": 0.5787205310471987, "learning_rate": 4.058394160583942e-06, "loss": 0.699, "step": 31749 }, { "epoch": 0.9269802341537473, "grad_norm": 0.5014756337090778, "learning_rate": 4.056772100567721e-06, "loss": 0.5389, "step": 31750 }, { "epoch": 0.9270094303815947, "grad_norm": 0.5538159132028152, "learning_rate": 4.0551500405515e-06, "loss": 0.6091, "step": 31751 }, { "epoch": 0.927038626609442, "grad_norm": 0.5282439201401152, "learning_rate": 4.0535279805352804e-06, "loss": 0.5595, "step": 31752 }, { "epoch": 0.9270678228372894, "grad_norm": 0.522372474539332, "learning_rate": 4.051905920519059e-06, "loss": 0.5967, "step": 31753 }, { "epoch": 0.9270970190651368, "grad_norm": 0.5536202711017255, "learning_rate": 4.050283860502839e-06, "loss": 0.6444, "step": 31754 }, { "epoch": 0.9271262152929841, "grad_norm": 0.5312442789823759, "learning_rate": 4.048661800486618e-06, "loss": 0.5939, "step": 31755 }, { "epoch": 0.9271554115208315, "grad_norm": 0.5013346440902845, "learning_rate": 4.047039740470398e-06, "loss": 0.5525, "step": 31756 }, { "epoch": 0.9271846077486788, "grad_norm": 0.5775856104484252, "learning_rate": 4.045417680454177e-06, "loss": 0.6771, "step": 31757 }, { "epoch": 0.9272138039765262, "grad_norm": 0.5214462272554865, "learning_rate": 4.043795620437956e-06, "loss": 0.585, "step": 31758 }, { "epoch": 0.9272430002043736, "grad_norm": 0.5464449376820664, "learning_rate": 4.042173560421736e-06, "loss": 0.6119, "step": 31759 }, { "epoch": 0.9272721964322209, "grad_norm": 0.5555762147969455, "learning_rate": 4.040551500405515e-06, "loss": 0.5929, "step": 31760 }, { "epoch": 0.9273013926600683, "grad_norm": 0.5188878921639553, "learning_rate": 4.038929440389295e-06, "loss": 0.5443, "step": 31761 }, { "epoch": 0.9273305888879156, "grad_norm": 0.561231198155465, "learning_rate": 4.0373073803730735e-06, "loss": 0.6271, "step": 31762 }, { "epoch": 0.927359785115763, "grad_norm": 0.5391091422290021, "learning_rate": 4.0356853203568536e-06, "loss": 0.6248, "step": 31763 }, { "epoch": 0.9273889813436104, "grad_norm": 0.5144970786268246, "learning_rate": 4.034063260340633e-06, "loss": 0.5323, "step": 31764 }, { "epoch": 0.9274181775714577, "grad_norm": 0.5750645833998277, "learning_rate": 4.032441200324412e-06, "loss": 0.6866, "step": 31765 }, { "epoch": 0.9274473737993051, "grad_norm": 0.5149414915718974, "learning_rate": 4.030819140308192e-06, "loss": 0.5387, "step": 31766 }, { "epoch": 0.9274765700271524, "grad_norm": 0.45447774082554526, "learning_rate": 4.0291970802919705e-06, "loss": 0.4476, "step": 31767 }, { "epoch": 0.9275057662549998, "grad_norm": 0.5343325163790663, "learning_rate": 4.0275750202757505e-06, "loss": 0.5892, "step": 31768 }, { "epoch": 0.9275349624828472, "grad_norm": 0.5065934032007003, "learning_rate": 4.02595296025953e-06, "loss": 0.5414, "step": 31769 }, { "epoch": 0.9275641587106945, "grad_norm": 0.5726323788249503, "learning_rate": 4.02433090024331e-06, "loss": 0.6542, "step": 31770 }, { "epoch": 0.9275933549385419, "grad_norm": 0.5411653895213652, "learning_rate": 4.022708840227089e-06, "loss": 0.6022, "step": 31771 }, { "epoch": 0.9276225511663893, "grad_norm": 0.5134578542593681, "learning_rate": 4.021086780210867e-06, "loss": 0.5396, "step": 31772 }, { "epoch": 0.9276517473942366, "grad_norm": 0.5283996840014452, "learning_rate": 4.0194647201946475e-06, "loss": 0.54, "step": 31773 }, { "epoch": 0.927680943622084, "grad_norm": 0.5682485469103384, "learning_rate": 4.017842660178427e-06, "loss": 0.6159, "step": 31774 }, { "epoch": 0.9277101398499313, "grad_norm": 0.4975878524274104, "learning_rate": 4.016220600162207e-06, "loss": 0.4837, "step": 31775 }, { "epoch": 0.9277393360777787, "grad_norm": 0.512569241017948, "learning_rate": 4.014598540145985e-06, "loss": 0.5315, "step": 31776 }, { "epoch": 0.9277685323056261, "grad_norm": 0.508530751287917, "learning_rate": 4.012976480129764e-06, "loss": 0.5432, "step": 31777 }, { "epoch": 0.9277977285334734, "grad_norm": 0.5304628124451883, "learning_rate": 4.0113544201135444e-06, "loss": 0.5934, "step": 31778 }, { "epoch": 0.9278269247613209, "grad_norm": 0.52324847802988, "learning_rate": 4.009732360097324e-06, "loss": 0.5812, "step": 31779 }, { "epoch": 0.9278561209891683, "grad_norm": 0.5078098649063262, "learning_rate": 4.008110300081104e-06, "loss": 0.5313, "step": 31780 }, { "epoch": 0.9278853172170156, "grad_norm": 0.522065702971106, "learning_rate": 4.006488240064882e-06, "loss": 0.5559, "step": 31781 }, { "epoch": 0.927914513444863, "grad_norm": 0.5029925081468293, "learning_rate": 4.004866180048662e-06, "loss": 0.4735, "step": 31782 }, { "epoch": 0.9279437096727103, "grad_norm": 0.5503726610955844, "learning_rate": 4.003244120032441e-06, "loss": 0.6038, "step": 31783 }, { "epoch": 0.9279729059005577, "grad_norm": 0.5619467747658039, "learning_rate": 4.001622060016221e-06, "loss": 0.6622, "step": 31784 }, { "epoch": 0.9280021021284051, "grad_norm": 0.5273991800815414, "learning_rate": 4.000000000000001e-06, "loss": 0.5493, "step": 31785 }, { "epoch": 0.9280312983562524, "grad_norm": 0.5458322316311397, "learning_rate": 3.998377939983779e-06, "loss": 0.5886, "step": 31786 }, { "epoch": 0.9280604945840998, "grad_norm": 0.5575393840865347, "learning_rate": 3.996755879967559e-06, "loss": 0.602, "step": 31787 }, { "epoch": 0.9280896908119471, "grad_norm": 0.557985548401291, "learning_rate": 3.995133819951338e-06, "loss": 0.6665, "step": 31788 }, { "epoch": 0.9281188870397945, "grad_norm": 0.555191213868062, "learning_rate": 3.993511759935118e-06, "loss": 0.6098, "step": 31789 }, { "epoch": 0.9281480832676419, "grad_norm": 0.5231862708234026, "learning_rate": 3.991889699918897e-06, "loss": 0.5823, "step": 31790 }, { "epoch": 0.9281772794954892, "grad_norm": 0.4896252948974177, "learning_rate": 3.990267639902676e-06, "loss": 0.4922, "step": 31791 }, { "epoch": 0.9282064757233366, "grad_norm": 0.5161338528398658, "learning_rate": 3.988645579886456e-06, "loss": 0.5587, "step": 31792 }, { "epoch": 0.928235671951184, "grad_norm": 0.5615301708010212, "learning_rate": 3.987023519870235e-06, "loss": 0.5473, "step": 31793 }, { "epoch": 0.9282648681790313, "grad_norm": 0.4987837283655625, "learning_rate": 3.985401459854015e-06, "loss": 0.495, "step": 31794 }, { "epoch": 0.9282940644068787, "grad_norm": 0.5421185790067484, "learning_rate": 3.983779399837794e-06, "loss": 0.5621, "step": 31795 }, { "epoch": 0.928323260634726, "grad_norm": 0.5208450461171806, "learning_rate": 3.982157339821574e-06, "loss": 0.5688, "step": 31796 }, { "epoch": 0.9283524568625734, "grad_norm": 0.5429896722789603, "learning_rate": 3.980535279805353e-06, "loss": 0.5994, "step": 31797 }, { "epoch": 0.9283816530904208, "grad_norm": 0.5161450019142231, "learning_rate": 3.978913219789132e-06, "loss": 0.522, "step": 31798 }, { "epoch": 0.9284108493182681, "grad_norm": 0.5459130356016193, "learning_rate": 3.977291159772912e-06, "loss": 0.577, "step": 31799 }, { "epoch": 0.9284400455461155, "grad_norm": 0.5502781022186735, "learning_rate": 3.975669099756691e-06, "loss": 0.5782, "step": 31800 }, { "epoch": 0.9284692417739628, "grad_norm": 0.5424540818457996, "learning_rate": 3.974047039740471e-06, "loss": 0.6181, "step": 31801 }, { "epoch": 0.9284984380018102, "grad_norm": 0.496079768916795, "learning_rate": 3.97242497972425e-06, "loss": 0.4985, "step": 31802 }, { "epoch": 0.9285276342296576, "grad_norm": 0.5197868973634261, "learning_rate": 3.97080291970803e-06, "loss": 0.539, "step": 31803 }, { "epoch": 0.9285568304575049, "grad_norm": 0.5118079479204817, "learning_rate": 3.969180859691809e-06, "loss": 0.5583, "step": 31804 }, { "epoch": 0.9285860266853523, "grad_norm": 0.5127589933172331, "learning_rate": 3.967558799675588e-06, "loss": 0.5703, "step": 31805 }, { "epoch": 0.9286152229131996, "grad_norm": 0.5401665221636919, "learning_rate": 3.965936739659368e-06, "loss": 0.6107, "step": 31806 }, { "epoch": 0.928644419141047, "grad_norm": 0.519328695381048, "learning_rate": 3.964314679643147e-06, "loss": 0.5543, "step": 31807 }, { "epoch": 0.9286736153688944, "grad_norm": 0.516911434325341, "learning_rate": 3.962692619626927e-06, "loss": 0.5556, "step": 31808 }, { "epoch": 0.9287028115967417, "grad_norm": 0.554967834175401, "learning_rate": 3.961070559610705e-06, "loss": 0.5987, "step": 31809 }, { "epoch": 0.9287320078245891, "grad_norm": 0.5112524508308418, "learning_rate": 3.959448499594485e-06, "loss": 0.5298, "step": 31810 }, { "epoch": 0.9287612040524365, "grad_norm": 0.6246327856007046, "learning_rate": 3.957826439578265e-06, "loss": 0.6544, "step": 31811 }, { "epoch": 0.9287904002802838, "grad_norm": 0.5246685658927027, "learning_rate": 3.956204379562044e-06, "loss": 0.5708, "step": 31812 }, { "epoch": 0.9288195965081312, "grad_norm": 0.5205063862145508, "learning_rate": 3.954582319545824e-06, "loss": 0.5506, "step": 31813 }, { "epoch": 0.9288487927359785, "grad_norm": 0.5499310613636774, "learning_rate": 3.952960259529602e-06, "loss": 0.579, "step": 31814 }, { "epoch": 0.9288779889638259, "grad_norm": 0.5428312895454906, "learning_rate": 3.951338199513382e-06, "loss": 0.5889, "step": 31815 }, { "epoch": 0.9289071851916733, "grad_norm": 0.521368473509323, "learning_rate": 3.949716139497162e-06, "loss": 0.5153, "step": 31816 }, { "epoch": 0.9289363814195206, "grad_norm": 0.5347122994930793, "learning_rate": 3.948094079480941e-06, "loss": 0.5844, "step": 31817 }, { "epoch": 0.928965577647368, "grad_norm": 0.5194718597740335, "learning_rate": 3.946472019464721e-06, "loss": 0.5578, "step": 31818 }, { "epoch": 0.9289947738752153, "grad_norm": 0.5341717939631676, "learning_rate": 3.944849959448499e-06, "loss": 0.5962, "step": 31819 }, { "epoch": 0.9290239701030627, "grad_norm": 0.5091831079124983, "learning_rate": 3.943227899432279e-06, "loss": 0.5501, "step": 31820 }, { "epoch": 0.9290531663309101, "grad_norm": 0.5468854681602299, "learning_rate": 3.9416058394160585e-06, "loss": 0.6075, "step": 31821 }, { "epoch": 0.9290823625587574, "grad_norm": 0.5033371612444009, "learning_rate": 3.939983779399839e-06, "loss": 0.5331, "step": 31822 }, { "epoch": 0.9291115587866048, "grad_norm": 0.5340940951627182, "learning_rate": 3.938361719383617e-06, "loss": 0.5681, "step": 31823 }, { "epoch": 0.9291407550144521, "grad_norm": 0.5310751496969542, "learning_rate": 3.936739659367396e-06, "loss": 0.5905, "step": 31824 }, { "epoch": 0.9291699512422995, "grad_norm": 0.5294103846033303, "learning_rate": 3.935117599351176e-06, "loss": 0.5683, "step": 31825 }, { "epoch": 0.9291991474701469, "grad_norm": 0.5721398473419298, "learning_rate": 3.9334955393349555e-06, "loss": 0.6345, "step": 31826 }, { "epoch": 0.9292283436979942, "grad_norm": 0.5401694770388031, "learning_rate": 3.9318734793187356e-06, "loss": 0.6134, "step": 31827 }, { "epoch": 0.9292575399258416, "grad_norm": 0.5285284112926527, "learning_rate": 3.930251419302514e-06, "loss": 0.5753, "step": 31828 }, { "epoch": 0.929286736153689, "grad_norm": 0.585385090622207, "learning_rate": 3.928629359286294e-06, "loss": 0.6432, "step": 31829 }, { "epoch": 0.9293159323815363, "grad_norm": 0.5452556041899315, "learning_rate": 3.927007299270073e-06, "loss": 0.6207, "step": 31830 }, { "epoch": 0.9293451286093837, "grad_norm": 0.5302179661925782, "learning_rate": 3.9253852392538525e-06, "loss": 0.5963, "step": 31831 }, { "epoch": 0.929374324837231, "grad_norm": 0.5605923875694411, "learning_rate": 3.9237631792376325e-06, "loss": 0.5955, "step": 31832 }, { "epoch": 0.9294035210650784, "grad_norm": 0.4933302787170105, "learning_rate": 3.922141119221411e-06, "loss": 0.5205, "step": 31833 }, { "epoch": 0.9294327172929258, "grad_norm": 0.5579282082923406, "learning_rate": 3.920519059205191e-06, "loss": 0.626, "step": 31834 }, { "epoch": 0.9294619135207731, "grad_norm": 0.538494273294205, "learning_rate": 3.91889699918897e-06, "loss": 0.5788, "step": 31835 }, { "epoch": 0.9294911097486205, "grad_norm": 0.5104100723056287, "learning_rate": 3.917274939172749e-06, "loss": 0.5304, "step": 31836 }, { "epoch": 0.9295203059764678, "grad_norm": 0.536234294872032, "learning_rate": 3.915652879156529e-06, "loss": 0.5764, "step": 31837 }, { "epoch": 0.9295495022043152, "grad_norm": 0.523403690098181, "learning_rate": 3.914030819140308e-06, "loss": 0.5237, "step": 31838 }, { "epoch": 0.9295786984321626, "grad_norm": 0.5520358808640691, "learning_rate": 3.912408759124088e-06, "loss": 0.6263, "step": 31839 }, { "epoch": 0.9296078946600099, "grad_norm": 0.5287998222811332, "learning_rate": 3.910786699107867e-06, "loss": 0.6015, "step": 31840 }, { "epoch": 0.9296370908878573, "grad_norm": 0.5070574280701924, "learning_rate": 3.909164639091647e-06, "loss": 0.5883, "step": 31841 }, { "epoch": 0.9296662871157046, "grad_norm": 0.5670608525874354, "learning_rate": 3.907542579075426e-06, "loss": 0.6305, "step": 31842 }, { "epoch": 0.929695483343552, "grad_norm": 0.5144467671278617, "learning_rate": 3.905920519059205e-06, "loss": 0.5724, "step": 31843 }, { "epoch": 0.9297246795713994, "grad_norm": 0.5070312408813861, "learning_rate": 3.904298459042985e-06, "loss": 0.5709, "step": 31844 }, { "epoch": 0.9297538757992467, "grad_norm": 0.5515078599987621, "learning_rate": 3.902676399026764e-06, "loss": 0.6226, "step": 31845 }, { "epoch": 0.9297830720270941, "grad_norm": 0.5310055971083761, "learning_rate": 3.901054339010544e-06, "loss": 0.5705, "step": 31846 }, { "epoch": 0.9298122682549415, "grad_norm": 0.49854767906822245, "learning_rate": 3.8994322789943225e-06, "loss": 0.4911, "step": 31847 }, { "epoch": 0.9298414644827888, "grad_norm": 0.5298396863451289, "learning_rate": 3.897810218978103e-06, "loss": 0.5491, "step": 31848 }, { "epoch": 0.9298706607106362, "grad_norm": 0.5131376562947937, "learning_rate": 3.896188158961882e-06, "loss": 0.5667, "step": 31849 }, { "epoch": 0.9298998569384835, "grad_norm": 0.5303093088416555, "learning_rate": 3.894566098945661e-06, "loss": 0.5631, "step": 31850 }, { "epoch": 0.9299290531663309, "grad_norm": 0.5163058972408533, "learning_rate": 3.89294403892944e-06, "loss": 0.5425, "step": 31851 }, { "epoch": 0.9299582493941783, "grad_norm": 0.4897997167443963, "learning_rate": 3.8913219789132195e-06, "loss": 0.48, "step": 31852 }, { "epoch": 0.9299874456220256, "grad_norm": 0.497082942891504, "learning_rate": 3.8896999188969995e-06, "loss": 0.5322, "step": 31853 }, { "epoch": 0.930016641849873, "grad_norm": 0.5466370582059015, "learning_rate": 3.888077858880779e-06, "loss": 0.6349, "step": 31854 }, { "epoch": 0.9300458380777203, "grad_norm": 0.547338508629114, "learning_rate": 3.886455798864559e-06, "loss": 0.6407, "step": 31855 }, { "epoch": 0.9300750343055677, "grad_norm": 0.5319907513198124, "learning_rate": 3.884833738848337e-06, "loss": 0.5835, "step": 31856 }, { "epoch": 0.9301042305334151, "grad_norm": 0.4979494878511296, "learning_rate": 3.8832116788321164e-06, "loss": 0.5112, "step": 31857 }, { "epoch": 0.9301334267612624, "grad_norm": 0.5329629476059019, "learning_rate": 3.8815896188158965e-06, "loss": 0.5889, "step": 31858 }, { "epoch": 0.9301626229891098, "grad_norm": 0.5195853878286073, "learning_rate": 3.879967558799676e-06, "loss": 0.5679, "step": 31859 }, { "epoch": 0.9301918192169571, "grad_norm": 0.5301159133280269, "learning_rate": 3.878345498783456e-06, "loss": 0.5972, "step": 31860 }, { "epoch": 0.9302210154448045, "grad_norm": 0.5049192054060507, "learning_rate": 3.876723438767234e-06, "loss": 0.496, "step": 31861 }, { "epoch": 0.9302502116726519, "grad_norm": 0.5408906551468895, "learning_rate": 3.875101378751014e-06, "loss": 0.5699, "step": 31862 }, { "epoch": 0.9302794079004992, "grad_norm": 0.4941492038882075, "learning_rate": 3.8734793187347935e-06, "loss": 0.5273, "step": 31863 }, { "epoch": 0.9303086041283466, "grad_norm": 0.5089809049624454, "learning_rate": 3.871857258718573e-06, "loss": 0.5622, "step": 31864 }, { "epoch": 0.930337800356194, "grad_norm": 0.5641865719532305, "learning_rate": 3.870235198702353e-06, "loss": 0.6721, "step": 31865 }, { "epoch": 0.9303669965840413, "grad_norm": 0.5215488705742914, "learning_rate": 3.868613138686131e-06, "loss": 0.5492, "step": 31866 }, { "epoch": 0.9303961928118887, "grad_norm": 0.5351324508943798, "learning_rate": 3.866991078669911e-06, "loss": 0.5859, "step": 31867 }, { "epoch": 0.930425389039736, "grad_norm": 0.5127291017376152, "learning_rate": 3.86536901865369e-06, "loss": 0.5314, "step": 31868 }, { "epoch": 0.9304545852675834, "grad_norm": 0.5350098814789408, "learning_rate": 3.86374695863747e-06, "loss": 0.5878, "step": 31869 }, { "epoch": 0.9304837814954308, "grad_norm": 0.5323601028325218, "learning_rate": 3.862124898621249e-06, "loss": 0.576, "step": 31870 }, { "epoch": 0.9305129777232781, "grad_norm": 0.5162940395619883, "learning_rate": 3.860502838605028e-06, "loss": 0.5639, "step": 31871 }, { "epoch": 0.9305421739511255, "grad_norm": 0.5255815402896133, "learning_rate": 3.858880778588808e-06, "loss": 0.575, "step": 31872 }, { "epoch": 0.9305713701789728, "grad_norm": 0.5277953450780356, "learning_rate": 3.857258718572587e-06, "loss": 0.5691, "step": 31873 }, { "epoch": 0.9306005664068202, "grad_norm": 0.5631869080235986, "learning_rate": 3.8556366585563674e-06, "loss": 0.6172, "step": 31874 }, { "epoch": 0.9306297626346676, "grad_norm": 0.5122787508480099, "learning_rate": 3.854014598540146e-06, "loss": 0.5499, "step": 31875 }, { "epoch": 0.9306589588625149, "grad_norm": 0.5653698566827592, "learning_rate": 3.852392538523925e-06, "loss": 0.6364, "step": 31876 }, { "epoch": 0.9306881550903623, "grad_norm": 0.5264541581011495, "learning_rate": 3.850770478507705e-06, "loss": 0.5411, "step": 31877 }, { "epoch": 0.9307173513182097, "grad_norm": 0.5352008536154074, "learning_rate": 3.849148418491484e-06, "loss": 0.5846, "step": 31878 }, { "epoch": 0.930746547546057, "grad_norm": 0.5429314563117611, "learning_rate": 3.847526358475264e-06, "loss": 0.5942, "step": 31879 }, { "epoch": 0.9307757437739044, "grad_norm": 0.525252633036302, "learning_rate": 3.845904298459043e-06, "loss": 0.5624, "step": 31880 }, { "epoch": 0.9308049400017517, "grad_norm": 0.5397280240651331, "learning_rate": 3.844282238442823e-06, "loss": 0.5908, "step": 31881 }, { "epoch": 0.9308341362295991, "grad_norm": 0.6050659852360253, "learning_rate": 3.842660178426602e-06, "loss": 0.6539, "step": 31882 }, { "epoch": 0.9308633324574465, "grad_norm": 0.4886929809180178, "learning_rate": 3.841038118410381e-06, "loss": 0.5193, "step": 31883 }, { "epoch": 0.9308925286852938, "grad_norm": 0.5287151536478302, "learning_rate": 3.8394160583941605e-06, "loss": 0.5683, "step": 31884 }, { "epoch": 0.9309217249131412, "grad_norm": 0.5777077960165904, "learning_rate": 3.83779399837794e-06, "loss": 0.636, "step": 31885 }, { "epoch": 0.9309509211409885, "grad_norm": 0.5380102888091359, "learning_rate": 3.83617193836172e-06, "loss": 0.5566, "step": 31886 }, { "epoch": 0.9309801173688359, "grad_norm": 0.5535474115071713, "learning_rate": 3.834549878345499e-06, "loss": 0.5758, "step": 31887 }, { "epoch": 0.9310093135966833, "grad_norm": 0.5061725167093798, "learning_rate": 3.832927818329279e-06, "loss": 0.5275, "step": 31888 }, { "epoch": 0.9310385098245306, "grad_norm": 0.5234834491315663, "learning_rate": 3.8313057583130574e-06, "loss": 0.5486, "step": 31889 }, { "epoch": 0.931067706052378, "grad_norm": 0.5172490778689484, "learning_rate": 3.829683698296837e-06, "loss": 0.5085, "step": 31890 }, { "epoch": 0.9310969022802253, "grad_norm": 0.5552772539413675, "learning_rate": 3.828061638280617e-06, "loss": 0.6223, "step": 31891 }, { "epoch": 0.9311260985080727, "grad_norm": 0.5110228800465664, "learning_rate": 3.826439578264396e-06, "loss": 0.5666, "step": 31892 }, { "epoch": 0.9311552947359201, "grad_norm": 0.5042974640722626, "learning_rate": 3.824817518248176e-06, "loss": 0.4986, "step": 31893 }, { "epoch": 0.9311844909637674, "grad_norm": 0.5236429155515704, "learning_rate": 3.823195458231954e-06, "loss": 0.548, "step": 31894 }, { "epoch": 0.9312136871916148, "grad_norm": 0.5419729724078798, "learning_rate": 3.821573398215734e-06, "loss": 0.6269, "step": 31895 }, { "epoch": 0.9312428834194622, "grad_norm": 0.5029041495497567, "learning_rate": 3.819951338199514e-06, "loss": 0.5443, "step": 31896 }, { "epoch": 0.9312720796473095, "grad_norm": 0.5291057717771482, "learning_rate": 3.818329278183293e-06, "loss": 0.5718, "step": 31897 }, { "epoch": 0.9313012758751569, "grad_norm": 0.5651159773152565, "learning_rate": 3.816707218167072e-06, "loss": 0.6284, "step": 31898 }, { "epoch": 0.9313304721030042, "grad_norm": 0.5313608953060374, "learning_rate": 3.815085158150851e-06, "loss": 0.568, "step": 31899 }, { "epoch": 0.9313596683308517, "grad_norm": 0.5319827948898368, "learning_rate": 3.8134630981346314e-06, "loss": 0.5511, "step": 31900 }, { "epoch": 0.9313888645586991, "grad_norm": 0.5481527965396609, "learning_rate": 3.8118410381184106e-06, "loss": 0.6135, "step": 31901 }, { "epoch": 0.9314180607865464, "grad_norm": 0.5209156243667197, "learning_rate": 3.81021897810219e-06, "loss": 0.5441, "step": 31902 }, { "epoch": 0.9314472570143938, "grad_norm": 0.4833907306151853, "learning_rate": 3.8085969180859695e-06, "loss": 0.4984, "step": 31903 }, { "epoch": 0.9314764532422412, "grad_norm": 0.5274436535145455, "learning_rate": 3.8069748580697487e-06, "loss": 0.5951, "step": 31904 }, { "epoch": 0.9315056494700885, "grad_norm": 0.4971550018940628, "learning_rate": 3.8053527980535284e-06, "loss": 0.5311, "step": 31905 }, { "epoch": 0.9315348456979359, "grad_norm": 0.5311770984766756, "learning_rate": 3.8037307380373076e-06, "loss": 0.5944, "step": 31906 }, { "epoch": 0.9315640419257832, "grad_norm": 0.5653192638464701, "learning_rate": 3.8021086780210872e-06, "loss": 0.6098, "step": 31907 }, { "epoch": 0.9315932381536306, "grad_norm": 0.509581236615281, "learning_rate": 3.8004866180048664e-06, "loss": 0.5622, "step": 31908 }, { "epoch": 0.931622434381478, "grad_norm": 0.5216855530284893, "learning_rate": 3.7988645579886457e-06, "loss": 0.5799, "step": 31909 }, { "epoch": 0.9316516306093253, "grad_norm": 0.5368167012952261, "learning_rate": 3.7972424979724253e-06, "loss": 0.5803, "step": 31910 }, { "epoch": 0.9316808268371727, "grad_norm": 0.4871409863393276, "learning_rate": 3.7956204379562045e-06, "loss": 0.5172, "step": 31911 }, { "epoch": 0.93171002306502, "grad_norm": 0.5033724244987214, "learning_rate": 3.793998377939984e-06, "loss": 0.5493, "step": 31912 }, { "epoch": 0.9317392192928674, "grad_norm": 0.5471768169545771, "learning_rate": 3.7923763179237634e-06, "loss": 0.5919, "step": 31913 }, { "epoch": 0.9317684155207148, "grad_norm": 0.5713459606102409, "learning_rate": 3.790754257907543e-06, "loss": 0.6099, "step": 31914 }, { "epoch": 0.9317976117485621, "grad_norm": 0.5161560216688901, "learning_rate": 3.7891321978913223e-06, "loss": 0.5474, "step": 31915 }, { "epoch": 0.9318268079764095, "grad_norm": 0.5132980849833128, "learning_rate": 3.7875101378751015e-06, "loss": 0.5753, "step": 31916 }, { "epoch": 0.9318560042042568, "grad_norm": 0.5487734788424486, "learning_rate": 3.785888077858881e-06, "loss": 0.6531, "step": 31917 }, { "epoch": 0.9318852004321042, "grad_norm": 0.5047602077595585, "learning_rate": 3.7842660178426603e-06, "loss": 0.5195, "step": 31918 }, { "epoch": 0.9319143966599516, "grad_norm": 0.5074040630786666, "learning_rate": 3.78264395782644e-06, "loss": 0.5853, "step": 31919 }, { "epoch": 0.9319435928877989, "grad_norm": 0.5642269213211857, "learning_rate": 3.7810218978102192e-06, "loss": 0.6547, "step": 31920 }, { "epoch": 0.9319727891156463, "grad_norm": 0.5455581820351156, "learning_rate": 3.779399837793999e-06, "loss": 0.565, "step": 31921 }, { "epoch": 0.9320019853434937, "grad_norm": 0.5275285612396743, "learning_rate": 3.777777777777778e-06, "loss": 0.581, "step": 31922 }, { "epoch": 0.932031181571341, "grad_norm": 0.5416592570801276, "learning_rate": 3.7761557177615573e-06, "loss": 0.5906, "step": 31923 }, { "epoch": 0.9320603777991884, "grad_norm": 0.5334181681289399, "learning_rate": 3.774533657745337e-06, "loss": 0.5645, "step": 31924 }, { "epoch": 0.9320895740270357, "grad_norm": 0.5600199682853376, "learning_rate": 3.772911597729116e-06, "loss": 0.5924, "step": 31925 }, { "epoch": 0.9321187702548831, "grad_norm": 1.1434356627628304, "learning_rate": 3.771289537712896e-06, "loss": 0.6733, "step": 31926 }, { "epoch": 0.9321479664827305, "grad_norm": 0.5268824150006691, "learning_rate": 3.769667477696675e-06, "loss": 0.5553, "step": 31927 }, { "epoch": 0.9321771627105778, "grad_norm": 0.48776826636232734, "learning_rate": 3.768045417680454e-06, "loss": 0.4932, "step": 31928 }, { "epoch": 0.9322063589384252, "grad_norm": 0.5488897919510111, "learning_rate": 3.766423357664234e-06, "loss": 0.6251, "step": 31929 }, { "epoch": 0.9322355551662725, "grad_norm": 0.5276683690605339, "learning_rate": 3.764801297648013e-06, "loss": 0.5609, "step": 31930 }, { "epoch": 0.9322647513941199, "grad_norm": 0.49495877979173086, "learning_rate": 3.7631792376317928e-06, "loss": 0.494, "step": 31931 }, { "epoch": 0.9322939476219673, "grad_norm": 0.5105676199964068, "learning_rate": 3.761557177615572e-06, "loss": 0.5389, "step": 31932 }, { "epoch": 0.9323231438498146, "grad_norm": 0.5431923465926213, "learning_rate": 3.7599351175993516e-06, "loss": 0.6256, "step": 31933 }, { "epoch": 0.932352340077662, "grad_norm": 0.5096231521723963, "learning_rate": 3.758313057583131e-06, "loss": 0.5555, "step": 31934 }, { "epoch": 0.9323815363055094, "grad_norm": 0.5294192398832854, "learning_rate": 3.7566909975669096e-06, "loss": 0.5827, "step": 31935 }, { "epoch": 0.9324107325333567, "grad_norm": 0.5539161520998347, "learning_rate": 3.7550689375506897e-06, "loss": 0.6101, "step": 31936 }, { "epoch": 0.9324399287612041, "grad_norm": 0.5168753639493766, "learning_rate": 3.753446877534469e-06, "loss": 0.5496, "step": 31937 }, { "epoch": 0.9324691249890514, "grad_norm": 0.4798543662047473, "learning_rate": 3.7518248175182486e-06, "loss": 0.4883, "step": 31938 }, { "epoch": 0.9324983212168988, "grad_norm": 0.5385518731035314, "learning_rate": 3.750202757502028e-06, "loss": 0.6137, "step": 31939 }, { "epoch": 0.9325275174447462, "grad_norm": 0.5136441356643613, "learning_rate": 3.7485806974858074e-06, "loss": 0.5789, "step": 31940 }, { "epoch": 0.9325567136725935, "grad_norm": 0.5379456696489346, "learning_rate": 3.7469586374695867e-06, "loss": 0.6088, "step": 31941 }, { "epoch": 0.9325859099004409, "grad_norm": 0.5034259521093958, "learning_rate": 3.7453365774533655e-06, "loss": 0.5355, "step": 31942 }, { "epoch": 0.9326151061282882, "grad_norm": 0.5234747879517591, "learning_rate": 3.7437145174371455e-06, "loss": 0.5812, "step": 31943 }, { "epoch": 0.9326443023561356, "grad_norm": 0.49188452443534025, "learning_rate": 3.7420924574209248e-06, "loss": 0.5371, "step": 31944 }, { "epoch": 0.932673498583983, "grad_norm": 0.5249307447124004, "learning_rate": 3.7404703974047044e-06, "loss": 0.5707, "step": 31945 }, { "epoch": 0.9327026948118303, "grad_norm": 0.5133073125954617, "learning_rate": 3.7388483373884836e-06, "loss": 0.5578, "step": 31946 }, { "epoch": 0.9327318910396777, "grad_norm": 0.5409251255834483, "learning_rate": 3.7372262773722633e-06, "loss": 0.5094, "step": 31947 }, { "epoch": 0.932761087267525, "grad_norm": 0.5224389427081632, "learning_rate": 3.7356042173560425e-06, "loss": 0.5524, "step": 31948 }, { "epoch": 0.9327902834953724, "grad_norm": 0.5027230919166892, "learning_rate": 3.7339821573398213e-06, "loss": 0.5202, "step": 31949 }, { "epoch": 0.9328194797232198, "grad_norm": 0.5142546646167696, "learning_rate": 3.7323600973236013e-06, "loss": 0.5442, "step": 31950 }, { "epoch": 0.9328486759510671, "grad_norm": 0.5537975767177183, "learning_rate": 3.7307380373073806e-06, "loss": 0.6332, "step": 31951 }, { "epoch": 0.9328778721789145, "grad_norm": 0.5359469333346579, "learning_rate": 3.7291159772911602e-06, "loss": 0.5916, "step": 31952 }, { "epoch": 0.9329070684067619, "grad_norm": 0.5790593658598409, "learning_rate": 3.7274939172749394e-06, "loss": 0.6527, "step": 31953 }, { "epoch": 0.9329362646346092, "grad_norm": 0.5582712412736925, "learning_rate": 3.725871857258719e-06, "loss": 0.6512, "step": 31954 }, { "epoch": 0.9329654608624566, "grad_norm": 0.5439374777715722, "learning_rate": 3.7242497972424983e-06, "loss": 0.5962, "step": 31955 }, { "epoch": 0.9329946570903039, "grad_norm": 0.502410402096491, "learning_rate": 3.722627737226277e-06, "loss": 0.5281, "step": 31956 }, { "epoch": 0.9330238533181513, "grad_norm": 0.46479102292201163, "learning_rate": 3.721005677210057e-06, "loss": 0.4788, "step": 31957 }, { "epoch": 0.9330530495459987, "grad_norm": 0.5380330388511508, "learning_rate": 3.7193836171938364e-06, "loss": 0.5892, "step": 31958 }, { "epoch": 0.933082245773846, "grad_norm": 0.5529291516287755, "learning_rate": 3.717761557177616e-06, "loss": 0.6188, "step": 31959 }, { "epoch": 0.9331114420016934, "grad_norm": 0.5255750979835724, "learning_rate": 3.7161394971613952e-06, "loss": 0.5612, "step": 31960 }, { "epoch": 0.9331406382295407, "grad_norm": 0.5704082030300509, "learning_rate": 3.714517437145174e-06, "loss": 0.6473, "step": 31961 }, { "epoch": 0.9331698344573881, "grad_norm": 0.5320256625918434, "learning_rate": 3.712895377128954e-06, "loss": 0.5678, "step": 31962 }, { "epoch": 0.9331990306852355, "grad_norm": 0.5168691435142679, "learning_rate": 3.7112733171127333e-06, "loss": 0.5706, "step": 31963 }, { "epoch": 0.9332282269130828, "grad_norm": 0.49225171610721585, "learning_rate": 3.709651257096513e-06, "loss": 0.5127, "step": 31964 }, { "epoch": 0.9332574231409302, "grad_norm": 0.5538651714229368, "learning_rate": 3.708029197080292e-06, "loss": 0.6187, "step": 31965 }, { "epoch": 0.9332866193687775, "grad_norm": 0.5088134471175073, "learning_rate": 3.706407137064072e-06, "loss": 0.5498, "step": 31966 }, { "epoch": 0.9333158155966249, "grad_norm": 0.5564789132443932, "learning_rate": 3.704785077047851e-06, "loss": 0.6498, "step": 31967 }, { "epoch": 0.9333450118244723, "grad_norm": 0.5276780918746939, "learning_rate": 3.70316301703163e-06, "loss": 0.5667, "step": 31968 }, { "epoch": 0.9333742080523196, "grad_norm": 0.5628562392183402, "learning_rate": 3.70154095701541e-06, "loss": 0.6297, "step": 31969 }, { "epoch": 0.933403404280167, "grad_norm": 0.5284480727357662, "learning_rate": 3.699918896999189e-06, "loss": 0.5888, "step": 31970 }, { "epoch": 0.9334326005080144, "grad_norm": 0.505062380322397, "learning_rate": 3.698296836982969e-06, "loss": 0.5298, "step": 31971 }, { "epoch": 0.9334617967358617, "grad_norm": 0.5545346102584156, "learning_rate": 3.696674776966748e-06, "loss": 0.5944, "step": 31972 }, { "epoch": 0.9334909929637091, "grad_norm": 0.5377228819710801, "learning_rate": 3.6950527169505277e-06, "loss": 0.589, "step": 31973 }, { "epoch": 0.9335201891915564, "grad_norm": 0.47518378612050866, "learning_rate": 3.693430656934307e-06, "loss": 0.4896, "step": 31974 }, { "epoch": 0.9335493854194038, "grad_norm": 0.5512154375394385, "learning_rate": 3.6918085969180857e-06, "loss": 0.62, "step": 31975 }, { "epoch": 0.9335785816472512, "grad_norm": 0.5506818194102139, "learning_rate": 3.6901865369018657e-06, "loss": 0.6136, "step": 31976 }, { "epoch": 0.9336077778750985, "grad_norm": 0.515217058937364, "learning_rate": 3.688564476885645e-06, "loss": 0.5329, "step": 31977 }, { "epoch": 0.9336369741029459, "grad_norm": 0.5004912291748017, "learning_rate": 3.6869424168694246e-06, "loss": 0.5449, "step": 31978 }, { "epoch": 0.9336661703307932, "grad_norm": 0.573930855662473, "learning_rate": 3.685320356853204e-06, "loss": 0.6388, "step": 31979 }, { "epoch": 0.9336953665586406, "grad_norm": 0.4932510812402541, "learning_rate": 3.6836982968369835e-06, "loss": 0.4852, "step": 31980 }, { "epoch": 0.933724562786488, "grad_norm": 0.5611937945189023, "learning_rate": 3.6820762368207627e-06, "loss": 0.6114, "step": 31981 }, { "epoch": 0.9337537590143353, "grad_norm": 0.5330961080735668, "learning_rate": 3.6804541768045415e-06, "loss": 0.5521, "step": 31982 }, { "epoch": 0.9337829552421827, "grad_norm": 0.5308047679597916, "learning_rate": 3.6788321167883216e-06, "loss": 0.5767, "step": 31983 }, { "epoch": 0.93381215147003, "grad_norm": 0.5385225350121973, "learning_rate": 3.6772100567721008e-06, "loss": 0.6261, "step": 31984 }, { "epoch": 0.9338413476978774, "grad_norm": 0.5403045855927024, "learning_rate": 3.6755879967558804e-06, "loss": 0.6089, "step": 31985 }, { "epoch": 0.9338705439257248, "grad_norm": 0.5348343633432352, "learning_rate": 3.6739659367396597e-06, "loss": 0.6042, "step": 31986 }, { "epoch": 0.9338997401535721, "grad_norm": 0.4694427006878752, "learning_rate": 3.6723438767234385e-06, "loss": 0.4588, "step": 31987 }, { "epoch": 0.9339289363814195, "grad_norm": 0.5083717064280978, "learning_rate": 3.6707218167072185e-06, "loss": 0.5085, "step": 31988 }, { "epoch": 0.9339581326092669, "grad_norm": 0.555543708576264, "learning_rate": 3.6690997566909973e-06, "loss": 0.631, "step": 31989 }, { "epoch": 0.9339873288371142, "grad_norm": 0.546626377579667, "learning_rate": 3.6674776966747774e-06, "loss": 0.6171, "step": 31990 }, { "epoch": 0.9340165250649616, "grad_norm": 0.5261931482504016, "learning_rate": 3.6658556366585566e-06, "loss": 0.5531, "step": 31991 }, { "epoch": 0.9340457212928089, "grad_norm": 0.4803845762467292, "learning_rate": 3.6642335766423362e-06, "loss": 0.4811, "step": 31992 }, { "epoch": 0.9340749175206563, "grad_norm": 0.504746761028119, "learning_rate": 3.6626115166261155e-06, "loss": 0.5592, "step": 31993 }, { "epoch": 0.9341041137485037, "grad_norm": 0.5658884354160485, "learning_rate": 3.6609894566098943e-06, "loss": 0.6184, "step": 31994 }, { "epoch": 0.934133309976351, "grad_norm": 0.5217728326806357, "learning_rate": 3.6593673965936743e-06, "loss": 0.5667, "step": 31995 }, { "epoch": 0.9341625062041984, "grad_norm": 0.5577094734084592, "learning_rate": 3.657745336577453e-06, "loss": 0.6314, "step": 31996 }, { "epoch": 0.9341917024320457, "grad_norm": 0.5380652752550928, "learning_rate": 3.656123276561233e-06, "loss": 0.6144, "step": 31997 }, { "epoch": 0.9342208986598931, "grad_norm": 0.5246760339636584, "learning_rate": 3.6545012165450124e-06, "loss": 0.5772, "step": 31998 }, { "epoch": 0.9342500948877405, "grad_norm": 0.5077532835936328, "learning_rate": 3.652879156528792e-06, "loss": 0.5589, "step": 31999 }, { "epoch": 0.9342792911155878, "grad_norm": 0.5247361556301448, "learning_rate": 3.6512570965125713e-06, "loss": 0.5773, "step": 32000 }, { "epoch": 0.9343084873434352, "grad_norm": 0.5182698648670551, "learning_rate": 3.64963503649635e-06, "loss": 0.5544, "step": 32001 }, { "epoch": 0.9343376835712826, "grad_norm": 0.485366240253444, "learning_rate": 3.64801297648013e-06, "loss": 0.5016, "step": 32002 }, { "epoch": 0.9343668797991299, "grad_norm": 0.4891418063334594, "learning_rate": 3.646390916463909e-06, "loss": 0.504, "step": 32003 }, { "epoch": 0.9343960760269773, "grad_norm": 0.544230327544247, "learning_rate": 3.644768856447689e-06, "loss": 0.6139, "step": 32004 }, { "epoch": 0.9344252722548246, "grad_norm": 0.4982118653323019, "learning_rate": 3.6431467964314682e-06, "loss": 0.5335, "step": 32005 }, { "epoch": 0.934454468482672, "grad_norm": 0.5161610074721255, "learning_rate": 3.641524736415248e-06, "loss": 0.5313, "step": 32006 }, { "epoch": 0.9344836647105194, "grad_norm": 0.5103128416845321, "learning_rate": 3.639902676399027e-06, "loss": 0.5241, "step": 32007 }, { "epoch": 0.9345128609383667, "grad_norm": 0.5240228513383272, "learning_rate": 3.638280616382806e-06, "loss": 0.5689, "step": 32008 }, { "epoch": 0.9345420571662141, "grad_norm": 0.5070192156697982, "learning_rate": 3.636658556366586e-06, "loss": 0.5858, "step": 32009 }, { "epoch": 0.9345712533940614, "grad_norm": 0.5086187068057363, "learning_rate": 3.6350364963503648e-06, "loss": 0.5333, "step": 32010 }, { "epoch": 0.9346004496219088, "grad_norm": 0.48688827165218207, "learning_rate": 3.633414436334145e-06, "loss": 0.4988, "step": 32011 }, { "epoch": 0.9346296458497562, "grad_norm": 0.5597413339053217, "learning_rate": 3.631792376317924e-06, "loss": 0.6522, "step": 32012 }, { "epoch": 0.9346588420776035, "grad_norm": 0.48054342211393575, "learning_rate": 3.6301703163017037e-06, "loss": 0.4907, "step": 32013 }, { "epoch": 0.9346880383054509, "grad_norm": 0.537746115006188, "learning_rate": 3.628548256285483e-06, "loss": 0.6072, "step": 32014 }, { "epoch": 0.9347172345332982, "grad_norm": 0.5381055895422509, "learning_rate": 3.6269261962692617e-06, "loss": 0.5916, "step": 32015 }, { "epoch": 0.9347464307611456, "grad_norm": 0.5236189417380896, "learning_rate": 3.6253041362530418e-06, "loss": 0.5423, "step": 32016 }, { "epoch": 0.934775626988993, "grad_norm": 0.544668807325725, "learning_rate": 3.6236820762368206e-06, "loss": 0.5549, "step": 32017 }, { "epoch": 0.9348048232168403, "grad_norm": 0.5123415814090736, "learning_rate": 3.6220600162206007e-06, "loss": 0.5546, "step": 32018 }, { "epoch": 0.9348340194446877, "grad_norm": 0.5580472892443693, "learning_rate": 3.62043795620438e-06, "loss": 0.639, "step": 32019 }, { "epoch": 0.9348632156725352, "grad_norm": 0.5551742085496386, "learning_rate": 3.6188158961881587e-06, "loss": 0.6151, "step": 32020 }, { "epoch": 0.9348924119003825, "grad_norm": 0.5637232528981991, "learning_rate": 3.6171938361719387e-06, "loss": 0.6114, "step": 32021 }, { "epoch": 0.9349216081282299, "grad_norm": 0.5216505978231283, "learning_rate": 3.6155717761557175e-06, "loss": 0.5109, "step": 32022 }, { "epoch": 0.9349508043560772, "grad_norm": 0.5673728706813945, "learning_rate": 3.6139497161394976e-06, "loss": 0.6316, "step": 32023 }, { "epoch": 0.9349800005839246, "grad_norm": 0.5445222588071347, "learning_rate": 3.612327656123277e-06, "loss": 0.5269, "step": 32024 }, { "epoch": 0.935009196811772, "grad_norm": 0.49504722394069905, "learning_rate": 3.6107055961070565e-06, "loss": 0.5629, "step": 32025 }, { "epoch": 0.9350383930396193, "grad_norm": 0.5241852689657407, "learning_rate": 3.6090835360908357e-06, "loss": 0.5537, "step": 32026 }, { "epoch": 0.9350675892674667, "grad_norm": 0.5206487994074215, "learning_rate": 3.6074614760746145e-06, "loss": 0.5422, "step": 32027 }, { "epoch": 0.935096785495314, "grad_norm": 0.5702112269525342, "learning_rate": 3.6058394160583946e-06, "loss": 0.6618, "step": 32028 }, { "epoch": 0.9351259817231614, "grad_norm": 0.47901027306972804, "learning_rate": 3.6042173560421734e-06, "loss": 0.5149, "step": 32029 }, { "epoch": 0.9351551779510088, "grad_norm": 0.517022084982609, "learning_rate": 3.6025952960259534e-06, "loss": 0.5553, "step": 32030 }, { "epoch": 0.9351843741788561, "grad_norm": 0.5407459412956742, "learning_rate": 3.6009732360097326e-06, "loss": 0.5721, "step": 32031 }, { "epoch": 0.9352135704067035, "grad_norm": 0.5332485120976065, "learning_rate": 3.5993511759935123e-06, "loss": 0.5999, "step": 32032 }, { "epoch": 0.9352427666345509, "grad_norm": 0.4969132979649184, "learning_rate": 3.5977291159772915e-06, "loss": 0.5187, "step": 32033 }, { "epoch": 0.9352719628623982, "grad_norm": 0.5333007502574247, "learning_rate": 3.5961070559610703e-06, "loss": 0.538, "step": 32034 }, { "epoch": 0.9353011590902456, "grad_norm": 0.5507138988077447, "learning_rate": 3.5944849959448504e-06, "loss": 0.6426, "step": 32035 }, { "epoch": 0.9353303553180929, "grad_norm": 0.47924713397691526, "learning_rate": 3.592862935928629e-06, "loss": 0.4828, "step": 32036 }, { "epoch": 0.9353595515459403, "grad_norm": 0.5674118929683926, "learning_rate": 3.5912408759124092e-06, "loss": 0.6816, "step": 32037 }, { "epoch": 0.9353887477737877, "grad_norm": 0.5271214785975218, "learning_rate": 3.5896188158961885e-06, "loss": 0.5713, "step": 32038 }, { "epoch": 0.935417944001635, "grad_norm": 0.5039841878061545, "learning_rate": 3.587996755879968e-06, "loss": 0.5394, "step": 32039 }, { "epoch": 0.9354471402294824, "grad_norm": 0.5228195353536729, "learning_rate": 3.5863746958637473e-06, "loss": 0.5786, "step": 32040 }, { "epoch": 0.9354763364573297, "grad_norm": 0.541613873415437, "learning_rate": 3.584752635847526e-06, "loss": 0.6, "step": 32041 }, { "epoch": 0.9355055326851771, "grad_norm": 0.5242839327859696, "learning_rate": 3.583130575831306e-06, "loss": 0.5675, "step": 32042 }, { "epoch": 0.9355347289130245, "grad_norm": 0.5072286254543935, "learning_rate": 3.581508515815085e-06, "loss": 0.5149, "step": 32043 }, { "epoch": 0.9355639251408718, "grad_norm": 0.527728668856466, "learning_rate": 3.579886455798865e-06, "loss": 0.5796, "step": 32044 }, { "epoch": 0.9355931213687192, "grad_norm": 0.5439524668781661, "learning_rate": 3.5782643957826443e-06, "loss": 0.5964, "step": 32045 }, { "epoch": 0.9356223175965666, "grad_norm": 0.6110485876750252, "learning_rate": 3.576642335766423e-06, "loss": 0.6793, "step": 32046 }, { "epoch": 0.9356515138244139, "grad_norm": 0.5347481493327662, "learning_rate": 3.575020275750203e-06, "loss": 0.5631, "step": 32047 }, { "epoch": 0.9356807100522613, "grad_norm": 0.5618925979183004, "learning_rate": 3.573398215733982e-06, "loss": 0.6202, "step": 32048 }, { "epoch": 0.9357099062801086, "grad_norm": 0.5307723031142247, "learning_rate": 3.571776155717762e-06, "loss": 0.5756, "step": 32049 }, { "epoch": 0.935739102507956, "grad_norm": 0.5610180516394996, "learning_rate": 3.570154095701541e-06, "loss": 0.609, "step": 32050 }, { "epoch": 0.9357682987358034, "grad_norm": 0.5359289112031624, "learning_rate": 3.568532035685321e-06, "loss": 0.5814, "step": 32051 }, { "epoch": 0.9357974949636507, "grad_norm": 0.5037408235694208, "learning_rate": 3.5669099756691e-06, "loss": 0.5358, "step": 32052 }, { "epoch": 0.9358266911914981, "grad_norm": 0.5135812957460842, "learning_rate": 3.565287915652879e-06, "loss": 0.5297, "step": 32053 }, { "epoch": 0.9358558874193454, "grad_norm": 0.5236500364725212, "learning_rate": 3.563665855636659e-06, "loss": 0.5702, "step": 32054 }, { "epoch": 0.9358850836471928, "grad_norm": 0.532305540080735, "learning_rate": 3.5620437956204378e-06, "loss": 0.5991, "step": 32055 }, { "epoch": 0.9359142798750402, "grad_norm": 0.5150614833206753, "learning_rate": 3.560421735604218e-06, "loss": 0.5305, "step": 32056 }, { "epoch": 0.9359434761028875, "grad_norm": 0.5532374994676844, "learning_rate": 3.5587996755879966e-06, "loss": 0.6028, "step": 32057 }, { "epoch": 0.9359726723307349, "grad_norm": 0.4906988165958666, "learning_rate": 3.5571776155717767e-06, "loss": 0.54, "step": 32058 }, { "epoch": 0.9360018685585823, "grad_norm": 0.5204891412121198, "learning_rate": 3.555555555555556e-06, "loss": 0.5895, "step": 32059 }, { "epoch": 0.9360310647864296, "grad_norm": 0.5352500387904422, "learning_rate": 3.5539334955393347e-06, "loss": 0.5941, "step": 32060 }, { "epoch": 0.936060261014277, "grad_norm": 0.49070883803045395, "learning_rate": 3.5523114355231148e-06, "loss": 0.5231, "step": 32061 }, { "epoch": 0.9360894572421243, "grad_norm": 0.5243136159552592, "learning_rate": 3.5506893755068936e-06, "loss": 0.5875, "step": 32062 }, { "epoch": 0.9361186534699717, "grad_norm": 0.5300621340993877, "learning_rate": 3.5490673154906736e-06, "loss": 0.5806, "step": 32063 }, { "epoch": 0.9361478496978191, "grad_norm": 0.5346977957591961, "learning_rate": 3.5474452554744524e-06, "loss": 0.5867, "step": 32064 }, { "epoch": 0.9361770459256664, "grad_norm": 0.5419453044695756, "learning_rate": 3.5458231954582325e-06, "loss": 0.575, "step": 32065 }, { "epoch": 0.9362062421535138, "grad_norm": 0.5134403947680535, "learning_rate": 3.5442011354420117e-06, "loss": 0.5767, "step": 32066 }, { "epoch": 0.9362354383813611, "grad_norm": 0.5355655954530952, "learning_rate": 3.5425790754257905e-06, "loss": 0.6224, "step": 32067 }, { "epoch": 0.9362646346092085, "grad_norm": 0.5292218380828856, "learning_rate": 3.5409570154095706e-06, "loss": 0.6088, "step": 32068 }, { "epoch": 0.9362938308370559, "grad_norm": 0.5317030496526447, "learning_rate": 3.5393349553933494e-06, "loss": 0.5705, "step": 32069 }, { "epoch": 0.9363230270649032, "grad_norm": 0.54306154527869, "learning_rate": 3.5377128953771295e-06, "loss": 0.6406, "step": 32070 }, { "epoch": 0.9363522232927506, "grad_norm": 0.5107528663055783, "learning_rate": 3.5360908353609083e-06, "loss": 0.5248, "step": 32071 }, { "epoch": 0.936381419520598, "grad_norm": 0.5317042463494867, "learning_rate": 3.5344687753446883e-06, "loss": 0.5657, "step": 32072 }, { "epoch": 0.9364106157484453, "grad_norm": 0.5866574568293964, "learning_rate": 3.5328467153284675e-06, "loss": 0.6647, "step": 32073 }, { "epoch": 0.9364398119762927, "grad_norm": 0.5582461945241308, "learning_rate": 3.5312246553122463e-06, "loss": 0.6657, "step": 32074 }, { "epoch": 0.93646900820414, "grad_norm": 0.5401512413094856, "learning_rate": 3.5296025952960264e-06, "loss": 0.6144, "step": 32075 }, { "epoch": 0.9364982044319874, "grad_norm": 0.5739652623915834, "learning_rate": 3.527980535279805e-06, "loss": 0.6948, "step": 32076 }, { "epoch": 0.9365274006598348, "grad_norm": 0.5354675969001028, "learning_rate": 3.5263584752635853e-06, "loss": 0.5851, "step": 32077 }, { "epoch": 0.9365565968876821, "grad_norm": 0.5249256124476382, "learning_rate": 3.524736415247364e-06, "loss": 0.5876, "step": 32078 }, { "epoch": 0.9365857931155295, "grad_norm": 0.5851855591619226, "learning_rate": 3.5231143552311433e-06, "loss": 0.6978, "step": 32079 }, { "epoch": 0.9366149893433768, "grad_norm": 0.5238899707825779, "learning_rate": 3.5214922952149234e-06, "loss": 0.5583, "step": 32080 }, { "epoch": 0.9366441855712242, "grad_norm": 0.5359687190222221, "learning_rate": 3.519870235198702e-06, "loss": 0.5391, "step": 32081 }, { "epoch": 0.9366733817990716, "grad_norm": 0.5213254545806418, "learning_rate": 3.5182481751824822e-06, "loss": 0.578, "step": 32082 }, { "epoch": 0.9367025780269189, "grad_norm": 0.5501457352042282, "learning_rate": 3.516626115166261e-06, "loss": 0.601, "step": 32083 }, { "epoch": 0.9367317742547663, "grad_norm": 0.5451831849738771, "learning_rate": 3.515004055150041e-06, "loss": 0.6098, "step": 32084 }, { "epoch": 0.9367609704826136, "grad_norm": 0.5448363491517484, "learning_rate": 3.51338199513382e-06, "loss": 0.6049, "step": 32085 }, { "epoch": 0.936790166710461, "grad_norm": 0.5573297673599724, "learning_rate": 3.511759935117599e-06, "loss": 0.6282, "step": 32086 }, { "epoch": 0.9368193629383084, "grad_norm": 0.546143561242743, "learning_rate": 3.510137875101379e-06, "loss": 0.6138, "step": 32087 }, { "epoch": 0.9368485591661557, "grad_norm": 0.5316414262395689, "learning_rate": 3.508515815085158e-06, "loss": 0.5626, "step": 32088 }, { "epoch": 0.9368777553940031, "grad_norm": 0.5132604408264039, "learning_rate": 3.506893755068938e-06, "loss": 0.5656, "step": 32089 }, { "epoch": 0.9369069516218504, "grad_norm": 0.5559425049861796, "learning_rate": 3.505271695052717e-06, "loss": 0.6362, "step": 32090 }, { "epoch": 0.9369361478496978, "grad_norm": 0.5462735476965355, "learning_rate": 3.503649635036497e-06, "loss": 0.5944, "step": 32091 }, { "epoch": 0.9369653440775452, "grad_norm": 0.5470416341101088, "learning_rate": 3.502027575020276e-06, "loss": 0.5786, "step": 32092 }, { "epoch": 0.9369945403053925, "grad_norm": 0.54672769044323, "learning_rate": 3.500405515004055e-06, "loss": 0.6135, "step": 32093 }, { "epoch": 0.9370237365332399, "grad_norm": 0.5180092691733883, "learning_rate": 3.498783454987835e-06, "loss": 0.5821, "step": 32094 }, { "epoch": 0.9370529327610873, "grad_norm": 0.5609482373458232, "learning_rate": 3.497161394971614e-06, "loss": 0.6779, "step": 32095 }, { "epoch": 0.9370821289889346, "grad_norm": 0.5250309907355445, "learning_rate": 3.495539334955394e-06, "loss": 0.5947, "step": 32096 }, { "epoch": 0.937111325216782, "grad_norm": 0.5547660177684178, "learning_rate": 3.4939172749391727e-06, "loss": 0.627, "step": 32097 }, { "epoch": 0.9371405214446293, "grad_norm": 0.5051260100878822, "learning_rate": 3.4922952149229527e-06, "loss": 0.5184, "step": 32098 }, { "epoch": 0.9371697176724767, "grad_norm": 0.5207448362756352, "learning_rate": 3.490673154906732e-06, "loss": 0.5205, "step": 32099 }, { "epoch": 0.9371989139003241, "grad_norm": 0.5214621105967424, "learning_rate": 3.4890510948905107e-06, "loss": 0.5539, "step": 32100 }, { "epoch": 0.9372281101281714, "grad_norm": 0.5443415649354493, "learning_rate": 3.487429034874291e-06, "loss": 0.6176, "step": 32101 }, { "epoch": 0.9372573063560188, "grad_norm": 0.5492763164540931, "learning_rate": 3.4858069748580696e-06, "loss": 0.5836, "step": 32102 }, { "epoch": 0.9372865025838661, "grad_norm": 0.5315617617622653, "learning_rate": 3.4841849148418497e-06, "loss": 0.5875, "step": 32103 }, { "epoch": 0.9373156988117135, "grad_norm": 0.5327407768471453, "learning_rate": 3.4825628548256285e-06, "loss": 0.5716, "step": 32104 }, { "epoch": 0.9373448950395609, "grad_norm": 0.5196817982585091, "learning_rate": 3.4809407948094077e-06, "loss": 0.5775, "step": 32105 }, { "epoch": 0.9373740912674082, "grad_norm": 0.5184552521406017, "learning_rate": 3.4793187347931878e-06, "loss": 0.5779, "step": 32106 }, { "epoch": 0.9374032874952556, "grad_norm": 0.518182024039938, "learning_rate": 3.4776966747769666e-06, "loss": 0.5773, "step": 32107 }, { "epoch": 0.937432483723103, "grad_norm": 0.5699522110098798, "learning_rate": 3.4760746147607466e-06, "loss": 0.6321, "step": 32108 }, { "epoch": 0.9374616799509503, "grad_norm": 0.619678947712463, "learning_rate": 3.4744525547445254e-06, "loss": 0.6967, "step": 32109 }, { "epoch": 0.9374908761787977, "grad_norm": 0.5260606961577721, "learning_rate": 3.4728304947283055e-06, "loss": 0.5452, "step": 32110 }, { "epoch": 0.937520072406645, "grad_norm": 0.5161964904807667, "learning_rate": 3.4712084347120843e-06, "loss": 0.5434, "step": 32111 }, { "epoch": 0.9375492686344924, "grad_norm": 0.5058808511413475, "learning_rate": 3.4695863746958635e-06, "loss": 0.4995, "step": 32112 }, { "epoch": 0.9375784648623398, "grad_norm": 0.5744787137875397, "learning_rate": 3.4679643146796436e-06, "loss": 0.6159, "step": 32113 }, { "epoch": 0.9376076610901871, "grad_norm": 0.5017992863554983, "learning_rate": 3.4663422546634224e-06, "loss": 0.54, "step": 32114 }, { "epoch": 0.9376368573180345, "grad_norm": 0.5206657204989442, "learning_rate": 3.4647201946472025e-06, "loss": 0.5415, "step": 32115 }, { "epoch": 0.9376660535458818, "grad_norm": 0.5486816173739274, "learning_rate": 3.4630981346309812e-06, "loss": 0.6266, "step": 32116 }, { "epoch": 0.9376952497737292, "grad_norm": 0.4864990455126878, "learning_rate": 3.4614760746147613e-06, "loss": 0.4871, "step": 32117 }, { "epoch": 0.9377244460015766, "grad_norm": 0.5236465661619601, "learning_rate": 3.45985401459854e-06, "loss": 0.5546, "step": 32118 }, { "epoch": 0.9377536422294239, "grad_norm": 0.5483237693690747, "learning_rate": 3.4582319545823193e-06, "loss": 0.6406, "step": 32119 }, { "epoch": 0.9377828384572713, "grad_norm": 0.5766556475469178, "learning_rate": 3.4566098945660994e-06, "loss": 0.6463, "step": 32120 }, { "epoch": 0.9378120346851186, "grad_norm": 0.5081520028447662, "learning_rate": 3.454987834549878e-06, "loss": 0.5082, "step": 32121 }, { "epoch": 0.937841230912966, "grad_norm": 0.5180224892497205, "learning_rate": 3.4533657745336583e-06, "loss": 0.5361, "step": 32122 }, { "epoch": 0.9378704271408134, "grad_norm": 0.5381541436359499, "learning_rate": 3.451743714517437e-06, "loss": 0.5881, "step": 32123 }, { "epoch": 0.9378996233686607, "grad_norm": 0.5313879809748525, "learning_rate": 3.450121654501217e-06, "loss": 0.575, "step": 32124 }, { "epoch": 0.9379288195965081, "grad_norm": 0.5224960171041416, "learning_rate": 3.448499594484996e-06, "loss": 0.5763, "step": 32125 }, { "epoch": 0.9379580158243555, "grad_norm": 0.56046548934539, "learning_rate": 3.446877534468775e-06, "loss": 0.6186, "step": 32126 }, { "epoch": 0.9379872120522028, "grad_norm": 0.5231976998626056, "learning_rate": 3.4452554744525552e-06, "loss": 0.5596, "step": 32127 }, { "epoch": 0.9380164082800502, "grad_norm": 0.5768459685974061, "learning_rate": 3.443633414436334e-06, "loss": 0.6873, "step": 32128 }, { "epoch": 0.9380456045078975, "grad_norm": 0.508590403936106, "learning_rate": 3.442011354420114e-06, "loss": 0.556, "step": 32129 }, { "epoch": 0.9380748007357449, "grad_norm": 0.5368010309586678, "learning_rate": 3.440389294403893e-06, "loss": 0.5671, "step": 32130 }, { "epoch": 0.9381039969635923, "grad_norm": 0.5619215548016088, "learning_rate": 3.438767234387673e-06, "loss": 0.6371, "step": 32131 }, { "epoch": 0.9381331931914396, "grad_norm": 0.5599659521468091, "learning_rate": 3.4371451743714517e-06, "loss": 0.6384, "step": 32132 }, { "epoch": 0.938162389419287, "grad_norm": 0.4924333995952595, "learning_rate": 3.435523114355231e-06, "loss": 0.5143, "step": 32133 }, { "epoch": 0.9381915856471343, "grad_norm": 0.5494636694417273, "learning_rate": 3.433901054339011e-06, "loss": 0.6479, "step": 32134 }, { "epoch": 0.9382207818749817, "grad_norm": 0.5392537671208242, "learning_rate": 3.43227899432279e-06, "loss": 0.6012, "step": 32135 }, { "epoch": 0.9382499781028291, "grad_norm": 0.5007413539488929, "learning_rate": 3.43065693430657e-06, "loss": 0.4899, "step": 32136 }, { "epoch": 0.9382791743306764, "grad_norm": 0.5385868628170836, "learning_rate": 3.4290348742903487e-06, "loss": 0.5661, "step": 32137 }, { "epoch": 0.9383083705585238, "grad_norm": 0.530852707752864, "learning_rate": 3.427412814274128e-06, "loss": 0.5606, "step": 32138 }, { "epoch": 0.9383375667863711, "grad_norm": 0.5172606156311178, "learning_rate": 3.4257907542579076e-06, "loss": 0.5687, "step": 32139 }, { "epoch": 0.9383667630142185, "grad_norm": 0.5241024162867156, "learning_rate": 3.4241686942416868e-06, "loss": 0.5598, "step": 32140 }, { "epoch": 0.938395959242066, "grad_norm": 0.5685066518994999, "learning_rate": 3.422546634225467e-06, "loss": 0.645, "step": 32141 }, { "epoch": 0.9384251554699133, "grad_norm": 0.532807222969267, "learning_rate": 3.4209245742092457e-06, "loss": 0.6271, "step": 32142 }, { "epoch": 0.9384543516977607, "grad_norm": 0.5505962838082282, "learning_rate": 3.4193025141930257e-06, "loss": 0.601, "step": 32143 }, { "epoch": 0.9384835479256081, "grad_norm": 0.5399276202298158, "learning_rate": 3.4176804541768045e-06, "loss": 0.5863, "step": 32144 }, { "epoch": 0.9385127441534554, "grad_norm": 0.6080241018498947, "learning_rate": 3.4160583941605837e-06, "loss": 0.6866, "step": 32145 }, { "epoch": 0.9385419403813028, "grad_norm": 0.5331522783853172, "learning_rate": 3.4144363341443634e-06, "loss": 0.5975, "step": 32146 }, { "epoch": 0.9385711366091501, "grad_norm": 0.4931153492671791, "learning_rate": 3.4128142741281426e-06, "loss": 0.5283, "step": 32147 }, { "epoch": 0.9386003328369975, "grad_norm": 0.5447536325549343, "learning_rate": 3.4111922141119227e-06, "loss": 0.6142, "step": 32148 }, { "epoch": 0.9386295290648449, "grad_norm": 0.5770709891606768, "learning_rate": 3.4095701540957015e-06, "loss": 0.5962, "step": 32149 }, { "epoch": 0.9386587252926922, "grad_norm": 0.5285948521850786, "learning_rate": 3.4079480940794815e-06, "loss": 0.5695, "step": 32150 }, { "epoch": 0.9386879215205396, "grad_norm": 0.4953508678411906, "learning_rate": 3.4063260340632603e-06, "loss": 0.5398, "step": 32151 }, { "epoch": 0.938717117748387, "grad_norm": 0.5265328607938802, "learning_rate": 3.4047039740470396e-06, "loss": 0.5818, "step": 32152 }, { "epoch": 0.9387463139762343, "grad_norm": 0.5113989630582703, "learning_rate": 3.4030819140308196e-06, "loss": 0.5489, "step": 32153 }, { "epoch": 0.9387755102040817, "grad_norm": 0.4801395266568279, "learning_rate": 3.4014598540145984e-06, "loss": 0.4992, "step": 32154 }, { "epoch": 0.938804706431929, "grad_norm": 0.509187735918434, "learning_rate": 3.3998377939983785e-06, "loss": 0.5582, "step": 32155 }, { "epoch": 0.9388339026597764, "grad_norm": 0.5159834369069481, "learning_rate": 3.3982157339821573e-06, "loss": 0.5132, "step": 32156 }, { "epoch": 0.9388630988876238, "grad_norm": 0.5210687107451594, "learning_rate": 3.3965936739659374e-06, "loss": 0.5401, "step": 32157 }, { "epoch": 0.9388922951154711, "grad_norm": 0.5076720267640319, "learning_rate": 3.394971613949716e-06, "loss": 0.5445, "step": 32158 }, { "epoch": 0.9389214913433185, "grad_norm": 0.5505837124981019, "learning_rate": 3.3933495539334954e-06, "loss": 0.6204, "step": 32159 }, { "epoch": 0.9389506875711658, "grad_norm": 0.4993306362550523, "learning_rate": 3.3917274939172754e-06, "loss": 0.5485, "step": 32160 }, { "epoch": 0.9389798837990132, "grad_norm": 0.5379690666262247, "learning_rate": 3.3901054339010542e-06, "loss": 0.6303, "step": 32161 }, { "epoch": 0.9390090800268606, "grad_norm": 0.5270952357817636, "learning_rate": 3.3884833738848343e-06, "loss": 0.575, "step": 32162 }, { "epoch": 0.9390382762547079, "grad_norm": 0.518872672050941, "learning_rate": 3.386861313868613e-06, "loss": 0.5301, "step": 32163 }, { "epoch": 0.9390674724825553, "grad_norm": 0.551170976468973, "learning_rate": 3.385239253852393e-06, "loss": 0.6004, "step": 32164 }, { "epoch": 0.9390966687104026, "grad_norm": 0.5866809917543558, "learning_rate": 3.383617193836172e-06, "loss": 0.6726, "step": 32165 }, { "epoch": 0.93912586493825, "grad_norm": 0.5249649810097293, "learning_rate": 3.381995133819951e-06, "loss": 0.5785, "step": 32166 }, { "epoch": 0.9391550611660974, "grad_norm": 0.5296102116957038, "learning_rate": 3.3803730738037313e-06, "loss": 0.548, "step": 32167 }, { "epoch": 0.9391842573939447, "grad_norm": 0.4984080294785268, "learning_rate": 3.37875101378751e-06, "loss": 0.5049, "step": 32168 }, { "epoch": 0.9392134536217921, "grad_norm": 0.533207691138336, "learning_rate": 3.37712895377129e-06, "loss": 0.5919, "step": 32169 }, { "epoch": 0.9392426498496395, "grad_norm": 0.502001402703786, "learning_rate": 3.375506893755069e-06, "loss": 0.493, "step": 32170 }, { "epoch": 0.9392718460774868, "grad_norm": 0.5449032612856723, "learning_rate": 3.373884833738848e-06, "loss": 0.6385, "step": 32171 }, { "epoch": 0.9393010423053342, "grad_norm": 0.5284564539547931, "learning_rate": 3.3722627737226278e-06, "loss": 0.5934, "step": 32172 }, { "epoch": 0.9393302385331815, "grad_norm": 0.5335409099413257, "learning_rate": 3.370640713706407e-06, "loss": 0.6172, "step": 32173 }, { "epoch": 0.9393594347610289, "grad_norm": 0.53558555466934, "learning_rate": 3.369018653690187e-06, "loss": 0.5858, "step": 32174 }, { "epoch": 0.9393886309888763, "grad_norm": 0.5824993626162617, "learning_rate": 3.367396593673966e-06, "loss": 0.6137, "step": 32175 }, { "epoch": 0.9394178272167236, "grad_norm": 0.5375576520832217, "learning_rate": 3.365774533657746e-06, "loss": 0.5997, "step": 32176 }, { "epoch": 0.939447023444571, "grad_norm": 0.5507819953271627, "learning_rate": 3.3641524736415247e-06, "loss": 0.5931, "step": 32177 }, { "epoch": 0.9394762196724183, "grad_norm": 0.48554710084248576, "learning_rate": 3.362530413625304e-06, "loss": 0.5128, "step": 32178 }, { "epoch": 0.9395054159002657, "grad_norm": 0.5032397619533611, "learning_rate": 3.3609083536090836e-06, "loss": 0.5542, "step": 32179 }, { "epoch": 0.9395346121281131, "grad_norm": 0.5293608999622847, "learning_rate": 3.359286293592863e-06, "loss": 0.5321, "step": 32180 }, { "epoch": 0.9395638083559604, "grad_norm": 0.5548676234626403, "learning_rate": 3.357664233576643e-06, "loss": 0.6091, "step": 32181 }, { "epoch": 0.9395930045838078, "grad_norm": 0.5446287219717447, "learning_rate": 3.3560421735604217e-06, "loss": 0.5957, "step": 32182 }, { "epoch": 0.9396222008116552, "grad_norm": 0.526217457756205, "learning_rate": 3.3544201135442018e-06, "loss": 0.589, "step": 32183 }, { "epoch": 0.9396513970395025, "grad_norm": 0.5531463784391729, "learning_rate": 3.3527980535279806e-06, "loss": 0.5931, "step": 32184 }, { "epoch": 0.9396805932673499, "grad_norm": 0.5216156411329212, "learning_rate": 3.3511759935117598e-06, "loss": 0.5582, "step": 32185 }, { "epoch": 0.9397097894951972, "grad_norm": 0.5170696088151837, "learning_rate": 3.3495539334955394e-06, "loss": 0.5944, "step": 32186 }, { "epoch": 0.9397389857230446, "grad_norm": 0.533896288332725, "learning_rate": 3.3479318734793186e-06, "loss": 0.5953, "step": 32187 }, { "epoch": 0.939768181950892, "grad_norm": 0.49319260472227194, "learning_rate": 3.3463098134630987e-06, "loss": 0.5034, "step": 32188 }, { "epoch": 0.9397973781787393, "grad_norm": 0.5161351252551574, "learning_rate": 3.3446877534468775e-06, "loss": 0.5554, "step": 32189 }, { "epoch": 0.9398265744065867, "grad_norm": 0.5262938312684561, "learning_rate": 3.3430656934306576e-06, "loss": 0.5668, "step": 32190 }, { "epoch": 0.939855770634434, "grad_norm": 0.5428452818084186, "learning_rate": 3.3414436334144364e-06, "loss": 0.5725, "step": 32191 }, { "epoch": 0.9398849668622814, "grad_norm": 0.5434047393336994, "learning_rate": 3.3398215733982156e-06, "loss": 0.6252, "step": 32192 }, { "epoch": 0.9399141630901288, "grad_norm": 0.525799599971682, "learning_rate": 3.3381995133819952e-06, "loss": 0.6004, "step": 32193 }, { "epoch": 0.9399433593179761, "grad_norm": 0.5792079269181977, "learning_rate": 3.3365774533657745e-06, "loss": 0.6155, "step": 32194 }, { "epoch": 0.9399725555458235, "grad_norm": 0.48022511447209, "learning_rate": 3.3349553933495545e-06, "loss": 0.5132, "step": 32195 }, { "epoch": 0.9400017517736708, "grad_norm": 0.5280893712137249, "learning_rate": 3.3333333333333333e-06, "loss": 0.563, "step": 32196 }, { "epoch": 0.9400309480015182, "grad_norm": 0.5122515280167718, "learning_rate": 3.3317112733171125e-06, "loss": 0.5704, "step": 32197 }, { "epoch": 0.9400601442293656, "grad_norm": 0.5104689397069297, "learning_rate": 3.330089213300892e-06, "loss": 0.5521, "step": 32198 }, { "epoch": 0.9400893404572129, "grad_norm": 0.5011729474982138, "learning_rate": 3.3284671532846714e-06, "loss": 0.5067, "step": 32199 }, { "epoch": 0.9401185366850603, "grad_norm": 0.5171515126534144, "learning_rate": 3.326845093268451e-06, "loss": 0.5345, "step": 32200 }, { "epoch": 0.9401477329129077, "grad_norm": 0.5470908099941381, "learning_rate": 3.3252230332522303e-06, "loss": 0.5946, "step": 32201 }, { "epoch": 0.940176929140755, "grad_norm": 0.5383463363414716, "learning_rate": 3.3236009732360103e-06, "loss": 0.6324, "step": 32202 }, { "epoch": 0.9402061253686024, "grad_norm": 0.5323240213266108, "learning_rate": 3.321978913219789e-06, "loss": 0.5967, "step": 32203 }, { "epoch": 0.9402353215964497, "grad_norm": 0.5468115047428839, "learning_rate": 3.3203568532035684e-06, "loss": 0.6368, "step": 32204 }, { "epoch": 0.9402645178242971, "grad_norm": 0.4926992450841269, "learning_rate": 3.318734793187348e-06, "loss": 0.5009, "step": 32205 }, { "epoch": 0.9402937140521445, "grad_norm": 0.4752571012243361, "learning_rate": 3.3171127331711272e-06, "loss": 0.5034, "step": 32206 }, { "epoch": 0.9403229102799918, "grad_norm": 0.5190638536263897, "learning_rate": 3.315490673154907e-06, "loss": 0.5508, "step": 32207 }, { "epoch": 0.9403521065078392, "grad_norm": 0.5601417175735236, "learning_rate": 3.313868613138686e-06, "loss": 0.6309, "step": 32208 }, { "epoch": 0.9403813027356865, "grad_norm": 0.4991455049965386, "learning_rate": 3.312246553122466e-06, "loss": 0.5541, "step": 32209 }, { "epoch": 0.9404104989635339, "grad_norm": 0.550291674327772, "learning_rate": 3.310624493106245e-06, "loss": 0.6494, "step": 32210 }, { "epoch": 0.9404396951913813, "grad_norm": 0.5732595284625942, "learning_rate": 3.309002433090024e-06, "loss": 0.6308, "step": 32211 }, { "epoch": 0.9404688914192286, "grad_norm": 0.5302018443133496, "learning_rate": 3.307380373073804e-06, "loss": 0.5416, "step": 32212 }, { "epoch": 0.940498087647076, "grad_norm": 0.5290761800003612, "learning_rate": 3.305758313057583e-06, "loss": 0.5958, "step": 32213 }, { "epoch": 0.9405272838749233, "grad_norm": 0.5260368258214957, "learning_rate": 3.3041362530413627e-06, "loss": 0.5583, "step": 32214 }, { "epoch": 0.9405564801027707, "grad_norm": 0.5078708158586608, "learning_rate": 3.302514193025142e-06, "loss": 0.5396, "step": 32215 }, { "epoch": 0.9405856763306181, "grad_norm": 0.5594364707441897, "learning_rate": 3.300892133008922e-06, "loss": 0.6159, "step": 32216 }, { "epoch": 0.9406148725584654, "grad_norm": 0.5556208891627845, "learning_rate": 3.2992700729927008e-06, "loss": 0.6249, "step": 32217 }, { "epoch": 0.9406440687863128, "grad_norm": 0.560053336086033, "learning_rate": 3.29764801297648e-06, "loss": 0.6128, "step": 32218 }, { "epoch": 0.9406732650141602, "grad_norm": 0.5299649693022728, "learning_rate": 3.2960259529602596e-06, "loss": 0.5736, "step": 32219 }, { "epoch": 0.9407024612420075, "grad_norm": 0.4971165142406828, "learning_rate": 3.294403892944039e-06, "loss": 0.4928, "step": 32220 }, { "epoch": 0.9407316574698549, "grad_norm": 0.5689440693613461, "learning_rate": 3.292781832927819e-06, "loss": 0.6846, "step": 32221 }, { "epoch": 0.9407608536977022, "grad_norm": 0.5259721848129855, "learning_rate": 3.2911597729115977e-06, "loss": 0.5734, "step": 32222 }, { "epoch": 0.9407900499255496, "grad_norm": 0.5299831323808301, "learning_rate": 3.289537712895378e-06, "loss": 0.5885, "step": 32223 }, { "epoch": 0.940819246153397, "grad_norm": 0.568855999509127, "learning_rate": 3.2879156528791566e-06, "loss": 0.6653, "step": 32224 }, { "epoch": 0.9408484423812443, "grad_norm": 0.5077378223865311, "learning_rate": 3.286293592862936e-06, "loss": 0.5375, "step": 32225 }, { "epoch": 0.9408776386090917, "grad_norm": 0.627286595369404, "learning_rate": 3.2846715328467155e-06, "loss": 0.6595, "step": 32226 }, { "epoch": 0.940906834836939, "grad_norm": 0.5632014082341562, "learning_rate": 3.2830494728304947e-06, "loss": 0.6232, "step": 32227 }, { "epoch": 0.9409360310647864, "grad_norm": 0.4751125600915832, "learning_rate": 3.2814274128142747e-06, "loss": 0.4839, "step": 32228 }, { "epoch": 0.9409652272926338, "grad_norm": 0.5505001970752912, "learning_rate": 3.2798053527980535e-06, "loss": 0.6292, "step": 32229 }, { "epoch": 0.9409944235204811, "grad_norm": 0.4923207881162828, "learning_rate": 3.2781832927818328e-06, "loss": 0.4644, "step": 32230 }, { "epoch": 0.9410236197483285, "grad_norm": 0.5555689688914421, "learning_rate": 3.2765612327656124e-06, "loss": 0.614, "step": 32231 }, { "epoch": 0.9410528159761758, "grad_norm": 0.5940574260330065, "learning_rate": 3.2749391727493916e-06, "loss": 0.6815, "step": 32232 }, { "epoch": 0.9410820122040232, "grad_norm": 0.5436450187855838, "learning_rate": 3.2733171127331713e-06, "loss": 0.6056, "step": 32233 }, { "epoch": 0.9411112084318706, "grad_norm": 0.5638044371065275, "learning_rate": 3.2716950527169505e-06, "loss": 0.5986, "step": 32234 }, { "epoch": 0.9411404046597179, "grad_norm": 0.556931185473386, "learning_rate": 3.2700729927007306e-06, "loss": 0.6247, "step": 32235 }, { "epoch": 0.9411696008875653, "grad_norm": 0.5138532165664527, "learning_rate": 3.2684509326845094e-06, "loss": 0.5684, "step": 32236 }, { "epoch": 0.9411987971154127, "grad_norm": 0.5414082381633246, "learning_rate": 3.2668288726682886e-06, "loss": 0.585, "step": 32237 }, { "epoch": 0.94122799334326, "grad_norm": 0.567339767730475, "learning_rate": 3.2652068126520682e-06, "loss": 0.6447, "step": 32238 }, { "epoch": 0.9412571895711074, "grad_norm": 0.5163918386318067, "learning_rate": 3.2635847526358474e-06, "loss": 0.5563, "step": 32239 }, { "epoch": 0.9412863857989547, "grad_norm": 0.5295659090267372, "learning_rate": 3.261962692619627e-06, "loss": 0.5928, "step": 32240 }, { "epoch": 0.9413155820268021, "grad_norm": 0.5342765050925842, "learning_rate": 3.2603406326034063e-06, "loss": 0.6075, "step": 32241 }, { "epoch": 0.9413447782546495, "grad_norm": 0.5726050962999014, "learning_rate": 3.2587185725871864e-06, "loss": 0.6441, "step": 32242 }, { "epoch": 0.9413739744824968, "grad_norm": 0.4769983176144904, "learning_rate": 3.257096512570965e-06, "loss": 0.496, "step": 32243 }, { "epoch": 0.9414031707103442, "grad_norm": 0.5222252017979668, "learning_rate": 3.2554744525547444e-06, "loss": 0.5537, "step": 32244 }, { "epoch": 0.9414323669381915, "grad_norm": 0.5366628906325461, "learning_rate": 3.253852392538524e-06, "loss": 0.57, "step": 32245 }, { "epoch": 0.9414615631660389, "grad_norm": 0.5195806180958923, "learning_rate": 3.2522303325223033e-06, "loss": 0.5529, "step": 32246 }, { "epoch": 0.9414907593938863, "grad_norm": 0.5126783484694845, "learning_rate": 3.250608272506083e-06, "loss": 0.5296, "step": 32247 }, { "epoch": 0.9415199556217336, "grad_norm": 0.5737210751291107, "learning_rate": 3.248986212489862e-06, "loss": 0.7008, "step": 32248 }, { "epoch": 0.941549151849581, "grad_norm": 0.5121755388661093, "learning_rate": 3.247364152473642e-06, "loss": 0.5526, "step": 32249 }, { "epoch": 0.9415783480774284, "grad_norm": 0.534837083475233, "learning_rate": 3.245742092457421e-06, "loss": 0.6139, "step": 32250 }, { "epoch": 0.9416075443052757, "grad_norm": 0.5253023377194862, "learning_rate": 3.2441200324412002e-06, "loss": 0.5539, "step": 32251 }, { "epoch": 0.9416367405331231, "grad_norm": 0.4963609636097783, "learning_rate": 3.24249797242498e-06, "loss": 0.5092, "step": 32252 }, { "epoch": 0.9416659367609704, "grad_norm": 0.5033749331100246, "learning_rate": 3.240875912408759e-06, "loss": 0.5563, "step": 32253 }, { "epoch": 0.9416951329888178, "grad_norm": 0.5526578056785664, "learning_rate": 3.2392538523925387e-06, "loss": 0.6077, "step": 32254 }, { "epoch": 0.9417243292166652, "grad_norm": 0.505549324592212, "learning_rate": 3.237631792376318e-06, "loss": 0.536, "step": 32255 }, { "epoch": 0.9417535254445125, "grad_norm": 0.5733794811127589, "learning_rate": 3.236009732360097e-06, "loss": 0.6594, "step": 32256 }, { "epoch": 0.9417827216723599, "grad_norm": 0.5259278578120546, "learning_rate": 3.234387672343877e-06, "loss": 0.6011, "step": 32257 }, { "epoch": 0.9418119179002072, "grad_norm": 0.5586934009553823, "learning_rate": 3.232765612327656e-06, "loss": 0.6453, "step": 32258 }, { "epoch": 0.9418411141280546, "grad_norm": 0.5193764009160345, "learning_rate": 3.2311435523114357e-06, "loss": 0.5423, "step": 32259 }, { "epoch": 0.941870310355902, "grad_norm": 0.5126249871752853, "learning_rate": 3.229521492295215e-06, "loss": 0.5543, "step": 32260 }, { "epoch": 0.9418995065837493, "grad_norm": 0.49931854906131645, "learning_rate": 3.2278994322789945e-06, "loss": 0.5172, "step": 32261 }, { "epoch": 0.9419287028115968, "grad_norm": 0.5190635357788932, "learning_rate": 3.2262773722627738e-06, "loss": 0.558, "step": 32262 }, { "epoch": 0.9419578990394442, "grad_norm": 0.5220884863451851, "learning_rate": 3.224655312246553e-06, "loss": 0.5619, "step": 32263 }, { "epoch": 0.9419870952672915, "grad_norm": 0.5392902894203735, "learning_rate": 3.2230332522303326e-06, "loss": 0.5745, "step": 32264 }, { "epoch": 0.9420162914951389, "grad_norm": 0.5393740469406838, "learning_rate": 3.221411192214112e-06, "loss": 0.6137, "step": 32265 }, { "epoch": 0.9420454877229862, "grad_norm": 0.5029759347797269, "learning_rate": 3.2197891321978915e-06, "loss": 0.5094, "step": 32266 }, { "epoch": 0.9420746839508336, "grad_norm": 0.5542620633194563, "learning_rate": 3.2181670721816707e-06, "loss": 0.6821, "step": 32267 }, { "epoch": 0.942103880178681, "grad_norm": 0.5465205182604713, "learning_rate": 3.2165450121654504e-06, "loss": 0.5897, "step": 32268 }, { "epoch": 0.9421330764065283, "grad_norm": 0.5498457174641519, "learning_rate": 3.2149229521492296e-06, "loss": 0.6044, "step": 32269 }, { "epoch": 0.9421622726343757, "grad_norm": 0.5313674284569994, "learning_rate": 3.213300892133009e-06, "loss": 0.6023, "step": 32270 }, { "epoch": 0.942191468862223, "grad_norm": 0.5062856496559575, "learning_rate": 3.2116788321167884e-06, "loss": 0.5172, "step": 32271 }, { "epoch": 0.9422206650900704, "grad_norm": 0.5385784532135786, "learning_rate": 3.2100567721005677e-06, "loss": 0.561, "step": 32272 }, { "epoch": 0.9422498613179178, "grad_norm": 0.5425541054328534, "learning_rate": 3.2084347120843473e-06, "loss": 0.5864, "step": 32273 }, { "epoch": 0.9422790575457651, "grad_norm": 0.5212521741991806, "learning_rate": 3.2068126520681265e-06, "loss": 0.557, "step": 32274 }, { "epoch": 0.9423082537736125, "grad_norm": 0.5882494770156771, "learning_rate": 3.205190592051906e-06, "loss": 0.5893, "step": 32275 }, { "epoch": 0.9423374500014599, "grad_norm": 0.5631052597702288, "learning_rate": 3.2035685320356854e-06, "loss": 0.6538, "step": 32276 }, { "epoch": 0.9423666462293072, "grad_norm": 0.469223777918769, "learning_rate": 3.2019464720194646e-06, "loss": 0.4729, "step": 32277 }, { "epoch": 0.9423958424571546, "grad_norm": 0.5903690502682193, "learning_rate": 3.2003244120032443e-06, "loss": 0.6743, "step": 32278 }, { "epoch": 0.9424250386850019, "grad_norm": 0.5413523026893842, "learning_rate": 3.1987023519870235e-06, "loss": 0.6023, "step": 32279 }, { "epoch": 0.9424542349128493, "grad_norm": 0.5072378203748878, "learning_rate": 3.197080291970803e-06, "loss": 0.5269, "step": 32280 }, { "epoch": 0.9424834311406967, "grad_norm": 0.5554695108158475, "learning_rate": 3.1954582319545824e-06, "loss": 0.6154, "step": 32281 }, { "epoch": 0.942512627368544, "grad_norm": 0.4972723269551705, "learning_rate": 3.1938361719383624e-06, "loss": 0.532, "step": 32282 }, { "epoch": 0.9425418235963914, "grad_norm": 0.4858398098758508, "learning_rate": 3.1922141119221412e-06, "loss": 0.5228, "step": 32283 }, { "epoch": 0.9425710198242387, "grad_norm": 0.5338057142056322, "learning_rate": 3.1905920519059204e-06, "loss": 0.5813, "step": 32284 }, { "epoch": 0.9426002160520861, "grad_norm": 0.5470839844965295, "learning_rate": 3.1889699918897e-06, "loss": 0.5999, "step": 32285 }, { "epoch": 0.9426294122799335, "grad_norm": 0.534991764471518, "learning_rate": 3.1873479318734793e-06, "loss": 0.6062, "step": 32286 }, { "epoch": 0.9426586085077808, "grad_norm": 0.5906824525171126, "learning_rate": 3.185725871857259e-06, "loss": 0.7128, "step": 32287 }, { "epoch": 0.9426878047356282, "grad_norm": 0.5246865261525341, "learning_rate": 3.184103811841038e-06, "loss": 0.5592, "step": 32288 }, { "epoch": 0.9427170009634755, "grad_norm": 0.5315462367076781, "learning_rate": 3.1824817518248174e-06, "loss": 0.566, "step": 32289 }, { "epoch": 0.9427461971913229, "grad_norm": 0.5396082549473973, "learning_rate": 3.180859691808597e-06, "loss": 0.5967, "step": 32290 }, { "epoch": 0.9427753934191703, "grad_norm": 0.5274475530820991, "learning_rate": 3.1792376317923763e-06, "loss": 0.5583, "step": 32291 }, { "epoch": 0.9428045896470176, "grad_norm": 0.505938114486207, "learning_rate": 3.177615571776156e-06, "loss": 0.5324, "step": 32292 }, { "epoch": 0.942833785874865, "grad_norm": 0.5372983858136244, "learning_rate": 3.175993511759935e-06, "loss": 0.6219, "step": 32293 }, { "epoch": 0.9428629821027124, "grad_norm": 0.5217261934655979, "learning_rate": 3.1743714517437148e-06, "loss": 0.5225, "step": 32294 }, { "epoch": 0.9428921783305597, "grad_norm": 0.5282447999560587, "learning_rate": 3.172749391727494e-06, "loss": 0.5811, "step": 32295 }, { "epoch": 0.9429213745584071, "grad_norm": 0.5373886196383637, "learning_rate": 3.171127331711273e-06, "loss": 0.6189, "step": 32296 }, { "epoch": 0.9429505707862544, "grad_norm": 0.5703731937222348, "learning_rate": 3.169505271695053e-06, "loss": 0.6629, "step": 32297 }, { "epoch": 0.9429797670141018, "grad_norm": 0.4898739675365496, "learning_rate": 3.167883211678832e-06, "loss": 0.4792, "step": 32298 }, { "epoch": 0.9430089632419492, "grad_norm": 0.5417824948433138, "learning_rate": 3.1662611516626117e-06, "loss": 0.6311, "step": 32299 }, { "epoch": 0.9430381594697965, "grad_norm": 0.5043652496706669, "learning_rate": 3.164639091646391e-06, "loss": 0.5318, "step": 32300 }, { "epoch": 0.9430673556976439, "grad_norm": 0.54900343195635, "learning_rate": 3.1630170316301706e-06, "loss": 0.6273, "step": 32301 }, { "epoch": 0.9430965519254912, "grad_norm": 0.5048222800741956, "learning_rate": 3.16139497161395e-06, "loss": 0.5467, "step": 32302 }, { "epoch": 0.9431257481533386, "grad_norm": 0.5316817051492532, "learning_rate": 3.159772911597729e-06, "loss": 0.5991, "step": 32303 }, { "epoch": 0.943154944381186, "grad_norm": 0.5585428109403019, "learning_rate": 3.1581508515815087e-06, "loss": 0.6319, "step": 32304 }, { "epoch": 0.9431841406090333, "grad_norm": 0.5701549192279117, "learning_rate": 3.156528791565288e-06, "loss": 0.6205, "step": 32305 }, { "epoch": 0.9432133368368807, "grad_norm": 0.48532759532099556, "learning_rate": 3.1549067315490675e-06, "loss": 0.5158, "step": 32306 }, { "epoch": 0.943242533064728, "grad_norm": 0.5641538219003879, "learning_rate": 3.1532846715328468e-06, "loss": 0.6243, "step": 32307 }, { "epoch": 0.9432717292925754, "grad_norm": 0.5128418743758749, "learning_rate": 3.1516626115166264e-06, "loss": 0.5215, "step": 32308 }, { "epoch": 0.9433009255204228, "grad_norm": 0.4927404898271445, "learning_rate": 3.1500405515004056e-06, "loss": 0.499, "step": 32309 }, { "epoch": 0.9433301217482701, "grad_norm": 0.5559181305180092, "learning_rate": 3.148418491484185e-06, "loss": 0.5949, "step": 32310 }, { "epoch": 0.9433593179761175, "grad_norm": 0.5464855040438614, "learning_rate": 3.1467964314679645e-06, "loss": 0.6334, "step": 32311 }, { "epoch": 0.9433885142039649, "grad_norm": 0.5421991029187502, "learning_rate": 3.1451743714517437e-06, "loss": 0.5796, "step": 32312 }, { "epoch": 0.9434177104318122, "grad_norm": 0.5008265315148621, "learning_rate": 3.1435523114355234e-06, "loss": 0.5415, "step": 32313 }, { "epoch": 0.9434469066596596, "grad_norm": 0.5587970753521595, "learning_rate": 3.1419302514193026e-06, "loss": 0.6286, "step": 32314 }, { "epoch": 0.9434761028875069, "grad_norm": 0.542609978865544, "learning_rate": 3.1403081914030822e-06, "loss": 0.5604, "step": 32315 }, { "epoch": 0.9435052991153543, "grad_norm": 0.5371336810473919, "learning_rate": 3.1386861313868614e-06, "loss": 0.5541, "step": 32316 }, { "epoch": 0.9435344953432017, "grad_norm": 0.556266115234386, "learning_rate": 3.1370640713706407e-06, "loss": 0.6271, "step": 32317 }, { "epoch": 0.943563691571049, "grad_norm": 0.571833911069401, "learning_rate": 3.1354420113544203e-06, "loss": 0.6689, "step": 32318 }, { "epoch": 0.9435928877988964, "grad_norm": 0.5318529453578185, "learning_rate": 3.1338199513381995e-06, "loss": 0.5405, "step": 32319 }, { "epoch": 0.9436220840267437, "grad_norm": 0.5699495684686243, "learning_rate": 3.132197891321979e-06, "loss": 0.583, "step": 32320 }, { "epoch": 0.9436512802545911, "grad_norm": 0.5115453694560493, "learning_rate": 3.1305758313057584e-06, "loss": 0.5404, "step": 32321 }, { "epoch": 0.9436804764824385, "grad_norm": 0.5386514832202461, "learning_rate": 3.1289537712895376e-06, "loss": 0.5726, "step": 32322 }, { "epoch": 0.9437096727102858, "grad_norm": 0.5788767457904888, "learning_rate": 3.1273317112733173e-06, "loss": 0.6113, "step": 32323 }, { "epoch": 0.9437388689381332, "grad_norm": 0.5325619620552698, "learning_rate": 3.1257096512570965e-06, "loss": 0.5817, "step": 32324 }, { "epoch": 0.9437680651659806, "grad_norm": 0.5468118046213594, "learning_rate": 3.1240875912408757e-06, "loss": 0.6134, "step": 32325 }, { "epoch": 0.9437972613938279, "grad_norm": 0.4929713412233732, "learning_rate": 3.1224655312246553e-06, "loss": 0.5313, "step": 32326 }, { "epoch": 0.9438264576216753, "grad_norm": 0.5344703821960947, "learning_rate": 3.120843471208435e-06, "loss": 0.5904, "step": 32327 }, { "epoch": 0.9438556538495226, "grad_norm": 0.535559345062675, "learning_rate": 3.119221411192214e-06, "loss": 0.5798, "step": 32328 }, { "epoch": 0.94388485007737, "grad_norm": 0.5491659731302089, "learning_rate": 3.117599351175994e-06, "loss": 0.5831, "step": 32329 }, { "epoch": 0.9439140463052174, "grad_norm": 0.49798644284659327, "learning_rate": 3.115977291159773e-06, "loss": 0.5088, "step": 32330 }, { "epoch": 0.9439432425330647, "grad_norm": 0.4930525291477692, "learning_rate": 3.1143552311435527e-06, "loss": 0.5194, "step": 32331 }, { "epoch": 0.9439724387609121, "grad_norm": 0.5507376011241598, "learning_rate": 3.1127331711273315e-06, "loss": 0.5843, "step": 32332 }, { "epoch": 0.9440016349887594, "grad_norm": 0.5280792075975405, "learning_rate": 3.111111111111111e-06, "loss": 0.5547, "step": 32333 }, { "epoch": 0.9440308312166068, "grad_norm": 0.5546438183691796, "learning_rate": 3.109489051094891e-06, "loss": 0.6289, "step": 32334 }, { "epoch": 0.9440600274444542, "grad_norm": 0.4781113717911202, "learning_rate": 3.10786699107867e-06, "loss": 0.469, "step": 32335 }, { "epoch": 0.9440892236723015, "grad_norm": 0.5389120873106323, "learning_rate": 3.1062449310624497e-06, "loss": 0.5796, "step": 32336 }, { "epoch": 0.9441184199001489, "grad_norm": 0.5526662729747331, "learning_rate": 3.104622871046229e-06, "loss": 0.6391, "step": 32337 }, { "epoch": 0.9441476161279962, "grad_norm": 0.5168053090208419, "learning_rate": 3.103000811030008e-06, "loss": 0.5455, "step": 32338 }, { "epoch": 0.9441768123558436, "grad_norm": 0.5877155173479901, "learning_rate": 3.1013787510137873e-06, "loss": 0.7049, "step": 32339 }, { "epoch": 0.944206008583691, "grad_norm": 0.5232374009157618, "learning_rate": 3.099756690997567e-06, "loss": 0.5581, "step": 32340 }, { "epoch": 0.9442352048115383, "grad_norm": 0.5283560993935186, "learning_rate": 3.0981346309813466e-06, "loss": 0.5946, "step": 32341 }, { "epoch": 0.9442644010393857, "grad_norm": 0.5514507076643277, "learning_rate": 3.096512570965126e-06, "loss": 0.6618, "step": 32342 }, { "epoch": 0.944293597267233, "grad_norm": 0.5201995309736104, "learning_rate": 3.0948905109489055e-06, "loss": 0.5863, "step": 32343 }, { "epoch": 0.9443227934950804, "grad_norm": 0.5099306516405537, "learning_rate": 3.0932684509326847e-06, "loss": 0.5236, "step": 32344 }, { "epoch": 0.9443519897229278, "grad_norm": 0.5099904981668043, "learning_rate": 3.091646390916464e-06, "loss": 0.5615, "step": 32345 }, { "epoch": 0.9443811859507751, "grad_norm": 0.5033403319646788, "learning_rate": 3.0900243309002436e-06, "loss": 0.5375, "step": 32346 }, { "epoch": 0.9444103821786225, "grad_norm": 0.5164537534655805, "learning_rate": 3.088402270884023e-06, "loss": 0.5664, "step": 32347 }, { "epoch": 0.9444395784064699, "grad_norm": 0.5031775588090551, "learning_rate": 3.0867802108678024e-06, "loss": 0.4921, "step": 32348 }, { "epoch": 0.9444687746343172, "grad_norm": 0.5026308011384634, "learning_rate": 3.0851581508515817e-06, "loss": 0.5385, "step": 32349 }, { "epoch": 0.9444979708621646, "grad_norm": 0.5313699622886277, "learning_rate": 3.0835360908353613e-06, "loss": 0.5254, "step": 32350 }, { "epoch": 0.9445271670900119, "grad_norm": 0.5535534838346804, "learning_rate": 3.0819140308191405e-06, "loss": 0.6145, "step": 32351 }, { "epoch": 0.9445563633178593, "grad_norm": 0.559691782452749, "learning_rate": 3.0802919708029197e-06, "loss": 0.6003, "step": 32352 }, { "epoch": 0.9445855595457067, "grad_norm": 0.5338186936097341, "learning_rate": 3.0786699107866994e-06, "loss": 0.5688, "step": 32353 }, { "epoch": 0.944614755773554, "grad_norm": 0.4969292864005201, "learning_rate": 3.0770478507704786e-06, "loss": 0.4852, "step": 32354 }, { "epoch": 0.9446439520014014, "grad_norm": 0.49350365866655393, "learning_rate": 3.0754257907542583e-06, "loss": 0.5097, "step": 32355 }, { "epoch": 0.9446731482292487, "grad_norm": 0.49030710873507116, "learning_rate": 3.0738037307380375e-06, "loss": 0.5184, "step": 32356 }, { "epoch": 0.9447023444570961, "grad_norm": 0.5190744176287514, "learning_rate": 3.072181670721817e-06, "loss": 0.5935, "step": 32357 }, { "epoch": 0.9447315406849435, "grad_norm": 0.5375127506394091, "learning_rate": 3.070559610705596e-06, "loss": 0.5986, "step": 32358 }, { "epoch": 0.9447607369127908, "grad_norm": 0.555936409338295, "learning_rate": 3.0689375506893756e-06, "loss": 0.6688, "step": 32359 }, { "epoch": 0.9447899331406382, "grad_norm": 0.5341539220000258, "learning_rate": 3.067315490673155e-06, "loss": 0.5729, "step": 32360 }, { "epoch": 0.9448191293684856, "grad_norm": 0.5349497345252815, "learning_rate": 3.0656934306569344e-06, "loss": 0.5897, "step": 32361 }, { "epoch": 0.9448483255963329, "grad_norm": 0.5078827338729323, "learning_rate": 3.064071370640714e-06, "loss": 0.5368, "step": 32362 }, { "epoch": 0.9448775218241803, "grad_norm": 0.5403233865357698, "learning_rate": 3.0624493106244933e-06, "loss": 0.5711, "step": 32363 }, { "epoch": 0.9449067180520276, "grad_norm": 0.5292697927782365, "learning_rate": 3.060827250608273e-06, "loss": 0.5828, "step": 32364 }, { "epoch": 0.944935914279875, "grad_norm": 0.5075000402630319, "learning_rate": 3.0592051905920517e-06, "loss": 0.5465, "step": 32365 }, { "epoch": 0.9449651105077224, "grad_norm": 0.4878006443378957, "learning_rate": 3.0575831305758314e-06, "loss": 0.5176, "step": 32366 }, { "epoch": 0.9449943067355697, "grad_norm": 0.5323372172850283, "learning_rate": 3.055961070559611e-06, "loss": 0.6506, "step": 32367 }, { "epoch": 0.9450235029634171, "grad_norm": 0.5534923976295848, "learning_rate": 3.0543390105433902e-06, "loss": 0.6055, "step": 32368 }, { "epoch": 0.9450526991912644, "grad_norm": 0.5356088852112213, "learning_rate": 3.05271695052717e-06, "loss": 0.5728, "step": 32369 }, { "epoch": 0.9450818954191118, "grad_norm": 0.5375426806053288, "learning_rate": 3.051094890510949e-06, "loss": 0.6207, "step": 32370 }, { "epoch": 0.9451110916469592, "grad_norm": 0.5187697342238567, "learning_rate": 3.0494728304947283e-06, "loss": 0.5296, "step": 32371 }, { "epoch": 0.9451402878748065, "grad_norm": 0.5408695813234544, "learning_rate": 3.0478507704785076e-06, "loss": 0.5539, "step": 32372 }, { "epoch": 0.9451694841026539, "grad_norm": 0.49475089560928176, "learning_rate": 3.046228710462287e-06, "loss": 0.4934, "step": 32373 }, { "epoch": 0.9451986803305013, "grad_norm": 0.47931107764233166, "learning_rate": 3.044606650446067e-06, "loss": 0.4783, "step": 32374 }, { "epoch": 0.9452278765583486, "grad_norm": 0.5359974110883846, "learning_rate": 3.042984590429846e-06, "loss": 0.6284, "step": 32375 }, { "epoch": 0.945257072786196, "grad_norm": 0.5208776687856592, "learning_rate": 3.0413625304136257e-06, "loss": 0.5645, "step": 32376 }, { "epoch": 0.9452862690140433, "grad_norm": 0.5300305302984716, "learning_rate": 3.039740470397405e-06, "loss": 0.5759, "step": 32377 }, { "epoch": 0.9453154652418907, "grad_norm": 0.5269026915027002, "learning_rate": 3.038118410381184e-06, "loss": 0.5921, "step": 32378 }, { "epoch": 0.9453446614697381, "grad_norm": 0.5428244132272678, "learning_rate": 3.0364963503649634e-06, "loss": 0.6031, "step": 32379 }, { "epoch": 0.9453738576975854, "grad_norm": 0.5693152604177336, "learning_rate": 3.034874290348743e-06, "loss": 0.668, "step": 32380 }, { "epoch": 0.9454030539254328, "grad_norm": 0.5396843085854555, "learning_rate": 3.0332522303325227e-06, "loss": 0.5599, "step": 32381 }, { "epoch": 0.9454322501532803, "grad_norm": 0.5053655409727461, "learning_rate": 3.031630170316302e-06, "loss": 0.5188, "step": 32382 }, { "epoch": 0.9454614463811276, "grad_norm": 0.4901064932096684, "learning_rate": 3.0300081103000815e-06, "loss": 0.5371, "step": 32383 }, { "epoch": 0.945490642608975, "grad_norm": 0.5262889508696198, "learning_rate": 3.0283860502838603e-06, "loss": 0.5378, "step": 32384 }, { "epoch": 0.9455198388368223, "grad_norm": 0.5409888325946539, "learning_rate": 3.02676399026764e-06, "loss": 0.5792, "step": 32385 }, { "epoch": 0.9455490350646697, "grad_norm": 0.5257987271761156, "learning_rate": 3.025141930251419e-06, "loss": 0.559, "step": 32386 }, { "epoch": 0.9455782312925171, "grad_norm": 0.5386058145813399, "learning_rate": 3.023519870235199e-06, "loss": 0.5862, "step": 32387 }, { "epoch": 0.9456074275203644, "grad_norm": 0.5179099684137147, "learning_rate": 3.0218978102189785e-06, "loss": 0.5481, "step": 32388 }, { "epoch": 0.9456366237482118, "grad_norm": 0.5325850122283643, "learning_rate": 3.0202757502027577e-06, "loss": 0.564, "step": 32389 }, { "epoch": 0.9456658199760591, "grad_norm": 0.534997735329883, "learning_rate": 3.0186536901865373e-06, "loss": 0.5776, "step": 32390 }, { "epoch": 0.9456950162039065, "grad_norm": 0.5353369864397725, "learning_rate": 3.017031630170316e-06, "loss": 0.5649, "step": 32391 }, { "epoch": 0.9457242124317539, "grad_norm": 0.5936848789861005, "learning_rate": 3.0154095701540958e-06, "loss": 0.6384, "step": 32392 }, { "epoch": 0.9457534086596012, "grad_norm": 0.4770977652954078, "learning_rate": 3.013787510137875e-06, "loss": 0.4325, "step": 32393 }, { "epoch": 0.9457826048874486, "grad_norm": 0.5634112587243617, "learning_rate": 3.0121654501216546e-06, "loss": 0.5179, "step": 32394 }, { "epoch": 0.945811801115296, "grad_norm": 0.5415545462016802, "learning_rate": 3.0105433901054343e-06, "loss": 0.6054, "step": 32395 }, { "epoch": 0.9458409973431433, "grad_norm": 0.5761492364074412, "learning_rate": 3.0089213300892135e-06, "loss": 0.6624, "step": 32396 }, { "epoch": 0.9458701935709907, "grad_norm": 0.537937077363282, "learning_rate": 3.007299270072993e-06, "loss": 0.5822, "step": 32397 }, { "epoch": 0.945899389798838, "grad_norm": 0.5419461545414969, "learning_rate": 3.005677210056772e-06, "loss": 0.6435, "step": 32398 }, { "epoch": 0.9459285860266854, "grad_norm": 0.5795627170157559, "learning_rate": 3.0040551500405516e-06, "loss": 0.6237, "step": 32399 }, { "epoch": 0.9459577822545328, "grad_norm": 0.5409118088808303, "learning_rate": 3.002433090024331e-06, "loss": 0.6027, "step": 32400 }, { "epoch": 0.9459869784823801, "grad_norm": 0.5636103267991687, "learning_rate": 3.0008110300081105e-06, "loss": 0.6285, "step": 32401 }, { "epoch": 0.9460161747102275, "grad_norm": 0.8322191672999367, "learning_rate": 2.99918896999189e-06, "loss": 0.6331, "step": 32402 }, { "epoch": 0.9460453709380748, "grad_norm": 0.5699392719161007, "learning_rate": 2.9975669099756693e-06, "loss": 0.63, "step": 32403 }, { "epoch": 0.9460745671659222, "grad_norm": 0.5298013927606628, "learning_rate": 2.9959448499594486e-06, "loss": 0.5602, "step": 32404 }, { "epoch": 0.9461037633937696, "grad_norm": 0.5306611158218703, "learning_rate": 2.9943227899432278e-06, "loss": 0.5603, "step": 32405 }, { "epoch": 0.9461329596216169, "grad_norm": 0.5293345848737327, "learning_rate": 2.9927007299270074e-06, "loss": 0.5796, "step": 32406 }, { "epoch": 0.9461621558494643, "grad_norm": 0.5664867881316431, "learning_rate": 2.9910786699107866e-06, "loss": 0.6838, "step": 32407 }, { "epoch": 0.9461913520773116, "grad_norm": 0.5401229258288577, "learning_rate": 2.9894566098945663e-06, "loss": 0.6186, "step": 32408 }, { "epoch": 0.946220548305159, "grad_norm": 0.4956753285587265, "learning_rate": 2.987834549878346e-06, "loss": 0.504, "step": 32409 }, { "epoch": 0.9462497445330064, "grad_norm": 0.525418626051678, "learning_rate": 2.986212489862125e-06, "loss": 0.5719, "step": 32410 }, { "epoch": 0.9462789407608537, "grad_norm": 0.5685659111118633, "learning_rate": 2.9845904298459044e-06, "loss": 0.5936, "step": 32411 }, { "epoch": 0.9463081369887011, "grad_norm": 0.5451944470873219, "learning_rate": 2.9829683698296836e-06, "loss": 0.5871, "step": 32412 }, { "epoch": 0.9463373332165484, "grad_norm": 0.4752665702240893, "learning_rate": 2.9813463098134632e-06, "loss": 0.4985, "step": 32413 }, { "epoch": 0.9463665294443958, "grad_norm": 0.541290170027567, "learning_rate": 2.979724249797243e-06, "loss": 0.5908, "step": 32414 }, { "epoch": 0.9463957256722432, "grad_norm": 0.5547817870284403, "learning_rate": 2.978102189781022e-06, "loss": 0.5678, "step": 32415 }, { "epoch": 0.9464249219000905, "grad_norm": 0.5313988584291284, "learning_rate": 2.9764801297648017e-06, "loss": 0.6046, "step": 32416 }, { "epoch": 0.9464541181279379, "grad_norm": 0.5121567376633018, "learning_rate": 2.9748580697485805e-06, "loss": 0.5347, "step": 32417 }, { "epoch": 0.9464833143557853, "grad_norm": 0.5253740898728384, "learning_rate": 2.97323600973236e-06, "loss": 0.5707, "step": 32418 }, { "epoch": 0.9465125105836326, "grad_norm": 0.554114887712599, "learning_rate": 2.9716139497161394e-06, "loss": 0.6286, "step": 32419 }, { "epoch": 0.94654170681148, "grad_norm": 0.5634734210007837, "learning_rate": 2.969991889699919e-06, "loss": 0.6333, "step": 32420 }, { "epoch": 0.9465709030393273, "grad_norm": 0.5150875003950601, "learning_rate": 2.9683698296836987e-06, "loss": 0.5695, "step": 32421 }, { "epoch": 0.9466000992671747, "grad_norm": 0.6046398027022833, "learning_rate": 2.966747769667478e-06, "loss": 0.5757, "step": 32422 }, { "epoch": 0.9466292954950221, "grad_norm": 0.5233468351898085, "learning_rate": 2.9651257096512576e-06, "loss": 0.5673, "step": 32423 }, { "epoch": 0.9466584917228694, "grad_norm": 0.5461124620158989, "learning_rate": 2.9635036496350364e-06, "loss": 0.595, "step": 32424 }, { "epoch": 0.9466876879507168, "grad_norm": 0.5748271555621491, "learning_rate": 2.961881589618816e-06, "loss": 0.6592, "step": 32425 }, { "epoch": 0.9467168841785641, "grad_norm": 0.5521468989824135, "learning_rate": 2.9602595296025952e-06, "loss": 0.6097, "step": 32426 }, { "epoch": 0.9467460804064115, "grad_norm": 0.48971745535322303, "learning_rate": 2.958637469586375e-06, "loss": 0.5291, "step": 32427 }, { "epoch": 0.9467752766342589, "grad_norm": 0.5147720504358317, "learning_rate": 2.9570154095701545e-06, "loss": 0.5809, "step": 32428 }, { "epoch": 0.9468044728621062, "grad_norm": 0.5309500177226351, "learning_rate": 2.9553933495539337e-06, "loss": 0.5681, "step": 32429 }, { "epoch": 0.9468336690899536, "grad_norm": 0.4665378715578604, "learning_rate": 2.953771289537713e-06, "loss": 0.4545, "step": 32430 }, { "epoch": 0.946862865317801, "grad_norm": 0.4848176649555855, "learning_rate": 2.952149229521492e-06, "loss": 0.499, "step": 32431 }, { "epoch": 0.9468920615456483, "grad_norm": 0.5508304566328216, "learning_rate": 2.950527169505272e-06, "loss": 0.6242, "step": 32432 }, { "epoch": 0.9469212577734957, "grad_norm": 0.50325846090155, "learning_rate": 2.948905109489051e-06, "loss": 0.5118, "step": 32433 }, { "epoch": 0.946950454001343, "grad_norm": 0.5108009490864197, "learning_rate": 2.9472830494728307e-06, "loss": 0.5226, "step": 32434 }, { "epoch": 0.9469796502291904, "grad_norm": 0.5250476059684305, "learning_rate": 2.9456609894566103e-06, "loss": 0.5777, "step": 32435 }, { "epoch": 0.9470088464570378, "grad_norm": 0.5252650984291372, "learning_rate": 2.9440389294403896e-06, "loss": 0.5746, "step": 32436 }, { "epoch": 0.9470380426848851, "grad_norm": 0.5316972586501328, "learning_rate": 2.9424168694241688e-06, "loss": 0.5689, "step": 32437 }, { "epoch": 0.9470672389127325, "grad_norm": 0.5288831072798197, "learning_rate": 2.940794809407948e-06, "loss": 0.5279, "step": 32438 }, { "epoch": 0.9470964351405798, "grad_norm": 0.5135938796274209, "learning_rate": 2.9391727493917276e-06, "loss": 0.5631, "step": 32439 }, { "epoch": 0.9471256313684272, "grad_norm": 0.5046856984045319, "learning_rate": 2.937550689375507e-06, "loss": 0.5448, "step": 32440 }, { "epoch": 0.9471548275962746, "grad_norm": 0.4521696864088224, "learning_rate": 2.9359286293592865e-06, "loss": 0.4495, "step": 32441 }, { "epoch": 0.9471840238241219, "grad_norm": 0.5483668495755644, "learning_rate": 2.934306569343066e-06, "loss": 0.6544, "step": 32442 }, { "epoch": 0.9472132200519693, "grad_norm": 0.5555087090078629, "learning_rate": 2.932684509326845e-06, "loss": 0.596, "step": 32443 }, { "epoch": 0.9472424162798166, "grad_norm": 0.49973763998581944, "learning_rate": 2.9310624493106246e-06, "loss": 0.5304, "step": 32444 }, { "epoch": 0.947271612507664, "grad_norm": 0.5017969210072744, "learning_rate": 2.929440389294404e-06, "loss": 0.5223, "step": 32445 }, { "epoch": 0.9473008087355114, "grad_norm": 0.5753276859213912, "learning_rate": 2.9278183292781835e-06, "loss": 0.5991, "step": 32446 }, { "epoch": 0.9473300049633587, "grad_norm": 0.5137208209724903, "learning_rate": 2.9261962692619627e-06, "loss": 0.5683, "step": 32447 }, { "epoch": 0.9473592011912061, "grad_norm": 0.5510934148655591, "learning_rate": 2.9245742092457423e-06, "loss": 0.5781, "step": 32448 }, { "epoch": 0.9473883974190535, "grad_norm": 0.5551878692839791, "learning_rate": 2.922952149229522e-06, "loss": 0.6463, "step": 32449 }, { "epoch": 0.9474175936469008, "grad_norm": 0.5245096606473175, "learning_rate": 2.9213300892133008e-06, "loss": 0.5336, "step": 32450 }, { "epoch": 0.9474467898747482, "grad_norm": 0.5264083858133763, "learning_rate": 2.9197080291970804e-06, "loss": 0.5486, "step": 32451 }, { "epoch": 0.9474759861025955, "grad_norm": 0.569711364343224, "learning_rate": 2.9180859691808596e-06, "loss": 0.6042, "step": 32452 }, { "epoch": 0.9475051823304429, "grad_norm": 0.4938721967727702, "learning_rate": 2.9164639091646393e-06, "loss": 0.5313, "step": 32453 }, { "epoch": 0.9475343785582903, "grad_norm": 0.5510587181895656, "learning_rate": 2.9148418491484185e-06, "loss": 0.5431, "step": 32454 }, { "epoch": 0.9475635747861376, "grad_norm": 0.5328688018995994, "learning_rate": 2.913219789132198e-06, "loss": 0.5609, "step": 32455 }, { "epoch": 0.947592771013985, "grad_norm": 0.5371459468365894, "learning_rate": 2.9115977291159778e-06, "loss": 0.6029, "step": 32456 }, { "epoch": 0.9476219672418323, "grad_norm": 0.5600140984726257, "learning_rate": 2.9099756690997566e-06, "loss": 0.6157, "step": 32457 }, { "epoch": 0.9476511634696797, "grad_norm": 0.5073156354312172, "learning_rate": 2.9083536090835362e-06, "loss": 0.5261, "step": 32458 }, { "epoch": 0.9476803596975271, "grad_norm": 0.5159400211346808, "learning_rate": 2.9067315490673154e-06, "loss": 0.5482, "step": 32459 }, { "epoch": 0.9477095559253744, "grad_norm": 0.5337716105177582, "learning_rate": 2.905109489051095e-06, "loss": 0.5476, "step": 32460 }, { "epoch": 0.9477387521532218, "grad_norm": 0.5405319313011209, "learning_rate": 2.9034874290348743e-06, "loss": 0.6262, "step": 32461 }, { "epoch": 0.9477679483810691, "grad_norm": 0.5497829695610787, "learning_rate": 2.901865369018654e-06, "loss": 0.5974, "step": 32462 }, { "epoch": 0.9477971446089165, "grad_norm": 0.49886363366288666, "learning_rate": 2.900243309002433e-06, "loss": 0.5267, "step": 32463 }, { "epoch": 0.9478263408367639, "grad_norm": 0.5503082552484979, "learning_rate": 2.8986212489862124e-06, "loss": 0.6513, "step": 32464 }, { "epoch": 0.9478555370646112, "grad_norm": 0.5270594331818635, "learning_rate": 2.896999188969992e-06, "loss": 0.5906, "step": 32465 }, { "epoch": 0.9478847332924586, "grad_norm": 0.5508786533994043, "learning_rate": 2.8953771289537713e-06, "loss": 0.6103, "step": 32466 }, { "epoch": 0.947913929520306, "grad_norm": 0.5257958280067598, "learning_rate": 2.893755068937551e-06, "loss": 0.5565, "step": 32467 }, { "epoch": 0.9479431257481533, "grad_norm": 0.5146526028632634, "learning_rate": 2.89213300892133e-06, "loss": 0.5417, "step": 32468 }, { "epoch": 0.9479723219760007, "grad_norm": 0.5218914987326428, "learning_rate": 2.8905109489051098e-06, "loss": 0.5686, "step": 32469 }, { "epoch": 0.948001518203848, "grad_norm": 0.5293064983419082, "learning_rate": 2.888888888888889e-06, "loss": 0.5969, "step": 32470 }, { "epoch": 0.9480307144316954, "grad_norm": 0.5333861203730865, "learning_rate": 2.8872668288726682e-06, "loss": 0.6053, "step": 32471 }, { "epoch": 0.9480599106595428, "grad_norm": 0.5164155391928094, "learning_rate": 2.885644768856448e-06, "loss": 0.5383, "step": 32472 }, { "epoch": 0.9480891068873901, "grad_norm": 0.49642634218425286, "learning_rate": 2.884022708840227e-06, "loss": 0.5153, "step": 32473 }, { "epoch": 0.9481183031152375, "grad_norm": 0.5428207687521678, "learning_rate": 2.8824006488240067e-06, "loss": 0.6219, "step": 32474 }, { "epoch": 0.9481474993430848, "grad_norm": 0.584717864425838, "learning_rate": 2.8807785888077864e-06, "loss": 0.6341, "step": 32475 }, { "epoch": 0.9481766955709322, "grad_norm": 0.5121694706277108, "learning_rate": 2.879156528791565e-06, "loss": 0.5489, "step": 32476 }, { "epoch": 0.9482058917987796, "grad_norm": 0.5465996768612921, "learning_rate": 2.877534468775345e-06, "loss": 0.5733, "step": 32477 }, { "epoch": 0.9482350880266269, "grad_norm": 0.5144258476766851, "learning_rate": 2.875912408759124e-06, "loss": 0.5249, "step": 32478 }, { "epoch": 0.9482642842544743, "grad_norm": 0.545263517086774, "learning_rate": 2.8742903487429037e-06, "loss": 0.5941, "step": 32479 }, { "epoch": 0.9482934804823216, "grad_norm": 0.578973592271566, "learning_rate": 2.872668288726683e-06, "loss": 0.6364, "step": 32480 }, { "epoch": 0.948322676710169, "grad_norm": 0.5276466791958585, "learning_rate": 2.8710462287104625e-06, "loss": 0.5977, "step": 32481 }, { "epoch": 0.9483518729380164, "grad_norm": 0.5425511249209397, "learning_rate": 2.869424168694242e-06, "loss": 0.6037, "step": 32482 }, { "epoch": 0.9483810691658637, "grad_norm": 0.549496601682212, "learning_rate": 2.867802108678021e-06, "loss": 0.5889, "step": 32483 }, { "epoch": 0.9484102653937111, "grad_norm": 0.547260912971248, "learning_rate": 2.8661800486618006e-06, "loss": 0.5986, "step": 32484 }, { "epoch": 0.9484394616215585, "grad_norm": 0.5761825548744085, "learning_rate": 2.86455798864558e-06, "loss": 0.7021, "step": 32485 }, { "epoch": 0.9484686578494058, "grad_norm": 0.5117226045575604, "learning_rate": 2.8629359286293595e-06, "loss": 0.5215, "step": 32486 }, { "epoch": 0.9484978540772532, "grad_norm": 0.5209007154737696, "learning_rate": 2.8613138686131387e-06, "loss": 0.5398, "step": 32487 }, { "epoch": 0.9485270503051005, "grad_norm": 0.5686909509647532, "learning_rate": 2.8596918085969184e-06, "loss": 0.6091, "step": 32488 }, { "epoch": 0.9485562465329479, "grad_norm": 0.5453482829693845, "learning_rate": 2.8580697485806976e-06, "loss": 0.6231, "step": 32489 }, { "epoch": 0.9485854427607953, "grad_norm": 0.5269958942496323, "learning_rate": 2.856447688564477e-06, "loss": 0.5685, "step": 32490 }, { "epoch": 0.9486146389886426, "grad_norm": 0.5252152437858372, "learning_rate": 2.8548256285482564e-06, "loss": 0.5906, "step": 32491 }, { "epoch": 0.94864383521649, "grad_norm": 0.5764280056044252, "learning_rate": 2.8532035685320357e-06, "loss": 0.6297, "step": 32492 }, { "epoch": 0.9486730314443373, "grad_norm": 0.5130509693572157, "learning_rate": 2.8515815085158153e-06, "loss": 0.5182, "step": 32493 }, { "epoch": 0.9487022276721847, "grad_norm": 0.48993198317124326, "learning_rate": 2.8499594484995945e-06, "loss": 0.5014, "step": 32494 }, { "epoch": 0.9487314239000321, "grad_norm": 0.4915773636103085, "learning_rate": 2.848337388483374e-06, "loss": 0.492, "step": 32495 }, { "epoch": 0.9487606201278794, "grad_norm": 0.5384069310327092, "learning_rate": 2.8467153284671534e-06, "loss": 0.5945, "step": 32496 }, { "epoch": 0.9487898163557268, "grad_norm": 0.5465873106448673, "learning_rate": 2.8450932684509326e-06, "loss": 0.5623, "step": 32497 }, { "epoch": 0.9488190125835742, "grad_norm": 0.5491153050333583, "learning_rate": 2.8434712084347123e-06, "loss": 0.6471, "step": 32498 }, { "epoch": 0.9488482088114215, "grad_norm": 0.5405656086004341, "learning_rate": 2.8418491484184915e-06, "loss": 0.6208, "step": 32499 }, { "epoch": 0.9488774050392689, "grad_norm": 0.525524289359939, "learning_rate": 2.840227088402271e-06, "loss": 0.5654, "step": 32500 }, { "epoch": 0.9489066012671162, "grad_norm": 0.5386271813676111, "learning_rate": 2.8386050283860503e-06, "loss": 0.5761, "step": 32501 }, { "epoch": 0.9489357974949636, "grad_norm": 0.5266207846297102, "learning_rate": 2.83698296836983e-06, "loss": 0.5879, "step": 32502 }, { "epoch": 0.9489649937228111, "grad_norm": 0.5587317548781021, "learning_rate": 2.8353609083536092e-06, "loss": 0.6359, "step": 32503 }, { "epoch": 0.9489941899506584, "grad_norm": 0.485502785637242, "learning_rate": 2.8337388483373884e-06, "loss": 0.521, "step": 32504 }, { "epoch": 0.9490233861785058, "grad_norm": 0.5311571167114573, "learning_rate": 2.832116788321168e-06, "loss": 0.5701, "step": 32505 }, { "epoch": 0.9490525824063532, "grad_norm": 0.5361483210512559, "learning_rate": 2.8304947283049473e-06, "loss": 0.5689, "step": 32506 }, { "epoch": 0.9490817786342005, "grad_norm": 0.5543664544559461, "learning_rate": 2.828872668288727e-06, "loss": 0.6179, "step": 32507 }, { "epoch": 0.9491109748620479, "grad_norm": 0.541126283157477, "learning_rate": 2.827250608272506e-06, "loss": 0.6057, "step": 32508 }, { "epoch": 0.9491401710898952, "grad_norm": 0.49810195381262495, "learning_rate": 2.8256285482562854e-06, "loss": 0.5082, "step": 32509 }, { "epoch": 0.9491693673177426, "grad_norm": 0.5109669126239633, "learning_rate": 2.824006488240065e-06, "loss": 0.5247, "step": 32510 }, { "epoch": 0.94919856354559, "grad_norm": 0.45973425389836103, "learning_rate": 2.8223844282238443e-06, "loss": 0.4627, "step": 32511 }, { "epoch": 0.9492277597734373, "grad_norm": 0.5276151395638436, "learning_rate": 2.820762368207624e-06, "loss": 0.5778, "step": 32512 }, { "epoch": 0.9492569560012847, "grad_norm": 0.5222435381442908, "learning_rate": 2.819140308191403e-06, "loss": 0.5655, "step": 32513 }, { "epoch": 0.949286152229132, "grad_norm": 0.5361211331760188, "learning_rate": 2.8175182481751828e-06, "loss": 0.606, "step": 32514 }, { "epoch": 0.9493153484569794, "grad_norm": 0.48496795300212087, "learning_rate": 2.815896188158962e-06, "loss": 0.4732, "step": 32515 }, { "epoch": 0.9493445446848268, "grad_norm": 0.5171709837416194, "learning_rate": 2.814274128142741e-06, "loss": 0.5269, "step": 32516 }, { "epoch": 0.9493737409126741, "grad_norm": 0.5027741620352799, "learning_rate": 2.812652068126521e-06, "loss": 0.5534, "step": 32517 }, { "epoch": 0.9494029371405215, "grad_norm": 0.5481359854060411, "learning_rate": 2.8110300081103e-06, "loss": 0.6366, "step": 32518 }, { "epoch": 0.9494321333683688, "grad_norm": 0.5976169203026017, "learning_rate": 2.8094079480940797e-06, "loss": 0.7305, "step": 32519 }, { "epoch": 0.9494613295962162, "grad_norm": 0.4908947269791337, "learning_rate": 2.807785888077859e-06, "loss": 0.4872, "step": 32520 }, { "epoch": 0.9494905258240636, "grad_norm": 0.5302558424788697, "learning_rate": 2.8061638280616386e-06, "loss": 0.5738, "step": 32521 }, { "epoch": 0.9495197220519109, "grad_norm": 0.6126974271191404, "learning_rate": 2.804541768045418e-06, "loss": 0.6974, "step": 32522 }, { "epoch": 0.9495489182797583, "grad_norm": 0.5509036701751654, "learning_rate": 2.802919708029197e-06, "loss": 0.6684, "step": 32523 }, { "epoch": 0.9495781145076057, "grad_norm": 0.5070546687515316, "learning_rate": 2.8012976480129767e-06, "loss": 0.5587, "step": 32524 }, { "epoch": 0.949607310735453, "grad_norm": 0.5513588629969896, "learning_rate": 2.799675587996756e-06, "loss": 0.61, "step": 32525 }, { "epoch": 0.9496365069633004, "grad_norm": 0.5163820659178933, "learning_rate": 2.7980535279805355e-06, "loss": 0.5623, "step": 32526 }, { "epoch": 0.9496657031911477, "grad_norm": 0.5331073558791263, "learning_rate": 2.7964314679643148e-06, "loss": 0.5843, "step": 32527 }, { "epoch": 0.9496948994189951, "grad_norm": 0.5165213300889206, "learning_rate": 2.7948094079480944e-06, "loss": 0.5416, "step": 32528 }, { "epoch": 0.9497240956468425, "grad_norm": 0.5412712754774122, "learning_rate": 2.7931873479318736e-06, "loss": 0.5972, "step": 32529 }, { "epoch": 0.9497532918746898, "grad_norm": 0.5336496907220303, "learning_rate": 2.791565287915653e-06, "loss": 0.5426, "step": 32530 }, { "epoch": 0.9497824881025372, "grad_norm": 0.518211198063616, "learning_rate": 2.7899432278994325e-06, "loss": 0.5548, "step": 32531 }, { "epoch": 0.9498116843303845, "grad_norm": 0.5335890999072476, "learning_rate": 2.7883211678832117e-06, "loss": 0.573, "step": 32532 }, { "epoch": 0.9498408805582319, "grad_norm": 0.5419543724297438, "learning_rate": 2.7866991078669913e-06, "loss": 0.6423, "step": 32533 }, { "epoch": 0.9498700767860793, "grad_norm": 0.5870278856327441, "learning_rate": 2.7850770478507706e-06, "loss": 0.6881, "step": 32534 }, { "epoch": 0.9498992730139266, "grad_norm": 0.48676771013408443, "learning_rate": 2.78345498783455e-06, "loss": 0.5024, "step": 32535 }, { "epoch": 0.949928469241774, "grad_norm": 0.48767500268066744, "learning_rate": 2.7818329278183294e-06, "loss": 0.5025, "step": 32536 }, { "epoch": 0.9499576654696213, "grad_norm": 0.5220508061952486, "learning_rate": 2.7802108678021087e-06, "loss": 0.5973, "step": 32537 }, { "epoch": 0.9499868616974687, "grad_norm": 0.560531075580277, "learning_rate": 2.7785888077858883e-06, "loss": 0.6083, "step": 32538 }, { "epoch": 0.9500160579253161, "grad_norm": 0.5476727526487707, "learning_rate": 2.7769667477696675e-06, "loss": 0.6162, "step": 32539 }, { "epoch": 0.9500452541531634, "grad_norm": 0.5250418704521832, "learning_rate": 2.775344687753447e-06, "loss": 0.5747, "step": 32540 }, { "epoch": 0.9500744503810108, "grad_norm": 0.5272585392049568, "learning_rate": 2.7737226277372264e-06, "loss": 0.574, "step": 32541 }, { "epoch": 0.9501036466088582, "grad_norm": 0.5144396181970899, "learning_rate": 2.7721005677210056e-06, "loss": 0.5425, "step": 32542 }, { "epoch": 0.9501328428367055, "grad_norm": 0.5047219541854863, "learning_rate": 2.7704785077047853e-06, "loss": 0.5093, "step": 32543 }, { "epoch": 0.9501620390645529, "grad_norm": 0.522080290462111, "learning_rate": 2.7688564476885645e-06, "loss": 0.5596, "step": 32544 }, { "epoch": 0.9501912352924002, "grad_norm": 0.5168592509500467, "learning_rate": 2.767234387672344e-06, "loss": 0.5354, "step": 32545 }, { "epoch": 0.9502204315202476, "grad_norm": 0.5558655032382238, "learning_rate": 2.7656123276561233e-06, "loss": 0.6283, "step": 32546 }, { "epoch": 0.950249627748095, "grad_norm": 0.5308277712860531, "learning_rate": 2.763990267639903e-06, "loss": 0.6074, "step": 32547 }, { "epoch": 0.9502788239759423, "grad_norm": 0.514885107556069, "learning_rate": 2.762368207623682e-06, "loss": 0.5403, "step": 32548 }, { "epoch": 0.9503080202037897, "grad_norm": 0.5441228997285626, "learning_rate": 2.7607461476074614e-06, "loss": 0.6011, "step": 32549 }, { "epoch": 0.950337216431637, "grad_norm": 0.5295047221723197, "learning_rate": 2.759124087591241e-06, "loss": 0.6104, "step": 32550 }, { "epoch": 0.9503664126594844, "grad_norm": 0.5100680925128739, "learning_rate": 2.7575020275750203e-06, "loss": 0.5525, "step": 32551 }, { "epoch": 0.9503956088873318, "grad_norm": 0.5446646642204109, "learning_rate": 2.7558799675588e-06, "loss": 0.6075, "step": 32552 }, { "epoch": 0.9504248051151791, "grad_norm": 0.5567601342268197, "learning_rate": 2.754257907542579e-06, "loss": 0.5688, "step": 32553 }, { "epoch": 0.9504540013430265, "grad_norm": 0.5710773709740695, "learning_rate": 2.752635847526359e-06, "loss": 0.6767, "step": 32554 }, { "epoch": 0.9504831975708739, "grad_norm": 0.5590513525921985, "learning_rate": 2.751013787510138e-06, "loss": 0.6118, "step": 32555 }, { "epoch": 0.9505123937987212, "grad_norm": 0.5358391907110351, "learning_rate": 2.7493917274939172e-06, "loss": 0.5582, "step": 32556 }, { "epoch": 0.9505415900265686, "grad_norm": 0.4917971740392879, "learning_rate": 2.747769667477697e-06, "loss": 0.5253, "step": 32557 }, { "epoch": 0.9505707862544159, "grad_norm": 0.529237451222231, "learning_rate": 2.746147607461476e-06, "loss": 0.6215, "step": 32558 }, { "epoch": 0.9505999824822633, "grad_norm": 0.49458431126040225, "learning_rate": 2.7445255474452558e-06, "loss": 0.5194, "step": 32559 }, { "epoch": 0.9506291787101107, "grad_norm": 0.4999970760798892, "learning_rate": 2.742903487429035e-06, "loss": 0.5278, "step": 32560 }, { "epoch": 0.950658374937958, "grad_norm": 0.5199108685073244, "learning_rate": 2.7412814274128146e-06, "loss": 0.5685, "step": 32561 }, { "epoch": 0.9506875711658054, "grad_norm": 0.5262076111476479, "learning_rate": 2.739659367396594e-06, "loss": 0.5576, "step": 32562 }, { "epoch": 0.9507167673936527, "grad_norm": 0.5260796570122058, "learning_rate": 2.738037307380373e-06, "loss": 0.5642, "step": 32563 }, { "epoch": 0.9507459636215001, "grad_norm": 0.5314981807114071, "learning_rate": 2.7364152473641527e-06, "loss": 0.5806, "step": 32564 }, { "epoch": 0.9507751598493475, "grad_norm": 0.5213577198831649, "learning_rate": 2.734793187347932e-06, "loss": 0.5683, "step": 32565 }, { "epoch": 0.9508043560771948, "grad_norm": 0.5385487342117604, "learning_rate": 2.7331711273317116e-06, "loss": 0.569, "step": 32566 }, { "epoch": 0.9508335523050422, "grad_norm": 0.5816651270013913, "learning_rate": 2.731549067315491e-06, "loss": 0.6662, "step": 32567 }, { "epoch": 0.9508627485328895, "grad_norm": 0.5119607547155552, "learning_rate": 2.72992700729927e-06, "loss": 0.5123, "step": 32568 }, { "epoch": 0.9508919447607369, "grad_norm": 0.5210920000006694, "learning_rate": 2.7283049472830497e-06, "loss": 0.6038, "step": 32569 }, { "epoch": 0.9509211409885843, "grad_norm": 0.5137753560157873, "learning_rate": 2.726682887266829e-06, "loss": 0.5673, "step": 32570 }, { "epoch": 0.9509503372164316, "grad_norm": 0.526154791555929, "learning_rate": 2.7250608272506085e-06, "loss": 0.5737, "step": 32571 }, { "epoch": 0.950979533444279, "grad_norm": 0.5112760958832199, "learning_rate": 2.7234387672343877e-06, "loss": 0.54, "step": 32572 }, { "epoch": 0.9510087296721264, "grad_norm": 0.5509334510430275, "learning_rate": 2.7218167072181674e-06, "loss": 0.6094, "step": 32573 }, { "epoch": 0.9510379258999737, "grad_norm": 0.5181070411625426, "learning_rate": 2.7201946472019466e-06, "loss": 0.5643, "step": 32574 }, { "epoch": 0.9510671221278211, "grad_norm": 0.517855823069804, "learning_rate": 2.718572587185726e-06, "loss": 0.5565, "step": 32575 }, { "epoch": 0.9510963183556684, "grad_norm": 0.5521443171212895, "learning_rate": 2.7169505271695055e-06, "loss": 0.6105, "step": 32576 }, { "epoch": 0.9511255145835158, "grad_norm": 0.5135352033248914, "learning_rate": 2.7153284671532847e-06, "loss": 0.5161, "step": 32577 }, { "epoch": 0.9511547108113632, "grad_norm": 0.5505802758773359, "learning_rate": 2.7137064071370643e-06, "loss": 0.6139, "step": 32578 }, { "epoch": 0.9511839070392105, "grad_norm": 0.521465883189863, "learning_rate": 2.7120843471208436e-06, "loss": 0.5398, "step": 32579 }, { "epoch": 0.9512131032670579, "grad_norm": 0.5251643792653503, "learning_rate": 2.710462287104623e-06, "loss": 0.5354, "step": 32580 }, { "epoch": 0.9512422994949052, "grad_norm": 0.4939244163297748, "learning_rate": 2.7088402270884024e-06, "loss": 0.5097, "step": 32581 }, { "epoch": 0.9512714957227526, "grad_norm": 0.5186887457992096, "learning_rate": 2.7072181670721816e-06, "loss": 0.5731, "step": 32582 }, { "epoch": 0.9513006919506, "grad_norm": 0.5343319315345898, "learning_rate": 2.7055961070559613e-06, "loss": 0.5751, "step": 32583 }, { "epoch": 0.9513298881784473, "grad_norm": 0.5301090513934024, "learning_rate": 2.7039740470397405e-06, "loss": 0.5829, "step": 32584 }, { "epoch": 0.9513590844062947, "grad_norm": 0.540486082987458, "learning_rate": 2.70235198702352e-06, "loss": 0.5839, "step": 32585 }, { "epoch": 0.951388280634142, "grad_norm": 0.5504429924522934, "learning_rate": 2.7007299270072994e-06, "loss": 0.6101, "step": 32586 }, { "epoch": 0.9514174768619894, "grad_norm": 0.5247974709416382, "learning_rate": 2.699107866991079e-06, "loss": 0.5174, "step": 32587 }, { "epoch": 0.9514466730898368, "grad_norm": 0.526164568484457, "learning_rate": 2.6974858069748582e-06, "loss": 0.5784, "step": 32588 }, { "epoch": 0.9514758693176841, "grad_norm": 0.5478162356423706, "learning_rate": 2.6958637469586375e-06, "loss": 0.5947, "step": 32589 }, { "epoch": 0.9515050655455315, "grad_norm": 0.5241620243209116, "learning_rate": 2.694241686942417e-06, "loss": 0.5628, "step": 32590 }, { "epoch": 0.9515342617733789, "grad_norm": 0.5083140532825212, "learning_rate": 2.6926196269261963e-06, "loss": 0.5356, "step": 32591 }, { "epoch": 0.9515634580012262, "grad_norm": 0.569173012277036, "learning_rate": 2.690997566909976e-06, "loss": 0.6109, "step": 32592 }, { "epoch": 0.9515926542290736, "grad_norm": 0.5404692321321529, "learning_rate": 2.689375506893755e-06, "loss": 0.5969, "step": 32593 }, { "epoch": 0.9516218504569209, "grad_norm": 0.47888275005974545, "learning_rate": 2.6877534468775344e-06, "loss": 0.4871, "step": 32594 }, { "epoch": 0.9516510466847683, "grad_norm": 0.6501912746352199, "learning_rate": 2.686131386861314e-06, "loss": 0.6886, "step": 32595 }, { "epoch": 0.9516802429126157, "grad_norm": 0.5504520219131489, "learning_rate": 2.6845093268450933e-06, "loss": 0.6245, "step": 32596 }, { "epoch": 0.951709439140463, "grad_norm": 0.5011841578245586, "learning_rate": 2.682887266828873e-06, "loss": 0.5045, "step": 32597 }, { "epoch": 0.9517386353683104, "grad_norm": 0.5266666190980144, "learning_rate": 2.681265206812652e-06, "loss": 0.5878, "step": 32598 }, { "epoch": 0.9517678315961577, "grad_norm": 0.5190688377563532, "learning_rate": 2.679643146796432e-06, "loss": 0.5651, "step": 32599 }, { "epoch": 0.9517970278240051, "grad_norm": 0.556220215526132, "learning_rate": 2.678021086780211e-06, "loss": 0.5978, "step": 32600 }, { "epoch": 0.9518262240518525, "grad_norm": 0.5544569327533948, "learning_rate": 2.6763990267639902e-06, "loss": 0.565, "step": 32601 }, { "epoch": 0.9518554202796998, "grad_norm": 0.5150864190723003, "learning_rate": 2.67477696674777e-06, "loss": 0.5238, "step": 32602 }, { "epoch": 0.9518846165075472, "grad_norm": 0.5737359760436177, "learning_rate": 2.673154906731549e-06, "loss": 0.6712, "step": 32603 }, { "epoch": 0.9519138127353945, "grad_norm": 0.5527491128996814, "learning_rate": 2.6715328467153287e-06, "loss": 0.5821, "step": 32604 }, { "epoch": 0.9519430089632419, "grad_norm": 0.46293037097745515, "learning_rate": 2.669910786699108e-06, "loss": 0.4778, "step": 32605 }, { "epoch": 0.9519722051910893, "grad_norm": 0.5598908416232719, "learning_rate": 2.6682887266828876e-06, "loss": 0.6754, "step": 32606 }, { "epoch": 0.9520014014189366, "grad_norm": 0.5070579780219301, "learning_rate": 2.666666666666667e-06, "loss": 0.5423, "step": 32607 }, { "epoch": 0.952030597646784, "grad_norm": 0.5189316555235072, "learning_rate": 2.665044606650446e-06, "loss": 0.5028, "step": 32608 }, { "epoch": 0.9520597938746314, "grad_norm": 0.5199855749485814, "learning_rate": 2.6634225466342257e-06, "loss": 0.54, "step": 32609 }, { "epoch": 0.9520889901024787, "grad_norm": 0.5420609938320099, "learning_rate": 2.661800486618005e-06, "loss": 0.5964, "step": 32610 }, { "epoch": 0.9521181863303261, "grad_norm": 0.576970606600222, "learning_rate": 2.6601784266017846e-06, "loss": 0.638, "step": 32611 }, { "epoch": 0.9521473825581734, "grad_norm": 0.5778590268825776, "learning_rate": 2.6585563665855638e-06, "loss": 0.6367, "step": 32612 }, { "epoch": 0.9521765787860208, "grad_norm": 0.49740713004948683, "learning_rate": 2.6569343065693434e-06, "loss": 0.5308, "step": 32613 }, { "epoch": 0.9522057750138682, "grad_norm": 0.5757456610989864, "learning_rate": 2.6553122465531226e-06, "loss": 0.6777, "step": 32614 }, { "epoch": 0.9522349712417155, "grad_norm": 0.4601969398325656, "learning_rate": 2.653690186536902e-06, "loss": 0.4621, "step": 32615 }, { "epoch": 0.9522641674695629, "grad_norm": 0.5553912993185298, "learning_rate": 2.6520681265206815e-06, "loss": 0.6024, "step": 32616 }, { "epoch": 0.9522933636974102, "grad_norm": 0.5184254666936605, "learning_rate": 2.6504460665044607e-06, "loss": 0.5804, "step": 32617 }, { "epoch": 0.9523225599252576, "grad_norm": 0.5179060082126684, "learning_rate": 2.6488240064882404e-06, "loss": 0.581, "step": 32618 }, { "epoch": 0.952351756153105, "grad_norm": 0.5273969707902865, "learning_rate": 2.6472019464720196e-06, "loss": 0.5934, "step": 32619 }, { "epoch": 0.9523809523809523, "grad_norm": 0.49291595111503106, "learning_rate": 2.6455798864557992e-06, "loss": 0.5367, "step": 32620 }, { "epoch": 0.9524101486087997, "grad_norm": 0.507979905235826, "learning_rate": 2.6439578264395785e-06, "loss": 0.562, "step": 32621 }, { "epoch": 0.952439344836647, "grad_norm": 0.5644139509616123, "learning_rate": 2.6423357664233577e-06, "loss": 0.6255, "step": 32622 }, { "epoch": 0.9524685410644945, "grad_norm": 0.5996831388289792, "learning_rate": 2.6407137064071373e-06, "loss": 0.687, "step": 32623 }, { "epoch": 0.9524977372923419, "grad_norm": 0.48398592773285626, "learning_rate": 2.6390916463909165e-06, "loss": 0.4943, "step": 32624 }, { "epoch": 0.9525269335201892, "grad_norm": 0.5383561175849518, "learning_rate": 2.637469586374696e-06, "loss": 0.569, "step": 32625 }, { "epoch": 0.9525561297480366, "grad_norm": 0.5041564270871485, "learning_rate": 2.6358475263584754e-06, "loss": 0.4922, "step": 32626 }, { "epoch": 0.952585325975884, "grad_norm": 0.5651407684875076, "learning_rate": 2.6342254663422546e-06, "loss": 0.595, "step": 32627 }, { "epoch": 0.9526145222037313, "grad_norm": 0.5081577084219995, "learning_rate": 2.6326034063260343e-06, "loss": 0.5489, "step": 32628 }, { "epoch": 0.9526437184315787, "grad_norm": 0.5416266927755251, "learning_rate": 2.6309813463098135e-06, "loss": 0.5786, "step": 32629 }, { "epoch": 0.952672914659426, "grad_norm": 0.5388956006252951, "learning_rate": 2.629359286293593e-06, "loss": 0.5876, "step": 32630 }, { "epoch": 0.9527021108872734, "grad_norm": 0.5508349991539817, "learning_rate": 2.6277372262773724e-06, "loss": 0.5687, "step": 32631 }, { "epoch": 0.9527313071151208, "grad_norm": 0.5243360690683317, "learning_rate": 2.626115166261152e-06, "loss": 0.5893, "step": 32632 }, { "epoch": 0.9527605033429681, "grad_norm": 0.5118024802604647, "learning_rate": 2.6244931062449312e-06, "loss": 0.575, "step": 32633 }, { "epoch": 0.9527896995708155, "grad_norm": 0.5512801221725206, "learning_rate": 2.6228710462287105e-06, "loss": 0.6226, "step": 32634 }, { "epoch": 0.9528188957986629, "grad_norm": 0.5264077442029217, "learning_rate": 2.62124898621249e-06, "loss": 0.5808, "step": 32635 }, { "epoch": 0.9528480920265102, "grad_norm": 0.507063394358702, "learning_rate": 2.6196269261962693e-06, "loss": 0.5075, "step": 32636 }, { "epoch": 0.9528772882543576, "grad_norm": 0.5137379726679155, "learning_rate": 2.618004866180049e-06, "loss": 0.5271, "step": 32637 }, { "epoch": 0.9529064844822049, "grad_norm": 0.5161024364737057, "learning_rate": 2.616382806163828e-06, "loss": 0.5524, "step": 32638 }, { "epoch": 0.9529356807100523, "grad_norm": 0.5371064095958601, "learning_rate": 2.614760746147608e-06, "loss": 0.6214, "step": 32639 }, { "epoch": 0.9529648769378997, "grad_norm": 0.5287896412494413, "learning_rate": 2.6131386861313866e-06, "loss": 0.6053, "step": 32640 }, { "epoch": 0.952994073165747, "grad_norm": 0.5400631469632441, "learning_rate": 2.6115166261151663e-06, "loss": 0.6408, "step": 32641 }, { "epoch": 0.9530232693935944, "grad_norm": 0.5567891977414066, "learning_rate": 2.609894566098946e-06, "loss": 0.6699, "step": 32642 }, { "epoch": 0.9530524656214417, "grad_norm": 0.5371432613261378, "learning_rate": 2.608272506082725e-06, "loss": 0.6018, "step": 32643 }, { "epoch": 0.9530816618492891, "grad_norm": 0.516805415939006, "learning_rate": 2.6066504460665048e-06, "loss": 0.5397, "step": 32644 }, { "epoch": 0.9531108580771365, "grad_norm": 0.5353980159486555, "learning_rate": 2.605028386050284e-06, "loss": 0.6082, "step": 32645 }, { "epoch": 0.9531400543049838, "grad_norm": 0.523282962484272, "learning_rate": 2.6034063260340636e-06, "loss": 0.5774, "step": 32646 }, { "epoch": 0.9531692505328312, "grad_norm": 0.4832554213370634, "learning_rate": 2.6017842660178424e-06, "loss": 0.4689, "step": 32647 }, { "epoch": 0.9531984467606786, "grad_norm": 0.5461654323922719, "learning_rate": 2.600162206001622e-06, "loss": 0.5949, "step": 32648 }, { "epoch": 0.9532276429885259, "grad_norm": 0.5405597681706515, "learning_rate": 2.5985401459854017e-06, "loss": 0.5938, "step": 32649 }, { "epoch": 0.9532568392163733, "grad_norm": 0.5310289211361735, "learning_rate": 2.596918085969181e-06, "loss": 0.5656, "step": 32650 }, { "epoch": 0.9532860354442206, "grad_norm": 0.5288208032599031, "learning_rate": 2.5952960259529606e-06, "loss": 0.5888, "step": 32651 }, { "epoch": 0.953315231672068, "grad_norm": 0.5268487691122642, "learning_rate": 2.59367396593674e-06, "loss": 0.5793, "step": 32652 }, { "epoch": 0.9533444278999154, "grad_norm": 0.490191489448905, "learning_rate": 2.592051905920519e-06, "loss": 0.4962, "step": 32653 }, { "epoch": 0.9533736241277627, "grad_norm": 0.5087232065998905, "learning_rate": 2.5904298459042983e-06, "loss": 0.5279, "step": 32654 }, { "epoch": 0.9534028203556101, "grad_norm": 0.526863425461259, "learning_rate": 2.588807785888078e-06, "loss": 0.6004, "step": 32655 }, { "epoch": 0.9534320165834574, "grad_norm": 0.5184237264205744, "learning_rate": 2.5871857258718575e-06, "loss": 0.5665, "step": 32656 }, { "epoch": 0.9534612128113048, "grad_norm": 0.5075019848400815, "learning_rate": 2.5855636658556368e-06, "loss": 0.541, "step": 32657 }, { "epoch": 0.9534904090391522, "grad_norm": 0.5555928477257709, "learning_rate": 2.5839416058394164e-06, "loss": 0.634, "step": 32658 }, { "epoch": 0.9535196052669995, "grad_norm": 0.5091842713178963, "learning_rate": 2.5823195458231956e-06, "loss": 0.5815, "step": 32659 }, { "epoch": 0.9535488014948469, "grad_norm": 0.5646784753315568, "learning_rate": 2.580697485806975e-06, "loss": 0.6863, "step": 32660 }, { "epoch": 0.9535779977226942, "grad_norm": 0.5205516420398195, "learning_rate": 2.579075425790754e-06, "loss": 0.5523, "step": 32661 }, { "epoch": 0.9536071939505416, "grad_norm": 0.5076414178498674, "learning_rate": 2.5774533657745337e-06, "loss": 0.5525, "step": 32662 }, { "epoch": 0.953636390178389, "grad_norm": 0.4906640846249684, "learning_rate": 2.5758313057583134e-06, "loss": 0.5244, "step": 32663 }, { "epoch": 0.9536655864062363, "grad_norm": 0.4927035409019706, "learning_rate": 2.5742092457420926e-06, "loss": 0.542, "step": 32664 }, { "epoch": 0.9536947826340837, "grad_norm": 0.4997952978071237, "learning_rate": 2.5725871857258722e-06, "loss": 0.5049, "step": 32665 }, { "epoch": 0.953723978861931, "grad_norm": 0.5351271557564169, "learning_rate": 2.5709651257096515e-06, "loss": 0.5874, "step": 32666 }, { "epoch": 0.9537531750897784, "grad_norm": 0.49549579928411086, "learning_rate": 2.5693430656934307e-06, "loss": 0.5159, "step": 32667 }, { "epoch": 0.9537823713176258, "grad_norm": 0.5373694432985096, "learning_rate": 2.5677210056772103e-06, "loss": 0.5768, "step": 32668 }, { "epoch": 0.9538115675454731, "grad_norm": 0.5074739500449418, "learning_rate": 2.5660989456609895e-06, "loss": 0.5135, "step": 32669 }, { "epoch": 0.9538407637733205, "grad_norm": 0.5192061221657839, "learning_rate": 2.564476885644769e-06, "loss": 0.5689, "step": 32670 }, { "epoch": 0.9538699600011679, "grad_norm": 0.5387299254276674, "learning_rate": 2.5628548256285484e-06, "loss": 0.634, "step": 32671 }, { "epoch": 0.9538991562290152, "grad_norm": 0.5656610343528606, "learning_rate": 2.561232765612328e-06, "loss": 0.6141, "step": 32672 }, { "epoch": 0.9539283524568626, "grad_norm": 0.5387579531791848, "learning_rate": 2.559610705596107e-06, "loss": 0.6161, "step": 32673 }, { "epoch": 0.9539575486847099, "grad_norm": 0.5562100284271403, "learning_rate": 2.5579886455798865e-06, "loss": 0.5411, "step": 32674 }, { "epoch": 0.9539867449125573, "grad_norm": 0.5467484331970549, "learning_rate": 2.556366585563666e-06, "loss": 0.6143, "step": 32675 }, { "epoch": 0.9540159411404047, "grad_norm": 0.5538719345414284, "learning_rate": 2.5547445255474454e-06, "loss": 0.6178, "step": 32676 }, { "epoch": 0.954045137368252, "grad_norm": 0.5266046336346552, "learning_rate": 2.553122465531225e-06, "loss": 0.5489, "step": 32677 }, { "epoch": 0.9540743335960994, "grad_norm": 0.5545014864213328, "learning_rate": 2.5515004055150042e-06, "loss": 0.6269, "step": 32678 }, { "epoch": 0.9541035298239467, "grad_norm": 0.5160325188048495, "learning_rate": 2.549878345498784e-06, "loss": 0.5444, "step": 32679 }, { "epoch": 0.9541327260517941, "grad_norm": 0.5614378451348025, "learning_rate": 2.5482562854825627e-06, "loss": 0.6333, "step": 32680 }, { "epoch": 0.9541619222796415, "grad_norm": 0.5415574892804217, "learning_rate": 2.5466342254663423e-06, "loss": 0.6338, "step": 32681 }, { "epoch": 0.9541911185074888, "grad_norm": 0.4943699465822598, "learning_rate": 2.545012165450122e-06, "loss": 0.4866, "step": 32682 }, { "epoch": 0.9542203147353362, "grad_norm": 0.5353094493232027, "learning_rate": 2.543390105433901e-06, "loss": 0.5999, "step": 32683 }, { "epoch": 0.9542495109631836, "grad_norm": 0.538094607858055, "learning_rate": 2.541768045417681e-06, "loss": 0.595, "step": 32684 }, { "epoch": 0.9542787071910309, "grad_norm": 0.5161697226020925, "learning_rate": 2.54014598540146e-06, "loss": 0.5523, "step": 32685 }, { "epoch": 0.9543079034188783, "grad_norm": 0.5972843642413874, "learning_rate": 2.5385239253852393e-06, "loss": 0.6414, "step": 32686 }, { "epoch": 0.9543370996467256, "grad_norm": 0.5370530877621588, "learning_rate": 2.5369018653690185e-06, "loss": 0.6154, "step": 32687 }, { "epoch": 0.954366295874573, "grad_norm": 0.485905810048327, "learning_rate": 2.535279805352798e-06, "loss": 0.5148, "step": 32688 }, { "epoch": 0.9543954921024204, "grad_norm": 0.53878359786557, "learning_rate": 2.5336577453365778e-06, "loss": 0.5539, "step": 32689 }, { "epoch": 0.9544246883302677, "grad_norm": 0.4734476745425464, "learning_rate": 2.532035685320357e-06, "loss": 0.4985, "step": 32690 }, { "epoch": 0.9544538845581151, "grad_norm": 0.5448805191211655, "learning_rate": 2.5304136253041366e-06, "loss": 0.6153, "step": 32691 }, { "epoch": 0.9544830807859624, "grad_norm": 0.5087873335130684, "learning_rate": 2.528791565287916e-06, "loss": 0.4908, "step": 32692 }, { "epoch": 0.9545122770138098, "grad_norm": 0.5103295323973197, "learning_rate": 2.527169505271695e-06, "loss": 0.5357, "step": 32693 }, { "epoch": 0.9545414732416572, "grad_norm": 0.5135829647153476, "learning_rate": 2.5255474452554743e-06, "loss": 0.5652, "step": 32694 }, { "epoch": 0.9545706694695045, "grad_norm": 0.5081289207778992, "learning_rate": 2.523925385239254e-06, "loss": 0.5308, "step": 32695 }, { "epoch": 0.9545998656973519, "grad_norm": 0.5195154794205742, "learning_rate": 2.5223033252230336e-06, "loss": 0.5459, "step": 32696 }, { "epoch": 0.9546290619251993, "grad_norm": 0.5402364311851489, "learning_rate": 2.520681265206813e-06, "loss": 0.6135, "step": 32697 }, { "epoch": 0.9546582581530466, "grad_norm": 0.5205680289264776, "learning_rate": 2.5190592051905925e-06, "loss": 0.5759, "step": 32698 }, { "epoch": 0.954687454380894, "grad_norm": 0.49595284166161613, "learning_rate": 2.5174371451743712e-06, "loss": 0.4981, "step": 32699 }, { "epoch": 0.9547166506087413, "grad_norm": 0.564515596573343, "learning_rate": 2.515815085158151e-06, "loss": 0.6734, "step": 32700 }, { "epoch": 0.9547458468365887, "grad_norm": 0.49138250050988774, "learning_rate": 2.51419302514193e-06, "loss": 0.4924, "step": 32701 }, { "epoch": 0.9547750430644361, "grad_norm": 0.5449809234587589, "learning_rate": 2.5125709651257098e-06, "loss": 0.6302, "step": 32702 }, { "epoch": 0.9548042392922834, "grad_norm": 0.5330968696127627, "learning_rate": 2.5109489051094894e-06, "loss": 0.594, "step": 32703 }, { "epoch": 0.9548334355201308, "grad_norm": 0.5365713459659578, "learning_rate": 2.5093268450932686e-06, "loss": 0.5733, "step": 32704 }, { "epoch": 0.9548626317479781, "grad_norm": 0.5378162480518569, "learning_rate": 2.5077047850770483e-06, "loss": 0.5827, "step": 32705 }, { "epoch": 0.9548918279758255, "grad_norm": 0.5803307468183717, "learning_rate": 2.506082725060827e-06, "loss": 0.6858, "step": 32706 }, { "epoch": 0.9549210242036729, "grad_norm": 0.5037340443563822, "learning_rate": 2.5044606650446067e-06, "loss": 0.5787, "step": 32707 }, { "epoch": 0.9549502204315202, "grad_norm": 0.5022737331278014, "learning_rate": 2.502838605028386e-06, "loss": 0.5241, "step": 32708 }, { "epoch": 0.9549794166593676, "grad_norm": 0.48731395791913684, "learning_rate": 2.5012165450121656e-06, "loss": 0.4885, "step": 32709 }, { "epoch": 0.955008612887215, "grad_norm": 0.5162252252072259, "learning_rate": 2.4995944849959452e-06, "loss": 0.5565, "step": 32710 }, { "epoch": 0.9550378091150623, "grad_norm": 0.4860064217206235, "learning_rate": 2.4979724249797244e-06, "loss": 0.5207, "step": 32711 }, { "epoch": 0.9550670053429097, "grad_norm": 0.5125102299920582, "learning_rate": 2.496350364963504e-06, "loss": 0.5191, "step": 32712 }, { "epoch": 0.955096201570757, "grad_norm": 0.5103955932002507, "learning_rate": 2.494728304947283e-06, "loss": 0.5485, "step": 32713 }, { "epoch": 0.9551253977986044, "grad_norm": 0.5041429715418496, "learning_rate": 2.4931062449310625e-06, "loss": 0.5656, "step": 32714 }, { "epoch": 0.9551545940264518, "grad_norm": 0.5645425310680722, "learning_rate": 2.4914841849148417e-06, "loss": 0.6724, "step": 32715 }, { "epoch": 0.9551837902542991, "grad_norm": 0.546746061123752, "learning_rate": 2.4898621248986214e-06, "loss": 0.5957, "step": 32716 }, { "epoch": 0.9552129864821465, "grad_norm": 0.6982656459043303, "learning_rate": 2.488240064882401e-06, "loss": 0.6096, "step": 32717 }, { "epoch": 0.9552421827099938, "grad_norm": 0.532093943601525, "learning_rate": 2.4866180048661803e-06, "loss": 0.5909, "step": 32718 }, { "epoch": 0.9552713789378412, "grad_norm": 0.5309080879817273, "learning_rate": 2.4849959448499595e-06, "loss": 0.5652, "step": 32719 }, { "epoch": 0.9553005751656886, "grad_norm": 0.595197266148657, "learning_rate": 2.4833738848337387e-06, "loss": 0.613, "step": 32720 }, { "epoch": 0.9553297713935359, "grad_norm": 0.5593025885190274, "learning_rate": 2.4817518248175183e-06, "loss": 0.6101, "step": 32721 }, { "epoch": 0.9553589676213833, "grad_norm": 0.5214656666929635, "learning_rate": 2.4801297648012976e-06, "loss": 0.5483, "step": 32722 }, { "epoch": 0.9553881638492306, "grad_norm": 0.5273819046271684, "learning_rate": 2.478507704785077e-06, "loss": 0.5578, "step": 32723 }, { "epoch": 0.955417360077078, "grad_norm": 0.5155099663656031, "learning_rate": 2.476885644768857e-06, "loss": 0.5635, "step": 32724 }, { "epoch": 0.9554465563049254, "grad_norm": 0.517169670000625, "learning_rate": 2.475263584752636e-06, "loss": 0.5277, "step": 32725 }, { "epoch": 0.9554757525327727, "grad_norm": 0.537287831256997, "learning_rate": 2.4736415247364153e-06, "loss": 0.5539, "step": 32726 }, { "epoch": 0.9555049487606201, "grad_norm": 0.4998220141366594, "learning_rate": 2.4720194647201945e-06, "loss": 0.4995, "step": 32727 }, { "epoch": 0.9555341449884674, "grad_norm": 0.5503141098225452, "learning_rate": 2.470397404703974e-06, "loss": 0.6327, "step": 32728 }, { "epoch": 0.9555633412163148, "grad_norm": 0.5200480170843644, "learning_rate": 2.4687753446877534e-06, "loss": 0.5403, "step": 32729 }, { "epoch": 0.9555925374441622, "grad_norm": 0.5349186599096858, "learning_rate": 2.467153284671533e-06, "loss": 0.6046, "step": 32730 }, { "epoch": 0.9556217336720095, "grad_norm": 0.5345604152645673, "learning_rate": 2.4655312246553127e-06, "loss": 0.5749, "step": 32731 }, { "epoch": 0.9556509298998569, "grad_norm": 0.5172173746659293, "learning_rate": 2.4639091646390915e-06, "loss": 0.5185, "step": 32732 }, { "epoch": 0.9556801261277043, "grad_norm": 0.5547727582599723, "learning_rate": 2.462287104622871e-06, "loss": 0.5946, "step": 32733 }, { "epoch": 0.9557093223555516, "grad_norm": 0.49654638539487567, "learning_rate": 2.4606650446066503e-06, "loss": 0.528, "step": 32734 }, { "epoch": 0.955738518583399, "grad_norm": 0.5024810316237872, "learning_rate": 2.45904298459043e-06, "loss": 0.5119, "step": 32735 }, { "epoch": 0.9557677148112463, "grad_norm": 0.5100516065284323, "learning_rate": 2.4574209245742096e-06, "loss": 0.5575, "step": 32736 }, { "epoch": 0.9557969110390937, "grad_norm": 0.5491753242128393, "learning_rate": 2.455798864557989e-06, "loss": 0.6218, "step": 32737 }, { "epoch": 0.9558261072669411, "grad_norm": 0.5241517404164359, "learning_rate": 2.4541768045417685e-06, "loss": 0.5264, "step": 32738 }, { "epoch": 0.9558553034947884, "grad_norm": 0.537252203529285, "learning_rate": 2.4525547445255473e-06, "loss": 0.5837, "step": 32739 }, { "epoch": 0.9558844997226358, "grad_norm": 0.5699791696461245, "learning_rate": 2.450932684509327e-06, "loss": 0.5717, "step": 32740 }, { "epoch": 0.9559136959504831, "grad_norm": 0.5605059444724833, "learning_rate": 2.449310624493106e-06, "loss": 0.5985, "step": 32741 }, { "epoch": 0.9559428921783305, "grad_norm": 0.5012680063295667, "learning_rate": 2.447688564476886e-06, "loss": 0.5332, "step": 32742 }, { "epoch": 0.9559720884061779, "grad_norm": 0.5064422735019658, "learning_rate": 2.4460665044606654e-06, "loss": 0.5263, "step": 32743 }, { "epoch": 0.9560012846340253, "grad_norm": 0.518765105730802, "learning_rate": 2.4444444444444447e-06, "loss": 0.5628, "step": 32744 }, { "epoch": 0.9560304808618727, "grad_norm": 0.5506492165779658, "learning_rate": 2.442822384428224e-06, "loss": 0.5999, "step": 32745 }, { "epoch": 0.9560596770897201, "grad_norm": 0.563257475782768, "learning_rate": 2.441200324412003e-06, "loss": 0.6172, "step": 32746 }, { "epoch": 0.9560888733175674, "grad_norm": 0.5320040705598106, "learning_rate": 2.4395782643957827e-06, "loss": 0.5771, "step": 32747 }, { "epoch": 0.9561180695454148, "grad_norm": 0.5120314806108809, "learning_rate": 2.437956204379562e-06, "loss": 0.5651, "step": 32748 }, { "epoch": 0.9561472657732621, "grad_norm": 0.5795979044202879, "learning_rate": 2.4363341443633416e-06, "loss": 0.6377, "step": 32749 }, { "epoch": 0.9561764620011095, "grad_norm": 0.5435676619272742, "learning_rate": 2.4347120843471213e-06, "loss": 0.5744, "step": 32750 }, { "epoch": 0.9562056582289569, "grad_norm": 0.5273840447196179, "learning_rate": 2.4330900243309005e-06, "loss": 0.4876, "step": 32751 }, { "epoch": 0.9562348544568042, "grad_norm": 0.5354153524908568, "learning_rate": 2.4314679643146797e-06, "loss": 0.6076, "step": 32752 }, { "epoch": 0.9562640506846516, "grad_norm": 0.5658801659012077, "learning_rate": 2.429845904298459e-06, "loss": 0.6264, "step": 32753 }, { "epoch": 0.956293246912499, "grad_norm": 0.5416992686705543, "learning_rate": 2.4282238442822386e-06, "loss": 0.6221, "step": 32754 }, { "epoch": 0.9563224431403463, "grad_norm": 0.5431816970729022, "learning_rate": 2.4266017842660178e-06, "loss": 0.607, "step": 32755 }, { "epoch": 0.9563516393681937, "grad_norm": 0.5689497388526715, "learning_rate": 2.4249797242497974e-06, "loss": 0.6606, "step": 32756 }, { "epoch": 0.956380835596041, "grad_norm": 0.5448095887529615, "learning_rate": 2.423357664233577e-06, "loss": 0.5977, "step": 32757 }, { "epoch": 0.9564100318238884, "grad_norm": 0.534012680257048, "learning_rate": 2.421735604217356e-06, "loss": 0.5924, "step": 32758 }, { "epoch": 0.9564392280517358, "grad_norm": 0.5939363851630693, "learning_rate": 2.4201135442011355e-06, "loss": 0.7224, "step": 32759 }, { "epoch": 0.9564684242795831, "grad_norm": 0.48472852313752096, "learning_rate": 2.4184914841849147e-06, "loss": 0.4938, "step": 32760 }, { "epoch": 0.9564976205074305, "grad_norm": 0.6108379079099606, "learning_rate": 2.4168694241686944e-06, "loss": 0.7302, "step": 32761 }, { "epoch": 0.9565268167352778, "grad_norm": 0.4764764421659952, "learning_rate": 2.4152473641524736e-06, "loss": 0.4728, "step": 32762 }, { "epoch": 0.9565560129631252, "grad_norm": 0.5432769936195836, "learning_rate": 2.4136253041362532e-06, "loss": 0.586, "step": 32763 }, { "epoch": 0.9565852091909726, "grad_norm": 0.5299755505450126, "learning_rate": 2.412003244120033e-06, "loss": 0.6127, "step": 32764 }, { "epoch": 0.9566144054188199, "grad_norm": 0.5717449545838551, "learning_rate": 2.4103811841038117e-06, "loss": 0.5965, "step": 32765 }, { "epoch": 0.9566436016466673, "grad_norm": 0.5376142029423603, "learning_rate": 2.4087591240875913e-06, "loss": 0.5655, "step": 32766 }, { "epoch": 0.9566727978745146, "grad_norm": 0.5201836509264889, "learning_rate": 2.4071370640713706e-06, "loss": 0.5754, "step": 32767 }, { "epoch": 0.956701994102362, "grad_norm": 0.514055185274686, "learning_rate": 2.40551500405515e-06, "loss": 0.5419, "step": 32768 }, { "epoch": 0.9567311903302094, "grad_norm": 0.5482534698624784, "learning_rate": 2.4038929440389294e-06, "loss": 0.6397, "step": 32769 }, { "epoch": 0.9567603865580567, "grad_norm": 0.5537655199825946, "learning_rate": 2.402270884022709e-06, "loss": 0.6166, "step": 32770 }, { "epoch": 0.9567895827859041, "grad_norm": 0.5241320422496898, "learning_rate": 2.4006488240064887e-06, "loss": 0.5866, "step": 32771 }, { "epoch": 0.9568187790137515, "grad_norm": 0.5166840186820866, "learning_rate": 2.3990267639902675e-06, "loss": 0.5394, "step": 32772 }, { "epoch": 0.9568479752415988, "grad_norm": 0.48711934664944884, "learning_rate": 2.397404703974047e-06, "loss": 0.5027, "step": 32773 }, { "epoch": 0.9568771714694462, "grad_norm": 0.5131690574230057, "learning_rate": 2.3957826439578264e-06, "loss": 0.5557, "step": 32774 }, { "epoch": 0.9569063676972935, "grad_norm": 0.5561248549328006, "learning_rate": 2.394160583941606e-06, "loss": 0.6549, "step": 32775 }, { "epoch": 0.9569355639251409, "grad_norm": 0.5515766193470582, "learning_rate": 2.3925385239253852e-06, "loss": 0.6242, "step": 32776 }, { "epoch": 0.9569647601529883, "grad_norm": 0.48885481137908504, "learning_rate": 2.390916463909165e-06, "loss": 0.5158, "step": 32777 }, { "epoch": 0.9569939563808356, "grad_norm": 0.5316072745228616, "learning_rate": 2.389294403892944e-06, "loss": 0.5907, "step": 32778 }, { "epoch": 0.957023152608683, "grad_norm": 0.5018881649253728, "learning_rate": 2.3876723438767233e-06, "loss": 0.537, "step": 32779 }, { "epoch": 0.9570523488365303, "grad_norm": 0.5342752919772888, "learning_rate": 2.386050283860503e-06, "loss": 0.5787, "step": 32780 }, { "epoch": 0.9570815450643777, "grad_norm": 0.5537081201305079, "learning_rate": 2.384428223844282e-06, "loss": 0.6198, "step": 32781 }, { "epoch": 0.9571107412922251, "grad_norm": 0.516807498474833, "learning_rate": 2.382806163828062e-06, "loss": 0.5493, "step": 32782 }, { "epoch": 0.9571399375200724, "grad_norm": 0.5795930575373579, "learning_rate": 2.381184103811841e-06, "loss": 0.5816, "step": 32783 }, { "epoch": 0.9571691337479198, "grad_norm": 0.5253655751123769, "learning_rate": 2.3795620437956207e-06, "loss": 0.5538, "step": 32784 }, { "epoch": 0.9571983299757671, "grad_norm": 0.5139366397522815, "learning_rate": 2.3779399837794e-06, "loss": 0.5666, "step": 32785 }, { "epoch": 0.9572275262036145, "grad_norm": 0.5084628190846755, "learning_rate": 2.376317923763179e-06, "loss": 0.5507, "step": 32786 }, { "epoch": 0.9572567224314619, "grad_norm": 0.5303003936222503, "learning_rate": 2.3746958637469588e-06, "loss": 0.6007, "step": 32787 }, { "epoch": 0.9572859186593092, "grad_norm": 0.5400646190030746, "learning_rate": 2.373073803730738e-06, "loss": 0.5394, "step": 32788 }, { "epoch": 0.9573151148871566, "grad_norm": 0.5128181870724536, "learning_rate": 2.3714517437145177e-06, "loss": 0.5219, "step": 32789 }, { "epoch": 0.957344311115004, "grad_norm": 0.549780336177799, "learning_rate": 2.369829683698297e-06, "loss": 0.6152, "step": 32790 }, { "epoch": 0.9573735073428513, "grad_norm": 0.546558649669787, "learning_rate": 2.368207623682076e-06, "loss": 0.6053, "step": 32791 }, { "epoch": 0.9574027035706987, "grad_norm": 0.5590123687813622, "learning_rate": 2.3665855636658557e-06, "loss": 0.6492, "step": 32792 }, { "epoch": 0.957431899798546, "grad_norm": 0.5102560441800265, "learning_rate": 2.364963503649635e-06, "loss": 0.4948, "step": 32793 }, { "epoch": 0.9574610960263934, "grad_norm": 0.5690918424033008, "learning_rate": 2.3633414436334146e-06, "loss": 0.6594, "step": 32794 }, { "epoch": 0.9574902922542408, "grad_norm": 0.554018511464546, "learning_rate": 2.361719383617194e-06, "loss": 0.5695, "step": 32795 }, { "epoch": 0.9575194884820881, "grad_norm": 0.5063607419650421, "learning_rate": 2.3600973236009735e-06, "loss": 0.5344, "step": 32796 }, { "epoch": 0.9575486847099355, "grad_norm": 0.5706320857046489, "learning_rate": 2.358475263584753e-06, "loss": 0.5952, "step": 32797 }, { "epoch": 0.9575778809377828, "grad_norm": 0.49591617342434074, "learning_rate": 2.356853203568532e-06, "loss": 0.5277, "step": 32798 }, { "epoch": 0.9576070771656302, "grad_norm": 0.5537506639652289, "learning_rate": 2.3552311435523116e-06, "loss": 0.6281, "step": 32799 }, { "epoch": 0.9576362733934776, "grad_norm": 0.5221252029910224, "learning_rate": 2.3536090835360908e-06, "loss": 0.5478, "step": 32800 }, { "epoch": 0.9576654696213249, "grad_norm": 0.5035349780291715, "learning_rate": 2.3519870235198704e-06, "loss": 0.5148, "step": 32801 }, { "epoch": 0.9576946658491723, "grad_norm": 0.5434302088353904, "learning_rate": 2.3503649635036496e-06, "loss": 0.5678, "step": 32802 }, { "epoch": 0.9577238620770196, "grad_norm": 0.5266786192584463, "learning_rate": 2.3487429034874293e-06, "loss": 0.5657, "step": 32803 }, { "epoch": 0.957753058304867, "grad_norm": 0.5300903426147303, "learning_rate": 2.3471208434712085e-06, "loss": 0.6033, "step": 32804 }, { "epoch": 0.9577822545327144, "grad_norm": 0.5467620727489616, "learning_rate": 2.3454987834549877e-06, "loss": 0.6264, "step": 32805 }, { "epoch": 0.9578114507605617, "grad_norm": 0.5384116445541814, "learning_rate": 2.3438767234387674e-06, "loss": 0.5936, "step": 32806 }, { "epoch": 0.9578406469884091, "grad_norm": 0.5205527907950438, "learning_rate": 2.3422546634225466e-06, "loss": 0.5899, "step": 32807 }, { "epoch": 0.9578698432162565, "grad_norm": 0.5333277360347158, "learning_rate": 2.3406326034063262e-06, "loss": 0.5537, "step": 32808 }, { "epoch": 0.9578990394441038, "grad_norm": 0.5269572599641251, "learning_rate": 2.3390105433901055e-06, "loss": 0.5367, "step": 32809 }, { "epoch": 0.9579282356719512, "grad_norm": 0.5079075052702007, "learning_rate": 2.337388483373885e-06, "loss": 0.5457, "step": 32810 }, { "epoch": 0.9579574318997985, "grad_norm": 0.53233917338594, "learning_rate": 2.3357664233576643e-06, "loss": 0.5709, "step": 32811 }, { "epoch": 0.9579866281276459, "grad_norm": 0.540781148757422, "learning_rate": 2.3341443633414435e-06, "loss": 0.5984, "step": 32812 }, { "epoch": 0.9580158243554933, "grad_norm": 0.6042723324890004, "learning_rate": 2.332522303325223e-06, "loss": 0.6561, "step": 32813 }, { "epoch": 0.9580450205833406, "grad_norm": 0.4983282271203591, "learning_rate": 2.3309002433090024e-06, "loss": 0.5226, "step": 32814 }, { "epoch": 0.958074216811188, "grad_norm": 0.5211951199502871, "learning_rate": 2.329278183292782e-06, "loss": 0.5521, "step": 32815 }, { "epoch": 0.9581034130390353, "grad_norm": 0.5757651338238139, "learning_rate": 2.3276561232765613e-06, "loss": 0.6297, "step": 32816 }, { "epoch": 0.9581326092668827, "grad_norm": 0.5395564542385956, "learning_rate": 2.326034063260341e-06, "loss": 0.5594, "step": 32817 }, { "epoch": 0.9581618054947301, "grad_norm": 0.49988069634820387, "learning_rate": 2.32441200324412e-06, "loss": 0.535, "step": 32818 }, { "epoch": 0.9581910017225774, "grad_norm": 0.5175038075155683, "learning_rate": 2.3227899432278994e-06, "loss": 0.5786, "step": 32819 }, { "epoch": 0.9582201979504248, "grad_norm": 0.507285732266436, "learning_rate": 2.321167883211679e-06, "loss": 0.5342, "step": 32820 }, { "epoch": 0.9582493941782722, "grad_norm": 0.5266691605626971, "learning_rate": 2.3195458231954582e-06, "loss": 0.5698, "step": 32821 }, { "epoch": 0.9582785904061195, "grad_norm": 0.5914754393727976, "learning_rate": 2.317923763179238e-06, "loss": 0.6425, "step": 32822 }, { "epoch": 0.9583077866339669, "grad_norm": 0.5563644479712371, "learning_rate": 2.316301703163017e-06, "loss": 0.6011, "step": 32823 }, { "epoch": 0.9583369828618142, "grad_norm": 0.5278133328162217, "learning_rate": 2.3146796431467963e-06, "loss": 0.5406, "step": 32824 }, { "epoch": 0.9583661790896616, "grad_norm": 0.5196205410486262, "learning_rate": 2.313057583130576e-06, "loss": 0.5836, "step": 32825 }, { "epoch": 0.958395375317509, "grad_norm": 0.48258756031127165, "learning_rate": 2.311435523114355e-06, "loss": 0.4963, "step": 32826 }, { "epoch": 0.9584245715453563, "grad_norm": 0.49726026885872676, "learning_rate": 2.309813463098135e-06, "loss": 0.5142, "step": 32827 }, { "epoch": 0.9584537677732037, "grad_norm": 0.49984851084803456, "learning_rate": 2.308191403081914e-06, "loss": 0.4779, "step": 32828 }, { "epoch": 0.958482964001051, "grad_norm": 0.5280133604903449, "learning_rate": 2.3065693430656937e-06, "loss": 0.5155, "step": 32829 }, { "epoch": 0.9585121602288984, "grad_norm": 0.5255314524665741, "learning_rate": 2.304947283049473e-06, "loss": 0.533, "step": 32830 }, { "epoch": 0.9585413564567458, "grad_norm": 0.5424273571378304, "learning_rate": 2.303325223033252e-06, "loss": 0.5747, "step": 32831 }, { "epoch": 0.9585705526845931, "grad_norm": 0.5690992251824976, "learning_rate": 2.3017031630170318e-06, "loss": 0.6653, "step": 32832 }, { "epoch": 0.9585997489124405, "grad_norm": 0.5208499950697127, "learning_rate": 2.300081103000811e-06, "loss": 0.5379, "step": 32833 }, { "epoch": 0.9586289451402878, "grad_norm": 0.4964014466052348, "learning_rate": 2.2984590429845906e-06, "loss": 0.5114, "step": 32834 }, { "epoch": 0.9586581413681352, "grad_norm": 0.5765560411324056, "learning_rate": 2.29683698296837e-06, "loss": 0.6606, "step": 32835 }, { "epoch": 0.9586873375959826, "grad_norm": 0.5206989758151946, "learning_rate": 2.2952149229521495e-06, "loss": 0.5693, "step": 32836 }, { "epoch": 0.9587165338238299, "grad_norm": 0.5446489987593981, "learning_rate": 2.2935928629359287e-06, "loss": 0.555, "step": 32837 }, { "epoch": 0.9587457300516773, "grad_norm": 0.499246303146543, "learning_rate": 2.291970802919708e-06, "loss": 0.5259, "step": 32838 }, { "epoch": 0.9587749262795247, "grad_norm": 0.5210880684086394, "learning_rate": 2.2903487429034876e-06, "loss": 0.5279, "step": 32839 }, { "epoch": 0.958804122507372, "grad_norm": 0.5159347811668806, "learning_rate": 2.288726682887267e-06, "loss": 0.5541, "step": 32840 }, { "epoch": 0.9588333187352194, "grad_norm": 0.5369432699066965, "learning_rate": 2.2871046228710465e-06, "loss": 0.5845, "step": 32841 }, { "epoch": 0.9588625149630667, "grad_norm": 0.5462655965517268, "learning_rate": 2.2854825628548257e-06, "loss": 0.5796, "step": 32842 }, { "epoch": 0.9588917111909141, "grad_norm": 0.5180859006501575, "learning_rate": 2.2838605028386053e-06, "loss": 0.5391, "step": 32843 }, { "epoch": 0.9589209074187615, "grad_norm": 0.550475280846525, "learning_rate": 2.2822384428223845e-06, "loss": 0.6315, "step": 32844 }, { "epoch": 0.9589501036466088, "grad_norm": 0.5179492604881721, "learning_rate": 2.2806163828061638e-06, "loss": 0.5338, "step": 32845 }, { "epoch": 0.9589792998744562, "grad_norm": 0.5249519349561723, "learning_rate": 2.2789943227899434e-06, "loss": 0.5974, "step": 32846 }, { "epoch": 0.9590084961023035, "grad_norm": 0.5092219616594963, "learning_rate": 2.2773722627737226e-06, "loss": 0.555, "step": 32847 }, { "epoch": 0.9590376923301509, "grad_norm": 0.5280439628709741, "learning_rate": 2.2757502027575023e-06, "loss": 0.5992, "step": 32848 }, { "epoch": 0.9590668885579983, "grad_norm": 0.5585138086177908, "learning_rate": 2.2741281427412815e-06, "loss": 0.6202, "step": 32849 }, { "epoch": 0.9590960847858456, "grad_norm": 0.49298403741057994, "learning_rate": 2.2725060827250607e-06, "loss": 0.544, "step": 32850 }, { "epoch": 0.959125281013693, "grad_norm": 0.5522828826623225, "learning_rate": 2.2708840227088404e-06, "loss": 0.6755, "step": 32851 }, { "epoch": 0.9591544772415403, "grad_norm": 0.542187627732403, "learning_rate": 2.2692619626926196e-06, "loss": 0.607, "step": 32852 }, { "epoch": 0.9591836734693877, "grad_norm": 0.6357869622804523, "learning_rate": 2.2676399026763992e-06, "loss": 0.6201, "step": 32853 }, { "epoch": 0.9592128696972351, "grad_norm": 0.545095094383544, "learning_rate": 2.2660178426601785e-06, "loss": 0.5642, "step": 32854 }, { "epoch": 0.9592420659250824, "grad_norm": 0.4999756473153538, "learning_rate": 2.264395782643958e-06, "loss": 0.5194, "step": 32855 }, { "epoch": 0.9592712621529298, "grad_norm": 0.5352602380675165, "learning_rate": 2.2627737226277373e-06, "loss": 0.5876, "step": 32856 }, { "epoch": 0.9593004583807772, "grad_norm": 0.5746328368565905, "learning_rate": 2.2611516626115165e-06, "loss": 0.6339, "step": 32857 }, { "epoch": 0.9593296546086245, "grad_norm": 0.5548015242715432, "learning_rate": 2.259529602595296e-06, "loss": 0.6522, "step": 32858 }, { "epoch": 0.9593588508364719, "grad_norm": 0.529486766251447, "learning_rate": 2.2579075425790754e-06, "loss": 0.5664, "step": 32859 }, { "epoch": 0.9593880470643192, "grad_norm": 0.5591156437750916, "learning_rate": 2.256285482562855e-06, "loss": 0.641, "step": 32860 }, { "epoch": 0.9594172432921666, "grad_norm": 0.5379774554919768, "learning_rate": 2.2546634225466343e-06, "loss": 0.6055, "step": 32861 }, { "epoch": 0.959446439520014, "grad_norm": 0.5393911932832091, "learning_rate": 2.253041362530414e-06, "loss": 0.6044, "step": 32862 }, { "epoch": 0.9594756357478613, "grad_norm": 0.5113712536905225, "learning_rate": 2.251419302514193e-06, "loss": 0.558, "step": 32863 }, { "epoch": 0.9595048319757087, "grad_norm": 0.5113686260657387, "learning_rate": 2.2497972424979724e-06, "loss": 0.5491, "step": 32864 }, { "epoch": 0.9595340282035562, "grad_norm": 0.5630513378399871, "learning_rate": 2.248175182481752e-06, "loss": 0.6185, "step": 32865 }, { "epoch": 0.9595632244314035, "grad_norm": 0.5571582315346426, "learning_rate": 2.2465531224655312e-06, "loss": 0.6108, "step": 32866 }, { "epoch": 0.9595924206592509, "grad_norm": 0.5603173497803904, "learning_rate": 2.244931062449311e-06, "loss": 0.6258, "step": 32867 }, { "epoch": 0.9596216168870982, "grad_norm": 0.5515111672218188, "learning_rate": 2.24330900243309e-06, "loss": 0.607, "step": 32868 }, { "epoch": 0.9596508131149456, "grad_norm": 0.537985482704804, "learning_rate": 2.2416869424168697e-06, "loss": 0.6428, "step": 32869 }, { "epoch": 0.959680009342793, "grad_norm": 0.5156232811156398, "learning_rate": 2.240064882400649e-06, "loss": 0.5269, "step": 32870 }, { "epoch": 0.9597092055706403, "grad_norm": 0.5185651164198526, "learning_rate": 2.238442822384428e-06, "loss": 0.5371, "step": 32871 }, { "epoch": 0.9597384017984877, "grad_norm": 0.5645202083510448, "learning_rate": 2.236820762368208e-06, "loss": 0.6163, "step": 32872 }, { "epoch": 0.959767598026335, "grad_norm": 0.5272281955038183, "learning_rate": 2.235198702351987e-06, "loss": 0.5729, "step": 32873 }, { "epoch": 0.9597967942541824, "grad_norm": 0.5359961598049361, "learning_rate": 2.2335766423357667e-06, "loss": 0.5779, "step": 32874 }, { "epoch": 0.9598259904820298, "grad_norm": 0.5412118410620873, "learning_rate": 2.231954582319546e-06, "loss": 0.5876, "step": 32875 }, { "epoch": 0.9598551867098771, "grad_norm": 0.4978921565682768, "learning_rate": 2.2303325223033255e-06, "loss": 0.4862, "step": 32876 }, { "epoch": 0.9598843829377245, "grad_norm": 0.5507113441785977, "learning_rate": 2.2287104622871048e-06, "loss": 0.6241, "step": 32877 }, { "epoch": 0.9599135791655719, "grad_norm": 0.5168377970653959, "learning_rate": 2.227088402270884e-06, "loss": 0.5748, "step": 32878 }, { "epoch": 0.9599427753934192, "grad_norm": 0.5045601805986758, "learning_rate": 2.2254663422546636e-06, "loss": 0.5341, "step": 32879 }, { "epoch": 0.9599719716212666, "grad_norm": 0.5601089207704134, "learning_rate": 2.223844282238443e-06, "loss": 0.6112, "step": 32880 }, { "epoch": 0.9600011678491139, "grad_norm": 0.5353149694781146, "learning_rate": 2.2222222222222225e-06, "loss": 0.5734, "step": 32881 }, { "epoch": 0.9600303640769613, "grad_norm": 0.5536764557909946, "learning_rate": 2.2206001622060017e-06, "loss": 0.6181, "step": 32882 }, { "epoch": 0.9600595603048087, "grad_norm": 0.4943190451796744, "learning_rate": 2.218978102189781e-06, "loss": 0.5013, "step": 32883 }, { "epoch": 0.960088756532656, "grad_norm": 0.523266236014365, "learning_rate": 2.2173560421735606e-06, "loss": 0.5602, "step": 32884 }, { "epoch": 0.9601179527605034, "grad_norm": 0.4968033865783605, "learning_rate": 2.21573398215734e-06, "loss": 0.5221, "step": 32885 }, { "epoch": 0.9601471489883507, "grad_norm": 0.4817591980234845, "learning_rate": 2.2141119221411194e-06, "loss": 0.5029, "step": 32886 }, { "epoch": 0.9601763452161981, "grad_norm": 0.5449558908879636, "learning_rate": 2.2124898621248987e-06, "loss": 0.5629, "step": 32887 }, { "epoch": 0.9602055414440455, "grad_norm": 0.5234035254265311, "learning_rate": 2.2108678021086783e-06, "loss": 0.6083, "step": 32888 }, { "epoch": 0.9602347376718928, "grad_norm": 0.5130479588954129, "learning_rate": 2.2092457420924575e-06, "loss": 0.5793, "step": 32889 }, { "epoch": 0.9602639338997402, "grad_norm": 0.5596680690552446, "learning_rate": 2.2076236820762368e-06, "loss": 0.5946, "step": 32890 }, { "epoch": 0.9602931301275875, "grad_norm": 0.5187163272184853, "learning_rate": 2.2060016220600164e-06, "loss": 0.5561, "step": 32891 }, { "epoch": 0.9603223263554349, "grad_norm": 0.5485682164370471, "learning_rate": 2.2043795620437956e-06, "loss": 0.6233, "step": 32892 }, { "epoch": 0.9603515225832823, "grad_norm": 0.5069695458885246, "learning_rate": 2.2027575020275753e-06, "loss": 0.5074, "step": 32893 }, { "epoch": 0.9603807188111296, "grad_norm": 0.5266444715839573, "learning_rate": 2.2011354420113545e-06, "loss": 0.5777, "step": 32894 }, { "epoch": 0.960409915038977, "grad_norm": 0.5354489748131372, "learning_rate": 2.199513381995134e-06, "loss": 0.5401, "step": 32895 }, { "epoch": 0.9604391112668244, "grad_norm": 0.5254719408471779, "learning_rate": 2.1978913219789134e-06, "loss": 0.5643, "step": 32896 }, { "epoch": 0.9604683074946717, "grad_norm": 0.5327987413924264, "learning_rate": 2.1962692619626926e-06, "loss": 0.5791, "step": 32897 }, { "epoch": 0.9604975037225191, "grad_norm": 0.5325375082311239, "learning_rate": 2.1946472019464722e-06, "loss": 0.6097, "step": 32898 }, { "epoch": 0.9605266999503664, "grad_norm": 0.5304117194408512, "learning_rate": 2.1930251419302514e-06, "loss": 0.5619, "step": 32899 }, { "epoch": 0.9605558961782138, "grad_norm": 0.5451109552161408, "learning_rate": 2.191403081914031e-06, "loss": 0.5918, "step": 32900 }, { "epoch": 0.9605850924060612, "grad_norm": 0.550894087166082, "learning_rate": 2.1897810218978103e-06, "loss": 0.5955, "step": 32901 }, { "epoch": 0.9606142886339085, "grad_norm": 0.5052139750962811, "learning_rate": 2.18815896188159e-06, "loss": 0.5185, "step": 32902 }, { "epoch": 0.9606434848617559, "grad_norm": 0.5400417483654513, "learning_rate": 2.186536901865369e-06, "loss": 0.5646, "step": 32903 }, { "epoch": 0.9606726810896032, "grad_norm": 0.46356709788128747, "learning_rate": 2.1849148418491484e-06, "loss": 0.47, "step": 32904 }, { "epoch": 0.9607018773174506, "grad_norm": 0.47539553452555783, "learning_rate": 2.183292781832928e-06, "loss": 0.4962, "step": 32905 }, { "epoch": 0.960731073545298, "grad_norm": 0.5327627735607503, "learning_rate": 2.1816707218167073e-06, "loss": 0.559, "step": 32906 }, { "epoch": 0.9607602697731453, "grad_norm": 0.47846195883764325, "learning_rate": 2.180048661800487e-06, "loss": 0.4989, "step": 32907 }, { "epoch": 0.9607894660009927, "grad_norm": 0.5200466461987149, "learning_rate": 2.178426601784266e-06, "loss": 0.573, "step": 32908 }, { "epoch": 0.96081866222884, "grad_norm": 0.5509479389255524, "learning_rate": 2.1768045417680453e-06, "loss": 0.6085, "step": 32909 }, { "epoch": 0.9608478584566874, "grad_norm": 0.5088469943967509, "learning_rate": 2.175182481751825e-06, "loss": 0.5102, "step": 32910 }, { "epoch": 0.9608770546845348, "grad_norm": 0.48838488905661837, "learning_rate": 2.173560421735604e-06, "loss": 0.4871, "step": 32911 }, { "epoch": 0.9609062509123821, "grad_norm": 0.49916274022477275, "learning_rate": 2.171938361719384e-06, "loss": 0.527, "step": 32912 }, { "epoch": 0.9609354471402295, "grad_norm": 0.5212872994897144, "learning_rate": 2.170316301703163e-06, "loss": 0.5722, "step": 32913 }, { "epoch": 0.9609646433680769, "grad_norm": 0.533810123870588, "learning_rate": 2.1686942416869427e-06, "loss": 0.5549, "step": 32914 }, { "epoch": 0.9609938395959242, "grad_norm": 0.47445746434750996, "learning_rate": 2.167072181670722e-06, "loss": 0.4797, "step": 32915 }, { "epoch": 0.9610230358237716, "grad_norm": 0.5240022079663513, "learning_rate": 2.165450121654501e-06, "loss": 0.5341, "step": 32916 }, { "epoch": 0.9610522320516189, "grad_norm": 0.5514234027165188, "learning_rate": 2.163828061638281e-06, "loss": 0.5942, "step": 32917 }, { "epoch": 0.9610814282794663, "grad_norm": 0.5181342868381034, "learning_rate": 2.16220600162206e-06, "loss": 0.5481, "step": 32918 }, { "epoch": 0.9611106245073137, "grad_norm": 0.513918658020724, "learning_rate": 2.1605839416058397e-06, "loss": 0.5417, "step": 32919 }, { "epoch": 0.961139820735161, "grad_norm": 0.513632878426257, "learning_rate": 2.158961881589619e-06, "loss": 0.5718, "step": 32920 }, { "epoch": 0.9611690169630084, "grad_norm": 0.5320370593483302, "learning_rate": 2.1573398215733985e-06, "loss": 0.5454, "step": 32921 }, { "epoch": 0.9611982131908557, "grad_norm": 0.5121788598944557, "learning_rate": 2.1557177615571778e-06, "loss": 0.5295, "step": 32922 }, { "epoch": 0.9612274094187031, "grad_norm": 0.6106402110455482, "learning_rate": 2.154095701540957e-06, "loss": 0.7204, "step": 32923 }, { "epoch": 0.9612566056465505, "grad_norm": 0.5156751355773441, "learning_rate": 2.1524736415247366e-06, "loss": 0.5444, "step": 32924 }, { "epoch": 0.9612858018743978, "grad_norm": 0.5317541871120058, "learning_rate": 2.150851581508516e-06, "loss": 0.5967, "step": 32925 }, { "epoch": 0.9613149981022452, "grad_norm": 0.567840674238156, "learning_rate": 2.1492295214922955e-06, "loss": 0.6311, "step": 32926 }, { "epoch": 0.9613441943300925, "grad_norm": 0.5016707317024065, "learning_rate": 2.1476074614760747e-06, "loss": 0.5307, "step": 32927 }, { "epoch": 0.9613733905579399, "grad_norm": 0.5222799584640777, "learning_rate": 2.1459854014598544e-06, "loss": 0.5872, "step": 32928 }, { "epoch": 0.9614025867857873, "grad_norm": 0.5172558675834819, "learning_rate": 2.1443633414436336e-06, "loss": 0.5563, "step": 32929 }, { "epoch": 0.9614317830136346, "grad_norm": 0.5122271219210166, "learning_rate": 2.142741281427413e-06, "loss": 0.5374, "step": 32930 }, { "epoch": 0.961460979241482, "grad_norm": 0.5333592364066025, "learning_rate": 2.1411192214111924e-06, "loss": 0.6173, "step": 32931 }, { "epoch": 0.9614901754693294, "grad_norm": 0.5473398919488405, "learning_rate": 2.1394971613949717e-06, "loss": 0.6597, "step": 32932 }, { "epoch": 0.9615193716971767, "grad_norm": 0.562376397858835, "learning_rate": 2.1378751013787513e-06, "loss": 0.6588, "step": 32933 }, { "epoch": 0.9615485679250241, "grad_norm": 0.5011627591786939, "learning_rate": 2.1362530413625305e-06, "loss": 0.5413, "step": 32934 }, { "epoch": 0.9615777641528714, "grad_norm": 0.5577549632436906, "learning_rate": 2.13463098134631e-06, "loss": 0.6339, "step": 32935 }, { "epoch": 0.9616069603807188, "grad_norm": 0.5560194464869445, "learning_rate": 2.1330089213300894e-06, "loss": 0.6154, "step": 32936 }, { "epoch": 0.9616361566085662, "grad_norm": 0.541336928622634, "learning_rate": 2.1313868613138686e-06, "loss": 0.5896, "step": 32937 }, { "epoch": 0.9616653528364135, "grad_norm": 0.5275994918143482, "learning_rate": 2.1297648012976483e-06, "loss": 0.5513, "step": 32938 }, { "epoch": 0.9616945490642609, "grad_norm": 0.5322376013556963, "learning_rate": 2.1281427412814275e-06, "loss": 0.5924, "step": 32939 }, { "epoch": 0.9617237452921082, "grad_norm": 0.5008380013119049, "learning_rate": 2.126520681265207e-06, "loss": 0.524, "step": 32940 }, { "epoch": 0.9617529415199556, "grad_norm": 0.5444137680341964, "learning_rate": 2.1248986212489863e-06, "loss": 0.5849, "step": 32941 }, { "epoch": 0.961782137747803, "grad_norm": 0.5096899176118903, "learning_rate": 2.1232765612327656e-06, "loss": 0.5563, "step": 32942 }, { "epoch": 0.9618113339756503, "grad_norm": 0.5419468163091241, "learning_rate": 2.121654501216545e-06, "loss": 0.5845, "step": 32943 }, { "epoch": 0.9618405302034977, "grad_norm": 0.5161545827336299, "learning_rate": 2.1200324412003244e-06, "loss": 0.5261, "step": 32944 }, { "epoch": 0.961869726431345, "grad_norm": 0.4800568300594354, "learning_rate": 2.118410381184104e-06, "loss": 0.484, "step": 32945 }, { "epoch": 0.9618989226591924, "grad_norm": 0.4952688583896193, "learning_rate": 2.1167883211678833e-06, "loss": 0.5258, "step": 32946 }, { "epoch": 0.9619281188870398, "grad_norm": 0.535912178904495, "learning_rate": 2.115166261151663e-06, "loss": 0.5458, "step": 32947 }, { "epoch": 0.9619573151148871, "grad_norm": 0.5486138475922051, "learning_rate": 2.113544201135442e-06, "loss": 0.6272, "step": 32948 }, { "epoch": 0.9619865113427345, "grad_norm": 0.5677386017014713, "learning_rate": 2.1119221411192214e-06, "loss": 0.7049, "step": 32949 }, { "epoch": 0.9620157075705819, "grad_norm": 0.528960149048589, "learning_rate": 2.110300081103001e-06, "loss": 0.5509, "step": 32950 }, { "epoch": 0.9620449037984292, "grad_norm": 0.5192471745197312, "learning_rate": 2.1086780210867802e-06, "loss": 0.5453, "step": 32951 }, { "epoch": 0.9620741000262766, "grad_norm": 0.5616755717316, "learning_rate": 2.10705596107056e-06, "loss": 0.5994, "step": 32952 }, { "epoch": 0.9621032962541239, "grad_norm": 0.5199967914142178, "learning_rate": 2.105433901054339e-06, "loss": 0.5628, "step": 32953 }, { "epoch": 0.9621324924819713, "grad_norm": 0.5141487473913582, "learning_rate": 2.1038118410381188e-06, "loss": 0.568, "step": 32954 }, { "epoch": 0.9621616887098187, "grad_norm": 0.5138346240321648, "learning_rate": 2.1021897810218976e-06, "loss": 0.5388, "step": 32955 }, { "epoch": 0.962190884937666, "grad_norm": 0.5080786988493369, "learning_rate": 2.100567721005677e-06, "loss": 0.5482, "step": 32956 }, { "epoch": 0.9622200811655134, "grad_norm": 0.5358505211167558, "learning_rate": 2.098945660989457e-06, "loss": 0.5933, "step": 32957 }, { "epoch": 0.9622492773933607, "grad_norm": 0.5340774266969407, "learning_rate": 2.097323600973236e-06, "loss": 0.5976, "step": 32958 }, { "epoch": 0.9622784736212081, "grad_norm": 0.5342465388650759, "learning_rate": 2.0957015409570157e-06, "loss": 0.5728, "step": 32959 }, { "epoch": 0.9623076698490555, "grad_norm": 0.5354316281278692, "learning_rate": 2.094079480940795e-06, "loss": 0.6282, "step": 32960 }, { "epoch": 0.9623368660769028, "grad_norm": 0.5392280704343956, "learning_rate": 2.0924574209245746e-06, "loss": 0.5822, "step": 32961 }, { "epoch": 0.9623660623047502, "grad_norm": 0.5716409508596295, "learning_rate": 2.0908353609083534e-06, "loss": 0.5923, "step": 32962 }, { "epoch": 0.9623952585325976, "grad_norm": 0.5070123529891285, "learning_rate": 2.089213300892133e-06, "loss": 0.5423, "step": 32963 }, { "epoch": 0.9624244547604449, "grad_norm": 0.5099291922817352, "learning_rate": 2.0875912408759127e-06, "loss": 0.5791, "step": 32964 }, { "epoch": 0.9624536509882923, "grad_norm": 0.5194140135505877, "learning_rate": 2.085969180859692e-06, "loss": 0.5884, "step": 32965 }, { "epoch": 0.9624828472161396, "grad_norm": 0.497027322478961, "learning_rate": 2.0843471208434715e-06, "loss": 0.5192, "step": 32966 }, { "epoch": 0.962512043443987, "grad_norm": 0.544728753810256, "learning_rate": 2.0827250608272507e-06, "loss": 0.6168, "step": 32967 }, { "epoch": 0.9625412396718344, "grad_norm": 0.48368109220099154, "learning_rate": 2.08110300081103e-06, "loss": 0.4838, "step": 32968 }, { "epoch": 0.9625704358996817, "grad_norm": 0.5562618203872994, "learning_rate": 2.079480940794809e-06, "loss": 0.6123, "step": 32969 }, { "epoch": 0.9625996321275291, "grad_norm": 0.5583632467511315, "learning_rate": 2.077858880778589e-06, "loss": 0.6532, "step": 32970 }, { "epoch": 0.9626288283553764, "grad_norm": 0.4994974787232378, "learning_rate": 2.0762368207623685e-06, "loss": 0.5293, "step": 32971 }, { "epoch": 0.9626580245832238, "grad_norm": 0.5318406478414116, "learning_rate": 2.0746147607461477e-06, "loss": 0.5632, "step": 32972 }, { "epoch": 0.9626872208110712, "grad_norm": 0.5057499312796354, "learning_rate": 2.0729927007299273e-06, "loss": 0.5426, "step": 32973 }, { "epoch": 0.9627164170389185, "grad_norm": 0.5332150663058931, "learning_rate": 2.0713706407137066e-06, "loss": 0.5913, "step": 32974 }, { "epoch": 0.9627456132667659, "grad_norm": 0.5180791288157579, "learning_rate": 2.0697485806974858e-06, "loss": 0.5202, "step": 32975 }, { "epoch": 0.9627748094946132, "grad_norm": 0.49827803051128217, "learning_rate": 2.068126520681265e-06, "loss": 0.5074, "step": 32976 }, { "epoch": 0.9628040057224606, "grad_norm": 0.5576978615686079, "learning_rate": 2.0665044606650447e-06, "loss": 0.6157, "step": 32977 }, { "epoch": 0.962833201950308, "grad_norm": 0.5450623839291366, "learning_rate": 2.0648824006488243e-06, "loss": 0.5943, "step": 32978 }, { "epoch": 0.9628623981781553, "grad_norm": 0.5096470290824798, "learning_rate": 2.0632603406326035e-06, "loss": 0.5447, "step": 32979 }, { "epoch": 0.9628915944060027, "grad_norm": 0.5059525607468085, "learning_rate": 2.061638280616383e-06, "loss": 0.5162, "step": 32980 }, { "epoch": 0.96292079063385, "grad_norm": 0.5480525292381809, "learning_rate": 2.0600162206001624e-06, "loss": 0.6063, "step": 32981 }, { "epoch": 0.9629499868616974, "grad_norm": 0.5268385258880468, "learning_rate": 2.0583941605839416e-06, "loss": 0.5158, "step": 32982 }, { "epoch": 0.9629791830895448, "grad_norm": 0.5426588315553058, "learning_rate": 2.056772100567721e-06, "loss": 0.6071, "step": 32983 }, { "epoch": 0.9630083793173921, "grad_norm": 0.5297713199950117, "learning_rate": 2.0551500405515005e-06, "loss": 0.5834, "step": 32984 }, { "epoch": 0.9630375755452396, "grad_norm": 0.5080168727983608, "learning_rate": 2.05352798053528e-06, "loss": 0.5601, "step": 32985 }, { "epoch": 0.963066771773087, "grad_norm": 0.520244067024809, "learning_rate": 2.0519059205190593e-06, "loss": 0.5936, "step": 32986 }, { "epoch": 0.9630959680009343, "grad_norm": 0.5270485290539343, "learning_rate": 2.050283860502839e-06, "loss": 0.5802, "step": 32987 }, { "epoch": 0.9631251642287817, "grad_norm": 0.5086677547781975, "learning_rate": 2.0486618004866178e-06, "loss": 0.5465, "step": 32988 }, { "epoch": 0.963154360456629, "grad_norm": 0.5365356231263846, "learning_rate": 2.0470397404703974e-06, "loss": 0.5001, "step": 32989 }, { "epoch": 0.9631835566844764, "grad_norm": 0.5449115545053452, "learning_rate": 2.0454176804541766e-06, "loss": 0.5873, "step": 32990 }, { "epoch": 0.9632127529123238, "grad_norm": 0.5391439227047474, "learning_rate": 2.0437956204379563e-06, "loss": 0.5998, "step": 32991 }, { "epoch": 0.9632419491401711, "grad_norm": 0.520118702781253, "learning_rate": 2.042173560421736e-06, "loss": 0.5577, "step": 32992 }, { "epoch": 0.9632711453680185, "grad_norm": 0.5283886853559261, "learning_rate": 2.040551500405515e-06, "loss": 0.5753, "step": 32993 }, { "epoch": 0.9633003415958659, "grad_norm": 0.5291958124132435, "learning_rate": 2.038929440389295e-06, "loss": 0.5975, "step": 32994 }, { "epoch": 0.9633295378237132, "grad_norm": 0.5543955733008727, "learning_rate": 2.0373073803730736e-06, "loss": 0.6419, "step": 32995 }, { "epoch": 0.9633587340515606, "grad_norm": 0.5441904039042903, "learning_rate": 2.0356853203568532e-06, "loss": 0.6263, "step": 32996 }, { "epoch": 0.963387930279408, "grad_norm": 0.5139078978726758, "learning_rate": 2.034063260340633e-06, "loss": 0.5391, "step": 32997 }, { "epoch": 0.9634171265072553, "grad_norm": 0.5741176343086009, "learning_rate": 2.032441200324412e-06, "loss": 0.6109, "step": 32998 }, { "epoch": 0.9634463227351027, "grad_norm": 0.519769382908234, "learning_rate": 2.0308191403081917e-06, "loss": 0.5663, "step": 32999 }, { "epoch": 0.96347551896295, "grad_norm": 0.5683156302581457, "learning_rate": 2.029197080291971e-06, "loss": 0.6231, "step": 33000 }, { "epoch": 0.9635047151907974, "grad_norm": 0.5378974856939409, "learning_rate": 2.02757502027575e-06, "loss": 0.5784, "step": 33001 }, { "epoch": 0.9635339114186448, "grad_norm": 0.5065013642112027, "learning_rate": 2.0259529602595294e-06, "loss": 0.5598, "step": 33002 }, { "epoch": 0.9635631076464921, "grad_norm": 0.5187126582843271, "learning_rate": 2.024330900243309e-06, "loss": 0.5398, "step": 33003 }, { "epoch": 0.9635923038743395, "grad_norm": 0.5646088450603597, "learning_rate": 2.0227088402270887e-06, "loss": 0.6258, "step": 33004 }, { "epoch": 0.9636215001021868, "grad_norm": 0.5666875706572496, "learning_rate": 2.021086780210868e-06, "loss": 0.6249, "step": 33005 }, { "epoch": 0.9636506963300342, "grad_norm": 0.5162972254019585, "learning_rate": 2.0194647201946476e-06, "loss": 0.5063, "step": 33006 }, { "epoch": 0.9636798925578816, "grad_norm": 0.5093664009283293, "learning_rate": 2.0178426601784268e-06, "loss": 0.5563, "step": 33007 }, { "epoch": 0.9637090887857289, "grad_norm": 0.5362901555555596, "learning_rate": 2.016220600162206e-06, "loss": 0.5868, "step": 33008 }, { "epoch": 0.9637382850135763, "grad_norm": 0.5268259806238823, "learning_rate": 2.0145985401459852e-06, "loss": 0.5895, "step": 33009 }, { "epoch": 0.9637674812414236, "grad_norm": 0.5109303921765465, "learning_rate": 2.012976480129765e-06, "loss": 0.5425, "step": 33010 }, { "epoch": 0.963796677469271, "grad_norm": 0.5491692302104897, "learning_rate": 2.0113544201135445e-06, "loss": 0.4965, "step": 33011 }, { "epoch": 0.9638258736971184, "grad_norm": 0.5132165456506111, "learning_rate": 2.0097323600973237e-06, "loss": 0.5503, "step": 33012 }, { "epoch": 0.9638550699249657, "grad_norm": 0.4902771903934307, "learning_rate": 2.0081103000811034e-06, "loss": 0.497, "step": 33013 }, { "epoch": 0.9638842661528131, "grad_norm": 0.534510288779558, "learning_rate": 2.006488240064882e-06, "loss": 0.6016, "step": 33014 }, { "epoch": 0.9639134623806604, "grad_norm": 0.5476814224256534, "learning_rate": 2.004866180048662e-06, "loss": 0.5706, "step": 33015 }, { "epoch": 0.9639426586085078, "grad_norm": 0.5487557698574187, "learning_rate": 2.003244120032441e-06, "loss": 0.6052, "step": 33016 }, { "epoch": 0.9639718548363552, "grad_norm": 0.47596598275730145, "learning_rate": 2.0016220600162207e-06, "loss": 0.473, "step": 33017 }, { "epoch": 0.9640010510642025, "grad_norm": 0.5278652120654007, "learning_rate": 2.0000000000000003e-06, "loss": 0.5945, "step": 33018 }, { "epoch": 0.9640302472920499, "grad_norm": 0.562988273596625, "learning_rate": 1.9983779399837796e-06, "loss": 0.6563, "step": 33019 }, { "epoch": 0.9640594435198973, "grad_norm": 0.4850975864822413, "learning_rate": 1.996755879967559e-06, "loss": 0.4797, "step": 33020 }, { "epoch": 0.9640886397477446, "grad_norm": 0.5692560981963422, "learning_rate": 1.995133819951338e-06, "loss": 0.6397, "step": 33021 }, { "epoch": 0.964117835975592, "grad_norm": 0.5268310548536776, "learning_rate": 1.9935117599351176e-06, "loss": 0.5748, "step": 33022 }, { "epoch": 0.9641470322034393, "grad_norm": 0.5092327614929869, "learning_rate": 1.991889699918897e-06, "loss": 0.5589, "step": 33023 }, { "epoch": 0.9641762284312867, "grad_norm": 0.6068280072176521, "learning_rate": 1.9902676399026765e-06, "loss": 0.6487, "step": 33024 }, { "epoch": 0.9642054246591341, "grad_norm": 0.5964490094800604, "learning_rate": 1.988645579886456e-06, "loss": 0.6315, "step": 33025 }, { "epoch": 0.9642346208869814, "grad_norm": 0.5031995570156775, "learning_rate": 1.9870235198702354e-06, "loss": 0.5289, "step": 33026 }, { "epoch": 0.9642638171148288, "grad_norm": 0.5225679472985523, "learning_rate": 1.985401459854015e-06, "loss": 0.5633, "step": 33027 }, { "epoch": 0.9642930133426761, "grad_norm": 0.5129596088766746, "learning_rate": 1.983779399837794e-06, "loss": 0.567, "step": 33028 }, { "epoch": 0.9643222095705235, "grad_norm": 0.5364921350554601, "learning_rate": 1.9821573398215735e-06, "loss": 0.5907, "step": 33029 }, { "epoch": 0.9643514057983709, "grad_norm": 0.5388598378939271, "learning_rate": 1.9805352798053527e-06, "loss": 0.5283, "step": 33030 }, { "epoch": 0.9643806020262182, "grad_norm": 0.5041344379789464, "learning_rate": 1.9789132197891323e-06, "loss": 0.5296, "step": 33031 }, { "epoch": 0.9644097982540656, "grad_norm": 0.5256916188214177, "learning_rate": 1.977291159772912e-06, "loss": 0.5509, "step": 33032 }, { "epoch": 0.964438994481913, "grad_norm": 0.5465083839298509, "learning_rate": 1.975669099756691e-06, "loss": 0.5804, "step": 33033 }, { "epoch": 0.9644681907097603, "grad_norm": 0.5440297561838237, "learning_rate": 1.9740470397404704e-06, "loss": 0.6377, "step": 33034 }, { "epoch": 0.9644973869376077, "grad_norm": 0.5752302743625611, "learning_rate": 1.9724249797242496e-06, "loss": 0.6467, "step": 33035 }, { "epoch": 0.964526583165455, "grad_norm": 0.5713201311072307, "learning_rate": 1.9708029197080293e-06, "loss": 0.6607, "step": 33036 }, { "epoch": 0.9645557793933024, "grad_norm": 0.535610113904365, "learning_rate": 1.9691808596918085e-06, "loss": 0.5915, "step": 33037 }, { "epoch": 0.9645849756211498, "grad_norm": 0.48720671210524646, "learning_rate": 1.967558799675588e-06, "loss": 0.5248, "step": 33038 }, { "epoch": 0.9646141718489971, "grad_norm": 0.5748055591921447, "learning_rate": 1.9659367396593678e-06, "loss": 0.6508, "step": 33039 }, { "epoch": 0.9646433680768445, "grad_norm": 0.5485634664676733, "learning_rate": 1.964314679643147e-06, "loss": 0.5899, "step": 33040 }, { "epoch": 0.9646725643046918, "grad_norm": 0.521095990463275, "learning_rate": 1.9626926196269262e-06, "loss": 0.577, "step": 33041 }, { "epoch": 0.9647017605325392, "grad_norm": 0.5142697864006174, "learning_rate": 1.9610705596107054e-06, "loss": 0.5432, "step": 33042 }, { "epoch": 0.9647309567603866, "grad_norm": 0.49510998418052715, "learning_rate": 1.959448499594485e-06, "loss": 0.5169, "step": 33043 }, { "epoch": 0.9647601529882339, "grad_norm": 0.531229254676234, "learning_rate": 1.9578264395782643e-06, "loss": 0.5276, "step": 33044 }, { "epoch": 0.9647893492160813, "grad_norm": 0.5510535698559843, "learning_rate": 1.956204379562044e-06, "loss": 0.6059, "step": 33045 }, { "epoch": 0.9648185454439286, "grad_norm": 0.5633875429090206, "learning_rate": 1.9545823195458236e-06, "loss": 0.6371, "step": 33046 }, { "epoch": 0.964847741671776, "grad_norm": 0.5335243376220123, "learning_rate": 1.9529602595296024e-06, "loss": 0.601, "step": 33047 }, { "epoch": 0.9648769378996234, "grad_norm": 0.5286932803318949, "learning_rate": 1.951338199513382e-06, "loss": 0.5938, "step": 33048 }, { "epoch": 0.9649061341274707, "grad_norm": 0.5330510948823259, "learning_rate": 1.9497161394971613e-06, "loss": 0.5615, "step": 33049 }, { "epoch": 0.9649353303553181, "grad_norm": 0.5249322480982191, "learning_rate": 1.948094079480941e-06, "loss": 0.5731, "step": 33050 }, { "epoch": 0.9649645265831654, "grad_norm": 0.5339562235683402, "learning_rate": 1.94647201946472e-06, "loss": 0.6161, "step": 33051 }, { "epoch": 0.9649937228110128, "grad_norm": 0.5233376785518361, "learning_rate": 1.9448499594484998e-06, "loss": 0.5222, "step": 33052 }, { "epoch": 0.9650229190388602, "grad_norm": 0.5291385420917846, "learning_rate": 1.9432278994322794e-06, "loss": 0.5427, "step": 33053 }, { "epoch": 0.9650521152667075, "grad_norm": 0.5331304512235875, "learning_rate": 1.9416058394160582e-06, "loss": 0.5976, "step": 33054 }, { "epoch": 0.9650813114945549, "grad_norm": 0.4972859488011349, "learning_rate": 1.939983779399838e-06, "loss": 0.5105, "step": 33055 }, { "epoch": 0.9651105077224023, "grad_norm": 0.5395920410250034, "learning_rate": 1.938361719383617e-06, "loss": 0.581, "step": 33056 }, { "epoch": 0.9651397039502496, "grad_norm": 0.5191983669748299, "learning_rate": 1.9367396593673967e-06, "loss": 0.5874, "step": 33057 }, { "epoch": 0.965168900178097, "grad_norm": 0.5626436823359956, "learning_rate": 1.9351175993511764e-06, "loss": 0.6284, "step": 33058 }, { "epoch": 0.9651980964059443, "grad_norm": 0.5068730275281311, "learning_rate": 1.9334955393349556e-06, "loss": 0.5522, "step": 33059 }, { "epoch": 0.9652272926337917, "grad_norm": 0.5488592919572516, "learning_rate": 1.931873479318735e-06, "loss": 0.625, "step": 33060 }, { "epoch": 0.9652564888616391, "grad_norm": 0.5078973930633779, "learning_rate": 1.930251419302514e-06, "loss": 0.5477, "step": 33061 }, { "epoch": 0.9652856850894864, "grad_norm": 0.5540352475940047, "learning_rate": 1.9286293592862937e-06, "loss": 0.6573, "step": 33062 }, { "epoch": 0.9653148813173338, "grad_norm": 0.5559756233787692, "learning_rate": 1.927007299270073e-06, "loss": 0.5556, "step": 33063 }, { "epoch": 0.9653440775451811, "grad_norm": 0.49806717901721653, "learning_rate": 1.9253852392538525e-06, "loss": 0.519, "step": 33064 }, { "epoch": 0.9653732737730285, "grad_norm": 0.5327288569524447, "learning_rate": 1.923763179237632e-06, "loss": 0.558, "step": 33065 }, { "epoch": 0.9654024700008759, "grad_norm": 0.5056506717073822, "learning_rate": 1.9221411192214114e-06, "loss": 0.5206, "step": 33066 }, { "epoch": 0.9654316662287232, "grad_norm": 0.5071648094642257, "learning_rate": 1.9205190592051906e-06, "loss": 0.5245, "step": 33067 }, { "epoch": 0.9654608624565706, "grad_norm": 0.5318699870820226, "learning_rate": 1.91889699918897e-06, "loss": 0.6043, "step": 33068 }, { "epoch": 0.965490058684418, "grad_norm": 0.49695097406982913, "learning_rate": 1.9172749391727495e-06, "loss": 0.5495, "step": 33069 }, { "epoch": 0.9655192549122653, "grad_norm": 0.5026582303476057, "learning_rate": 1.9156528791565287e-06, "loss": 0.5402, "step": 33070 }, { "epoch": 0.9655484511401127, "grad_norm": 0.522212198859901, "learning_rate": 1.9140308191403084e-06, "loss": 0.5723, "step": 33071 }, { "epoch": 0.96557764736796, "grad_norm": 0.5171197955302874, "learning_rate": 1.912408759124088e-06, "loss": 0.5593, "step": 33072 }, { "epoch": 0.9656068435958074, "grad_norm": 0.5123083240703548, "learning_rate": 1.910786699107867e-06, "loss": 0.5417, "step": 33073 }, { "epoch": 0.9656360398236548, "grad_norm": 0.5351832632187303, "learning_rate": 1.9091646390916464e-06, "loss": 0.5834, "step": 33074 }, { "epoch": 0.9656652360515021, "grad_norm": 0.5091149423252073, "learning_rate": 1.9075425790754257e-06, "loss": 0.5488, "step": 33075 }, { "epoch": 0.9656944322793495, "grad_norm": 0.542454711762334, "learning_rate": 1.9059205190592053e-06, "loss": 0.5556, "step": 33076 }, { "epoch": 0.9657236285071968, "grad_norm": 0.5024015531989682, "learning_rate": 1.9042984590429847e-06, "loss": 0.4986, "step": 33077 }, { "epoch": 0.9657528247350442, "grad_norm": 0.4854128169305086, "learning_rate": 1.9026763990267642e-06, "loss": 0.509, "step": 33078 }, { "epoch": 0.9657820209628916, "grad_norm": 0.5156289164544661, "learning_rate": 1.9010543390105436e-06, "loss": 0.5069, "step": 33079 }, { "epoch": 0.9658112171907389, "grad_norm": 0.5056887086931128, "learning_rate": 1.8994322789943228e-06, "loss": 0.5558, "step": 33080 }, { "epoch": 0.9658404134185863, "grad_norm": 0.5998379452383991, "learning_rate": 1.8978102189781023e-06, "loss": 0.6912, "step": 33081 }, { "epoch": 0.9658696096464336, "grad_norm": 0.5160582515233724, "learning_rate": 1.8961881589618817e-06, "loss": 0.5607, "step": 33082 }, { "epoch": 0.965898805874281, "grad_norm": 0.5400470004270563, "learning_rate": 1.8945660989456611e-06, "loss": 0.5751, "step": 33083 }, { "epoch": 0.9659280021021284, "grad_norm": 0.5275295439184273, "learning_rate": 1.8929440389294406e-06, "loss": 0.5949, "step": 33084 }, { "epoch": 0.9659571983299757, "grad_norm": 0.5096031597204447, "learning_rate": 1.89132197891322e-06, "loss": 0.5158, "step": 33085 }, { "epoch": 0.9659863945578231, "grad_norm": 0.5356356307782053, "learning_rate": 1.8896999188969994e-06, "loss": 0.5832, "step": 33086 }, { "epoch": 0.9660155907856705, "grad_norm": 0.5016484547513045, "learning_rate": 1.8880778588807786e-06, "loss": 0.5239, "step": 33087 }, { "epoch": 0.9660447870135178, "grad_norm": 0.4958065777397098, "learning_rate": 1.886455798864558e-06, "loss": 0.5226, "step": 33088 }, { "epoch": 0.9660739832413652, "grad_norm": 0.49942284255146324, "learning_rate": 1.8848337388483375e-06, "loss": 0.5349, "step": 33089 }, { "epoch": 0.9661031794692125, "grad_norm": 0.5075436880914104, "learning_rate": 1.883211678832117e-06, "loss": 0.5282, "step": 33090 }, { "epoch": 0.9661323756970599, "grad_norm": 0.5350403169836582, "learning_rate": 1.8815896188158964e-06, "loss": 0.616, "step": 33091 }, { "epoch": 0.9661615719249073, "grad_norm": 0.4970619196094075, "learning_rate": 1.8799675587996758e-06, "loss": 0.5206, "step": 33092 }, { "epoch": 0.9661907681527546, "grad_norm": 0.5233711633550201, "learning_rate": 1.8783454987834548e-06, "loss": 0.5501, "step": 33093 }, { "epoch": 0.966219964380602, "grad_norm": 0.5436509142885501, "learning_rate": 1.8767234387672345e-06, "loss": 0.5959, "step": 33094 }, { "epoch": 0.9662491606084493, "grad_norm": 0.5465235637628912, "learning_rate": 1.875101378751014e-06, "loss": 0.5893, "step": 33095 }, { "epoch": 0.9662783568362967, "grad_norm": 0.5284112100290808, "learning_rate": 1.8734793187347933e-06, "loss": 0.5923, "step": 33096 }, { "epoch": 0.9663075530641441, "grad_norm": 0.5550277370959156, "learning_rate": 1.8718572587185728e-06, "loss": 0.6378, "step": 33097 }, { "epoch": 0.9663367492919914, "grad_norm": 0.552498709420542, "learning_rate": 1.8702351987023522e-06, "loss": 0.6419, "step": 33098 }, { "epoch": 0.9663659455198388, "grad_norm": 0.5162270491372183, "learning_rate": 1.8686131386861316e-06, "loss": 0.5416, "step": 33099 }, { "epoch": 0.9663951417476861, "grad_norm": 0.5014604070654403, "learning_rate": 1.8669910786699106e-06, "loss": 0.5353, "step": 33100 }, { "epoch": 0.9664243379755335, "grad_norm": 0.5059335297333792, "learning_rate": 1.8653690186536903e-06, "loss": 0.5248, "step": 33101 }, { "epoch": 0.9664535342033809, "grad_norm": 0.5435913451456714, "learning_rate": 1.8637469586374697e-06, "loss": 0.595, "step": 33102 }, { "epoch": 0.9664827304312282, "grad_norm": 0.5086881308692865, "learning_rate": 1.8621248986212491e-06, "loss": 0.5567, "step": 33103 }, { "epoch": 0.9665119266590756, "grad_norm": 0.4866889893883948, "learning_rate": 1.8605028386050286e-06, "loss": 0.5038, "step": 33104 }, { "epoch": 0.966541122886923, "grad_norm": 0.5317572299020011, "learning_rate": 1.858880778588808e-06, "loss": 0.5704, "step": 33105 }, { "epoch": 0.9665703191147704, "grad_norm": 0.5441166360339773, "learning_rate": 1.857258718572587e-06, "loss": 0.5859, "step": 33106 }, { "epoch": 0.9665995153426178, "grad_norm": 0.5425878365979407, "learning_rate": 1.8556366585563667e-06, "loss": 0.6312, "step": 33107 }, { "epoch": 0.9666287115704651, "grad_norm": 0.5508978274271067, "learning_rate": 1.854014598540146e-06, "loss": 0.5863, "step": 33108 }, { "epoch": 0.9666579077983125, "grad_norm": 0.5408245944824511, "learning_rate": 1.8523925385239255e-06, "loss": 0.6349, "step": 33109 }, { "epoch": 0.9666871040261599, "grad_norm": 0.5067024684313965, "learning_rate": 1.850770478507705e-06, "loss": 0.548, "step": 33110 }, { "epoch": 0.9667163002540072, "grad_norm": 0.5300749130715935, "learning_rate": 1.8491484184914844e-06, "loss": 0.5879, "step": 33111 }, { "epoch": 0.9667454964818546, "grad_norm": 0.5375355298544154, "learning_rate": 1.8475263584752638e-06, "loss": 0.604, "step": 33112 }, { "epoch": 0.966774692709702, "grad_norm": 0.532111480635427, "learning_rate": 1.8459042984590428e-06, "loss": 0.5709, "step": 33113 }, { "epoch": 0.9668038889375493, "grad_norm": 0.5294620814591221, "learning_rate": 1.8442822384428225e-06, "loss": 0.572, "step": 33114 }, { "epoch": 0.9668330851653967, "grad_norm": 0.5599689950579413, "learning_rate": 1.842660178426602e-06, "loss": 0.6244, "step": 33115 }, { "epoch": 0.966862281393244, "grad_norm": 0.49197433405710633, "learning_rate": 1.8410381184103814e-06, "loss": 0.5051, "step": 33116 }, { "epoch": 0.9668914776210914, "grad_norm": 0.5471867591581261, "learning_rate": 1.8394160583941608e-06, "loss": 0.5986, "step": 33117 }, { "epoch": 0.9669206738489388, "grad_norm": 0.5907644019922919, "learning_rate": 1.8377939983779402e-06, "loss": 0.6385, "step": 33118 }, { "epoch": 0.9669498700767861, "grad_norm": 0.5245318887009025, "learning_rate": 1.8361719383617192e-06, "loss": 0.5531, "step": 33119 }, { "epoch": 0.9669790663046335, "grad_norm": 0.5039488254807405, "learning_rate": 1.8345498783454987e-06, "loss": 0.521, "step": 33120 }, { "epoch": 0.9670082625324808, "grad_norm": 0.5465316311049444, "learning_rate": 1.8329278183292783e-06, "loss": 0.5846, "step": 33121 }, { "epoch": 0.9670374587603282, "grad_norm": 0.5608289726424779, "learning_rate": 1.8313057583130577e-06, "loss": 0.6377, "step": 33122 }, { "epoch": 0.9670666549881756, "grad_norm": 0.5187113653371018, "learning_rate": 1.8296836982968372e-06, "loss": 0.5205, "step": 33123 }, { "epoch": 0.9670958512160229, "grad_norm": 0.5311159234032907, "learning_rate": 1.8280616382806166e-06, "loss": 0.5711, "step": 33124 }, { "epoch": 0.9671250474438703, "grad_norm": 0.5392772480450845, "learning_rate": 1.826439578264396e-06, "loss": 0.5922, "step": 33125 }, { "epoch": 0.9671542436717177, "grad_norm": 0.5223443542134171, "learning_rate": 1.824817518248175e-06, "loss": 0.5556, "step": 33126 }, { "epoch": 0.967183439899565, "grad_norm": 0.5199104245826216, "learning_rate": 1.8231954582319545e-06, "loss": 0.5391, "step": 33127 }, { "epoch": 0.9672126361274124, "grad_norm": 0.5189576482756929, "learning_rate": 1.8215733982157341e-06, "loss": 0.5767, "step": 33128 }, { "epoch": 0.9672418323552597, "grad_norm": 0.5303156341185463, "learning_rate": 1.8199513381995136e-06, "loss": 0.55, "step": 33129 }, { "epoch": 0.9672710285831071, "grad_norm": 0.531212113212393, "learning_rate": 1.818329278183293e-06, "loss": 0.5893, "step": 33130 }, { "epoch": 0.9673002248109545, "grad_norm": 0.4820468340100454, "learning_rate": 1.8167072181670724e-06, "loss": 0.4874, "step": 33131 }, { "epoch": 0.9673294210388018, "grad_norm": 0.5387901002632955, "learning_rate": 1.8150851581508519e-06, "loss": 0.5421, "step": 33132 }, { "epoch": 0.9673586172666492, "grad_norm": 0.5513192200637483, "learning_rate": 1.8134630981346309e-06, "loss": 0.6012, "step": 33133 }, { "epoch": 0.9673878134944965, "grad_norm": 0.5128200205903289, "learning_rate": 1.8118410381184103e-06, "loss": 0.5875, "step": 33134 }, { "epoch": 0.9674170097223439, "grad_norm": 0.5266500698569645, "learning_rate": 1.81021897810219e-06, "loss": 0.6158, "step": 33135 }, { "epoch": 0.9674462059501913, "grad_norm": 0.5357721930311339, "learning_rate": 1.8085969180859694e-06, "loss": 0.6381, "step": 33136 }, { "epoch": 0.9674754021780386, "grad_norm": 0.5368350029611285, "learning_rate": 1.8069748580697488e-06, "loss": 0.5909, "step": 33137 }, { "epoch": 0.967504598405886, "grad_norm": 0.5510902711614857, "learning_rate": 1.8053527980535282e-06, "loss": 0.6183, "step": 33138 }, { "epoch": 0.9675337946337333, "grad_norm": 0.5253307086234753, "learning_rate": 1.8037307380373072e-06, "loss": 0.5899, "step": 33139 }, { "epoch": 0.9675629908615807, "grad_norm": 0.5288245500644018, "learning_rate": 1.8021086780210867e-06, "loss": 0.5619, "step": 33140 }, { "epoch": 0.9675921870894281, "grad_norm": 0.5002800439551794, "learning_rate": 1.8004866180048663e-06, "loss": 0.4998, "step": 33141 }, { "epoch": 0.9676213833172754, "grad_norm": 0.5024940230350636, "learning_rate": 1.7988645579886458e-06, "loss": 0.531, "step": 33142 }, { "epoch": 0.9676505795451228, "grad_norm": 0.5238736065866155, "learning_rate": 1.7972424979724252e-06, "loss": 0.563, "step": 33143 }, { "epoch": 0.9676797757729702, "grad_norm": 0.5254149055304187, "learning_rate": 1.7956204379562046e-06, "loss": 0.5516, "step": 33144 }, { "epoch": 0.9677089720008175, "grad_norm": 0.5244452146310006, "learning_rate": 1.793998377939984e-06, "loss": 0.5644, "step": 33145 }, { "epoch": 0.9677381682286649, "grad_norm": 0.5435785331302888, "learning_rate": 1.792376317923763e-06, "loss": 0.5885, "step": 33146 }, { "epoch": 0.9677673644565122, "grad_norm": 0.5469079405416063, "learning_rate": 1.7907542579075425e-06, "loss": 0.6485, "step": 33147 }, { "epoch": 0.9677965606843596, "grad_norm": 0.5170917599046152, "learning_rate": 1.7891321978913221e-06, "loss": 0.5688, "step": 33148 }, { "epoch": 0.967825756912207, "grad_norm": 0.5693757901290212, "learning_rate": 1.7875101378751016e-06, "loss": 0.6437, "step": 33149 }, { "epoch": 0.9678549531400543, "grad_norm": 0.4969317716747358, "learning_rate": 1.785888077858881e-06, "loss": 0.5282, "step": 33150 }, { "epoch": 0.9678841493679017, "grad_norm": 0.5638263881783553, "learning_rate": 1.7842660178426604e-06, "loss": 0.5912, "step": 33151 }, { "epoch": 0.967913345595749, "grad_norm": 0.5231886136636098, "learning_rate": 1.7826439578264394e-06, "loss": 0.5748, "step": 33152 }, { "epoch": 0.9679425418235964, "grad_norm": 0.4881886040909693, "learning_rate": 1.7810218978102189e-06, "loss": 0.5118, "step": 33153 }, { "epoch": 0.9679717380514438, "grad_norm": 2.6307562241960025, "learning_rate": 1.7793998377939983e-06, "loss": 0.5473, "step": 33154 }, { "epoch": 0.9680009342792911, "grad_norm": 0.48910654732297587, "learning_rate": 1.777777777777778e-06, "loss": 0.5034, "step": 33155 }, { "epoch": 0.9680301305071385, "grad_norm": 0.5279507467640999, "learning_rate": 1.7761557177615574e-06, "loss": 0.5306, "step": 33156 }, { "epoch": 0.9680593267349858, "grad_norm": 0.4975550679582615, "learning_rate": 1.7745336577453368e-06, "loss": 0.5314, "step": 33157 }, { "epoch": 0.9680885229628332, "grad_norm": 0.5310477049884043, "learning_rate": 1.7729115977291163e-06, "loss": 0.5925, "step": 33158 }, { "epoch": 0.9681177191906806, "grad_norm": 0.5408413871905688, "learning_rate": 1.7712895377128953e-06, "loss": 0.6113, "step": 33159 }, { "epoch": 0.9681469154185279, "grad_norm": 0.5430430664008664, "learning_rate": 1.7696674776966747e-06, "loss": 0.5892, "step": 33160 }, { "epoch": 0.9681761116463753, "grad_norm": 0.5118651511395816, "learning_rate": 1.7680454176804541e-06, "loss": 0.5513, "step": 33161 }, { "epoch": 0.9682053078742227, "grad_norm": 0.5451684643500504, "learning_rate": 1.7664233576642338e-06, "loss": 0.6105, "step": 33162 }, { "epoch": 0.96823450410207, "grad_norm": 0.532236654492037, "learning_rate": 1.7648012976480132e-06, "loss": 0.562, "step": 33163 }, { "epoch": 0.9682637003299174, "grad_norm": 0.5293631063416866, "learning_rate": 1.7631792376317926e-06, "loss": 0.5981, "step": 33164 }, { "epoch": 0.9682928965577647, "grad_norm": 0.5350994547337725, "learning_rate": 1.7615571776155716e-06, "loss": 0.5962, "step": 33165 }, { "epoch": 0.9683220927856121, "grad_norm": 0.5599932745869474, "learning_rate": 1.759935117599351e-06, "loss": 0.6093, "step": 33166 }, { "epoch": 0.9683512890134595, "grad_norm": 0.49313362911543757, "learning_rate": 1.7583130575831305e-06, "loss": 0.5267, "step": 33167 }, { "epoch": 0.9683804852413068, "grad_norm": 0.5354210630764913, "learning_rate": 1.75669099756691e-06, "loss": 0.5707, "step": 33168 }, { "epoch": 0.9684096814691542, "grad_norm": 0.5329819806763947, "learning_rate": 1.7550689375506896e-06, "loss": 0.6238, "step": 33169 }, { "epoch": 0.9684388776970015, "grad_norm": 0.5432163471209883, "learning_rate": 1.753446877534469e-06, "loss": 0.5927, "step": 33170 }, { "epoch": 0.9684680739248489, "grad_norm": 0.5106695427049027, "learning_rate": 1.7518248175182485e-06, "loss": 0.5474, "step": 33171 }, { "epoch": 0.9684972701526963, "grad_norm": 0.4804869348525418, "learning_rate": 1.7502027575020275e-06, "loss": 0.4888, "step": 33172 }, { "epoch": 0.9685264663805436, "grad_norm": 0.5224949796706193, "learning_rate": 1.748580697485807e-06, "loss": 0.5569, "step": 33173 }, { "epoch": 0.968555662608391, "grad_norm": 0.5113000363662948, "learning_rate": 1.7469586374695863e-06, "loss": 0.5476, "step": 33174 }, { "epoch": 0.9685848588362383, "grad_norm": 0.5192218296504815, "learning_rate": 1.745336577453366e-06, "loss": 0.5569, "step": 33175 }, { "epoch": 0.9686140550640857, "grad_norm": 0.5196324171814478, "learning_rate": 1.7437145174371454e-06, "loss": 0.5759, "step": 33176 }, { "epoch": 0.9686432512919331, "grad_norm": 0.5298573175271512, "learning_rate": 1.7420924574209248e-06, "loss": 0.5979, "step": 33177 }, { "epoch": 0.9686724475197804, "grad_norm": 0.5263500557379361, "learning_rate": 1.7404703974047038e-06, "loss": 0.5705, "step": 33178 }, { "epoch": 0.9687016437476278, "grad_norm": 0.5299521375460683, "learning_rate": 1.7388483373884833e-06, "loss": 0.6082, "step": 33179 }, { "epoch": 0.9687308399754752, "grad_norm": 0.5525508449884478, "learning_rate": 1.7372262773722627e-06, "loss": 0.6197, "step": 33180 }, { "epoch": 0.9687600362033225, "grad_norm": 0.5036410960693302, "learning_rate": 1.7356042173560421e-06, "loss": 0.5411, "step": 33181 }, { "epoch": 0.9687892324311699, "grad_norm": 0.551271876165566, "learning_rate": 1.7339821573398218e-06, "loss": 0.637, "step": 33182 }, { "epoch": 0.9688184286590172, "grad_norm": 0.4997755890364487, "learning_rate": 1.7323600973236012e-06, "loss": 0.5169, "step": 33183 }, { "epoch": 0.9688476248868646, "grad_norm": 0.47260033152032566, "learning_rate": 1.7307380373073807e-06, "loss": 0.5166, "step": 33184 }, { "epoch": 0.968876821114712, "grad_norm": 0.5826644470041116, "learning_rate": 1.7291159772911597e-06, "loss": 0.6959, "step": 33185 }, { "epoch": 0.9689060173425593, "grad_norm": 0.5210477668498238, "learning_rate": 1.727493917274939e-06, "loss": 0.5742, "step": 33186 }, { "epoch": 0.9689352135704067, "grad_norm": 0.5789868258555005, "learning_rate": 1.7258718572587185e-06, "loss": 0.6559, "step": 33187 }, { "epoch": 0.968964409798254, "grad_norm": 0.5295648608009844, "learning_rate": 1.724249797242498e-06, "loss": 0.5955, "step": 33188 }, { "epoch": 0.9689936060261014, "grad_norm": 0.5479059618076025, "learning_rate": 1.7226277372262776e-06, "loss": 0.6107, "step": 33189 }, { "epoch": 0.9690228022539488, "grad_norm": 0.520275457435492, "learning_rate": 1.721005677210057e-06, "loss": 0.5783, "step": 33190 }, { "epoch": 0.9690519984817961, "grad_norm": 0.5306776146747647, "learning_rate": 1.7193836171938365e-06, "loss": 0.5692, "step": 33191 }, { "epoch": 0.9690811947096435, "grad_norm": 0.555142784774583, "learning_rate": 1.7177615571776155e-06, "loss": 0.633, "step": 33192 }, { "epoch": 0.9691103909374909, "grad_norm": 0.4828384207629425, "learning_rate": 1.716139497161395e-06, "loss": 0.4949, "step": 33193 }, { "epoch": 0.9691395871653382, "grad_norm": 0.532472505106061, "learning_rate": 1.7145174371451743e-06, "loss": 0.5517, "step": 33194 }, { "epoch": 0.9691687833931856, "grad_norm": 0.5874674967920657, "learning_rate": 1.7128953771289538e-06, "loss": 0.6653, "step": 33195 }, { "epoch": 0.9691979796210329, "grad_norm": 0.560906292663556, "learning_rate": 1.7112733171127334e-06, "loss": 0.6221, "step": 33196 }, { "epoch": 0.9692271758488803, "grad_norm": 0.5030882288533868, "learning_rate": 1.7096512570965129e-06, "loss": 0.5354, "step": 33197 }, { "epoch": 0.9692563720767277, "grad_norm": 0.5360001125457649, "learning_rate": 1.7080291970802919e-06, "loss": 0.5498, "step": 33198 }, { "epoch": 0.969285568304575, "grad_norm": 0.508227612819436, "learning_rate": 1.7064071370640713e-06, "loss": 0.5108, "step": 33199 }, { "epoch": 0.9693147645324224, "grad_norm": 0.5057359988610207, "learning_rate": 1.7047850770478507e-06, "loss": 0.5308, "step": 33200 }, { "epoch": 0.9693439607602697, "grad_norm": 0.5388725625303615, "learning_rate": 1.7031630170316302e-06, "loss": 0.6159, "step": 33201 }, { "epoch": 0.9693731569881171, "grad_norm": 0.4935217732255867, "learning_rate": 1.7015409570154098e-06, "loss": 0.5158, "step": 33202 }, { "epoch": 0.9694023532159645, "grad_norm": 0.5513141910689426, "learning_rate": 1.6999188969991892e-06, "loss": 0.5978, "step": 33203 }, { "epoch": 0.9694315494438118, "grad_norm": 0.534140110684528, "learning_rate": 1.6982968369829687e-06, "loss": 0.5783, "step": 33204 }, { "epoch": 0.9694607456716592, "grad_norm": 0.5270195087126932, "learning_rate": 1.6966747769667477e-06, "loss": 0.5813, "step": 33205 }, { "epoch": 0.9694899418995065, "grad_norm": 0.4918447606924988, "learning_rate": 1.6950527169505271e-06, "loss": 0.4906, "step": 33206 }, { "epoch": 0.9695191381273539, "grad_norm": 0.5434609903717219, "learning_rate": 1.6934306569343066e-06, "loss": 0.647, "step": 33207 }, { "epoch": 0.9695483343552013, "grad_norm": 0.5242033145399404, "learning_rate": 1.691808596918086e-06, "loss": 0.5824, "step": 33208 }, { "epoch": 0.9695775305830486, "grad_norm": 0.5240791155446234, "learning_rate": 1.6901865369018656e-06, "loss": 0.5726, "step": 33209 }, { "epoch": 0.969606726810896, "grad_norm": 0.5211228509147837, "learning_rate": 1.688564476885645e-06, "loss": 0.5687, "step": 33210 }, { "epoch": 0.9696359230387434, "grad_norm": 0.5584940075044178, "learning_rate": 1.686942416869424e-06, "loss": 0.6435, "step": 33211 }, { "epoch": 0.9696651192665907, "grad_norm": 0.5720942452042076, "learning_rate": 1.6853203568532035e-06, "loss": 0.6649, "step": 33212 }, { "epoch": 0.9696943154944381, "grad_norm": 0.5086231441462755, "learning_rate": 1.683698296836983e-06, "loss": 0.564, "step": 33213 }, { "epoch": 0.9697235117222854, "grad_norm": 0.5421913950142853, "learning_rate": 1.6820762368207624e-06, "loss": 0.5468, "step": 33214 }, { "epoch": 0.9697527079501328, "grad_norm": 0.5485852046076145, "learning_rate": 1.6804541768045418e-06, "loss": 0.6085, "step": 33215 }, { "epoch": 0.9697819041779802, "grad_norm": 0.5481052255477145, "learning_rate": 1.6788321167883214e-06, "loss": 0.6061, "step": 33216 }, { "epoch": 0.9698111004058275, "grad_norm": 0.5189535406371928, "learning_rate": 1.6772100567721009e-06, "loss": 0.5416, "step": 33217 }, { "epoch": 0.9698402966336749, "grad_norm": 0.5889524671172818, "learning_rate": 1.6755879967558799e-06, "loss": 0.6946, "step": 33218 }, { "epoch": 0.9698694928615222, "grad_norm": 0.546102172365777, "learning_rate": 1.6739659367396593e-06, "loss": 0.6407, "step": 33219 }, { "epoch": 0.9698986890893696, "grad_norm": 0.5168686623197413, "learning_rate": 1.6723438767234388e-06, "loss": 0.5425, "step": 33220 }, { "epoch": 0.969927885317217, "grad_norm": 0.5368112326110163, "learning_rate": 1.6707218167072182e-06, "loss": 0.5833, "step": 33221 }, { "epoch": 0.9699570815450643, "grad_norm": 0.5222149994382115, "learning_rate": 1.6690997566909976e-06, "loss": 0.5592, "step": 33222 }, { "epoch": 0.9699862777729117, "grad_norm": 0.5122553537525713, "learning_rate": 1.6674776966747773e-06, "loss": 0.5629, "step": 33223 }, { "epoch": 0.970015474000759, "grad_norm": 0.5491763535429779, "learning_rate": 1.6658556366585563e-06, "loss": 0.6354, "step": 33224 }, { "epoch": 0.9700446702286064, "grad_norm": 0.48536627598877313, "learning_rate": 1.6642335766423357e-06, "loss": 0.4878, "step": 33225 }, { "epoch": 0.9700738664564539, "grad_norm": 0.5522577320785623, "learning_rate": 1.6626115166261151e-06, "loss": 0.5778, "step": 33226 }, { "epoch": 0.9701030626843012, "grad_norm": 0.5616705494709905, "learning_rate": 1.6609894566098946e-06, "loss": 0.6545, "step": 33227 }, { "epoch": 0.9701322589121486, "grad_norm": 0.5487604076158326, "learning_rate": 1.659367396593674e-06, "loss": 0.6055, "step": 33228 }, { "epoch": 0.970161455139996, "grad_norm": 0.5082046624400478, "learning_rate": 1.6577453365774534e-06, "loss": 0.5383, "step": 33229 }, { "epoch": 0.9701906513678433, "grad_norm": 0.535127922593313, "learning_rate": 1.656123276561233e-06, "loss": 0.5987, "step": 33230 }, { "epoch": 0.9702198475956907, "grad_norm": 0.48250022691844807, "learning_rate": 1.654501216545012e-06, "loss": 0.4963, "step": 33231 }, { "epoch": 0.970249043823538, "grad_norm": 0.5041276342712813, "learning_rate": 1.6528791565287915e-06, "loss": 0.5316, "step": 33232 }, { "epoch": 0.9702782400513854, "grad_norm": 0.529067908538191, "learning_rate": 1.651257096512571e-06, "loss": 0.5869, "step": 33233 }, { "epoch": 0.9703074362792328, "grad_norm": 0.5467852850154875, "learning_rate": 1.6496350364963504e-06, "loss": 0.6318, "step": 33234 }, { "epoch": 0.9703366325070801, "grad_norm": 0.5579960379978443, "learning_rate": 1.6480129764801298e-06, "loss": 0.6488, "step": 33235 }, { "epoch": 0.9703658287349275, "grad_norm": 0.5583832212854692, "learning_rate": 1.6463909164639095e-06, "loss": 0.6179, "step": 33236 }, { "epoch": 0.9703950249627749, "grad_norm": 0.5114345373078084, "learning_rate": 1.644768856447689e-06, "loss": 0.5416, "step": 33237 }, { "epoch": 0.9704242211906222, "grad_norm": 0.5182986375827064, "learning_rate": 1.643146796431468e-06, "loss": 0.5544, "step": 33238 }, { "epoch": 0.9704534174184696, "grad_norm": 0.5434335261409955, "learning_rate": 1.6415247364152473e-06, "loss": 0.5959, "step": 33239 }, { "epoch": 0.9704826136463169, "grad_norm": 0.5128161719726975, "learning_rate": 1.6399026763990268e-06, "loss": 0.5373, "step": 33240 }, { "epoch": 0.9705118098741643, "grad_norm": 0.5114502300973426, "learning_rate": 1.6382806163828062e-06, "loss": 0.5071, "step": 33241 }, { "epoch": 0.9705410061020117, "grad_norm": 0.5271239995939173, "learning_rate": 1.6366585563665856e-06, "loss": 0.6058, "step": 33242 }, { "epoch": 0.970570202329859, "grad_norm": 0.5709946962637023, "learning_rate": 1.6350364963503653e-06, "loss": 0.6468, "step": 33243 }, { "epoch": 0.9705993985577064, "grad_norm": 0.5351361137132978, "learning_rate": 1.6334144363341443e-06, "loss": 0.58, "step": 33244 }, { "epoch": 0.9706285947855537, "grad_norm": 0.5285302837802992, "learning_rate": 1.6317923763179237e-06, "loss": 0.5673, "step": 33245 }, { "epoch": 0.9706577910134011, "grad_norm": 0.5249467129922614, "learning_rate": 1.6301703163017032e-06, "loss": 0.5746, "step": 33246 }, { "epoch": 0.9706869872412485, "grad_norm": 0.5395056813183428, "learning_rate": 1.6285482562854826e-06, "loss": 0.5932, "step": 33247 }, { "epoch": 0.9707161834690958, "grad_norm": 0.5497488067483148, "learning_rate": 1.626926196269262e-06, "loss": 0.5946, "step": 33248 }, { "epoch": 0.9707453796969432, "grad_norm": 0.4962399385343988, "learning_rate": 1.6253041362530415e-06, "loss": 0.4833, "step": 33249 }, { "epoch": 0.9707745759247906, "grad_norm": 0.5284356931854486, "learning_rate": 1.623682076236821e-06, "loss": 0.5384, "step": 33250 }, { "epoch": 0.9708037721526379, "grad_norm": 0.5450470345982965, "learning_rate": 1.6220600162206001e-06, "loss": 0.5973, "step": 33251 }, { "epoch": 0.9708329683804853, "grad_norm": 0.5333066971189764, "learning_rate": 1.6204379562043795e-06, "loss": 0.5679, "step": 33252 }, { "epoch": 0.9708621646083326, "grad_norm": 0.5585266297788254, "learning_rate": 1.618815896188159e-06, "loss": 0.6417, "step": 33253 }, { "epoch": 0.97089136083618, "grad_norm": 0.4837709992103911, "learning_rate": 1.6171938361719384e-06, "loss": 0.459, "step": 33254 }, { "epoch": 0.9709205570640274, "grad_norm": 0.5250425715984067, "learning_rate": 1.6155717761557178e-06, "loss": 0.6036, "step": 33255 }, { "epoch": 0.9709497532918747, "grad_norm": 0.5132709076303847, "learning_rate": 1.6139497161394973e-06, "loss": 0.5608, "step": 33256 }, { "epoch": 0.9709789495197221, "grad_norm": 0.5263223779232812, "learning_rate": 1.6123276561232765e-06, "loss": 0.5877, "step": 33257 }, { "epoch": 0.9710081457475694, "grad_norm": 0.5213337432093951, "learning_rate": 1.610705596107056e-06, "loss": 0.5691, "step": 33258 }, { "epoch": 0.9710373419754168, "grad_norm": 0.4953391175791287, "learning_rate": 1.6090835360908354e-06, "loss": 0.5024, "step": 33259 }, { "epoch": 0.9710665382032642, "grad_norm": 0.5368038871869446, "learning_rate": 1.6074614760746148e-06, "loss": 0.5906, "step": 33260 }, { "epoch": 0.9710957344311115, "grad_norm": 0.5354881293853719, "learning_rate": 1.6058394160583942e-06, "loss": 0.5867, "step": 33261 }, { "epoch": 0.9711249306589589, "grad_norm": 0.6122462758932719, "learning_rate": 1.6042173560421737e-06, "loss": 0.7036, "step": 33262 }, { "epoch": 0.9711541268868062, "grad_norm": 0.5139618933126094, "learning_rate": 1.602595296025953e-06, "loss": 0.5612, "step": 33263 }, { "epoch": 0.9711833231146536, "grad_norm": 0.5178068744321802, "learning_rate": 1.6009732360097323e-06, "loss": 0.5504, "step": 33264 }, { "epoch": 0.971212519342501, "grad_norm": 0.5602893537158852, "learning_rate": 1.5993511759935117e-06, "loss": 0.6228, "step": 33265 }, { "epoch": 0.9712417155703483, "grad_norm": 0.5414063526889704, "learning_rate": 1.5977291159772912e-06, "loss": 0.6116, "step": 33266 }, { "epoch": 0.9712709117981957, "grad_norm": 0.5543214368800086, "learning_rate": 1.5961070559610706e-06, "loss": 0.6062, "step": 33267 }, { "epoch": 0.971300108026043, "grad_norm": 0.5142152180953697, "learning_rate": 1.59448499594485e-06, "loss": 0.5841, "step": 33268 }, { "epoch": 0.9713293042538904, "grad_norm": 0.545934690299381, "learning_rate": 1.5928629359286295e-06, "loss": 0.6097, "step": 33269 }, { "epoch": 0.9713585004817378, "grad_norm": 0.49668568409421815, "learning_rate": 1.5912408759124087e-06, "loss": 0.5079, "step": 33270 }, { "epoch": 0.9713876967095851, "grad_norm": 0.5245349575685023, "learning_rate": 1.5896188158961881e-06, "loss": 0.5542, "step": 33271 }, { "epoch": 0.9714168929374325, "grad_norm": 0.4910594665177292, "learning_rate": 1.5879967558799676e-06, "loss": 0.4548, "step": 33272 }, { "epoch": 0.9714460891652799, "grad_norm": 0.500478091826155, "learning_rate": 1.586374695863747e-06, "loss": 0.4975, "step": 33273 }, { "epoch": 0.9714752853931272, "grad_norm": 0.5279983575635875, "learning_rate": 1.5847526358475264e-06, "loss": 0.5998, "step": 33274 }, { "epoch": 0.9715044816209746, "grad_norm": 0.5232806693408771, "learning_rate": 1.5831305758313059e-06, "loss": 0.5325, "step": 33275 }, { "epoch": 0.9715336778488219, "grad_norm": 0.506239143639995, "learning_rate": 1.5815085158150853e-06, "loss": 0.5421, "step": 33276 }, { "epoch": 0.9715628740766693, "grad_norm": 0.5312251561394744, "learning_rate": 1.5798864557988645e-06, "loss": 0.581, "step": 33277 }, { "epoch": 0.9715920703045167, "grad_norm": 0.5226776621189408, "learning_rate": 1.578264395782644e-06, "loss": 0.5679, "step": 33278 }, { "epoch": 0.971621266532364, "grad_norm": 0.5187485130941769, "learning_rate": 1.5766423357664234e-06, "loss": 0.5827, "step": 33279 }, { "epoch": 0.9716504627602114, "grad_norm": 0.5494368662767171, "learning_rate": 1.5750202757502028e-06, "loss": 0.6244, "step": 33280 }, { "epoch": 0.9716796589880587, "grad_norm": 0.5485698167878573, "learning_rate": 1.5733982157339822e-06, "loss": 0.6308, "step": 33281 }, { "epoch": 0.9717088552159061, "grad_norm": 0.5159751403405409, "learning_rate": 1.5717761557177617e-06, "loss": 0.5005, "step": 33282 }, { "epoch": 0.9717380514437535, "grad_norm": 0.5259240131234272, "learning_rate": 1.5701540957015411e-06, "loss": 0.634, "step": 33283 }, { "epoch": 0.9717672476716008, "grad_norm": 0.49250716161168817, "learning_rate": 1.5685320356853203e-06, "loss": 0.5205, "step": 33284 }, { "epoch": 0.9717964438994482, "grad_norm": 0.5396258337177385, "learning_rate": 1.5669099756690998e-06, "loss": 0.5728, "step": 33285 }, { "epoch": 0.9718256401272956, "grad_norm": 0.5482553111995461, "learning_rate": 1.5652879156528792e-06, "loss": 0.6277, "step": 33286 }, { "epoch": 0.9718548363551429, "grad_norm": 0.5171878353163967, "learning_rate": 1.5636658556366586e-06, "loss": 0.5571, "step": 33287 }, { "epoch": 0.9718840325829903, "grad_norm": 0.5141931356443172, "learning_rate": 1.5620437956204378e-06, "loss": 0.5667, "step": 33288 }, { "epoch": 0.9719132288108376, "grad_norm": 0.5410549710600596, "learning_rate": 1.5604217356042175e-06, "loss": 0.6115, "step": 33289 }, { "epoch": 0.971942425038685, "grad_norm": 0.5653206079969197, "learning_rate": 1.558799675587997e-06, "loss": 0.6435, "step": 33290 }, { "epoch": 0.9719716212665324, "grad_norm": 0.5389698990637276, "learning_rate": 1.5571776155717764e-06, "loss": 0.5685, "step": 33291 }, { "epoch": 0.9720008174943797, "grad_norm": 0.5250042815246945, "learning_rate": 1.5555555555555556e-06, "loss": 0.5748, "step": 33292 }, { "epoch": 0.9720300137222271, "grad_norm": 0.5095372504270342, "learning_rate": 1.553933495539335e-06, "loss": 0.5643, "step": 33293 }, { "epoch": 0.9720592099500744, "grad_norm": 0.5522825666887051, "learning_rate": 1.5523114355231144e-06, "loss": 0.6035, "step": 33294 }, { "epoch": 0.9720884061779218, "grad_norm": 0.5229480973824947, "learning_rate": 1.5506893755068937e-06, "loss": 0.5862, "step": 33295 }, { "epoch": 0.9721176024057692, "grad_norm": 0.5055353885611581, "learning_rate": 1.5490673154906733e-06, "loss": 0.5513, "step": 33296 }, { "epoch": 0.9721467986336165, "grad_norm": 0.5675057639626264, "learning_rate": 1.5474452554744527e-06, "loss": 0.6465, "step": 33297 }, { "epoch": 0.9721759948614639, "grad_norm": 0.5293537316810037, "learning_rate": 1.545823195458232e-06, "loss": 0.5821, "step": 33298 }, { "epoch": 0.9722051910893112, "grad_norm": 0.5499400535855733, "learning_rate": 1.5442011354420114e-06, "loss": 0.6397, "step": 33299 }, { "epoch": 0.9722343873171586, "grad_norm": 0.48892921784826215, "learning_rate": 1.5425790754257908e-06, "loss": 0.531, "step": 33300 }, { "epoch": 0.972263583545006, "grad_norm": 0.5631428167874469, "learning_rate": 1.5409570154095703e-06, "loss": 0.6897, "step": 33301 }, { "epoch": 0.9722927797728533, "grad_norm": 0.5252319695224805, "learning_rate": 1.5393349553933497e-06, "loss": 0.5764, "step": 33302 }, { "epoch": 0.9723219760007007, "grad_norm": 0.5140183513712043, "learning_rate": 1.5377128953771291e-06, "loss": 0.5405, "step": 33303 }, { "epoch": 0.972351172228548, "grad_norm": 0.5320370957332803, "learning_rate": 1.5360908353609086e-06, "loss": 0.5768, "step": 33304 }, { "epoch": 0.9723803684563954, "grad_norm": 0.5048696976870498, "learning_rate": 1.5344687753446878e-06, "loss": 0.522, "step": 33305 }, { "epoch": 0.9724095646842428, "grad_norm": 0.5247063853086597, "learning_rate": 1.5328467153284672e-06, "loss": 0.6179, "step": 33306 }, { "epoch": 0.9724387609120901, "grad_norm": 0.5167468802461949, "learning_rate": 1.5312246553122466e-06, "loss": 0.5259, "step": 33307 }, { "epoch": 0.9724679571399375, "grad_norm": 0.5790663549446207, "learning_rate": 1.5296025952960259e-06, "loss": 0.5221, "step": 33308 }, { "epoch": 0.9724971533677849, "grad_norm": 0.5116174387796771, "learning_rate": 1.5279805352798055e-06, "loss": 0.5214, "step": 33309 }, { "epoch": 0.9725263495956322, "grad_norm": 0.515649677367356, "learning_rate": 1.526358475263585e-06, "loss": 0.5303, "step": 33310 }, { "epoch": 0.9725555458234796, "grad_norm": 0.4907185558213889, "learning_rate": 1.5247364152473642e-06, "loss": 0.4884, "step": 33311 }, { "epoch": 0.972584742051327, "grad_norm": 0.5128983878307565, "learning_rate": 1.5231143552311436e-06, "loss": 0.553, "step": 33312 }, { "epoch": 0.9726139382791743, "grad_norm": 0.5329449536331733, "learning_rate": 1.521492295214923e-06, "loss": 0.5963, "step": 33313 }, { "epoch": 0.9726431345070217, "grad_norm": 0.5661320805730653, "learning_rate": 1.5198702351987025e-06, "loss": 0.6575, "step": 33314 }, { "epoch": 0.972672330734869, "grad_norm": 0.5585406305353853, "learning_rate": 1.5182481751824817e-06, "loss": 0.6344, "step": 33315 }, { "epoch": 0.9727015269627164, "grad_norm": 0.5034029952747844, "learning_rate": 1.5166261151662613e-06, "loss": 0.5098, "step": 33316 }, { "epoch": 0.9727307231905638, "grad_norm": 0.4957729007439198, "learning_rate": 1.5150040551500408e-06, "loss": 0.5247, "step": 33317 }, { "epoch": 0.9727599194184111, "grad_norm": 0.563542741866055, "learning_rate": 1.51338199513382e-06, "loss": 0.6632, "step": 33318 }, { "epoch": 0.9727891156462585, "grad_norm": 0.5193634438545712, "learning_rate": 1.5117599351175994e-06, "loss": 0.5482, "step": 33319 }, { "epoch": 0.9728183118741058, "grad_norm": 0.5549889948134866, "learning_rate": 1.5101378751013788e-06, "loss": 0.6275, "step": 33320 }, { "epoch": 0.9728475081019532, "grad_norm": 0.5618541258477594, "learning_rate": 1.508515815085158e-06, "loss": 0.6618, "step": 33321 }, { "epoch": 0.9728767043298006, "grad_norm": 0.5151399524831205, "learning_rate": 1.5068937550689375e-06, "loss": 0.5778, "step": 33322 }, { "epoch": 0.9729059005576479, "grad_norm": 0.5404234879215594, "learning_rate": 1.5052716950527171e-06, "loss": 0.5986, "step": 33323 }, { "epoch": 0.9729350967854953, "grad_norm": 0.5429176598518665, "learning_rate": 1.5036496350364966e-06, "loss": 0.5818, "step": 33324 }, { "epoch": 0.9729642930133426, "grad_norm": 0.49601162270312155, "learning_rate": 1.5020275750202758e-06, "loss": 0.5072, "step": 33325 }, { "epoch": 0.97299348924119, "grad_norm": 0.5053680626198386, "learning_rate": 1.5004055150040552e-06, "loss": 0.5572, "step": 33326 }, { "epoch": 0.9730226854690374, "grad_norm": 0.5575694219245764, "learning_rate": 1.4987834549878347e-06, "loss": 0.6399, "step": 33327 }, { "epoch": 0.9730518816968847, "grad_norm": 0.525873002716109, "learning_rate": 1.4971613949716139e-06, "loss": 0.5668, "step": 33328 }, { "epoch": 0.9730810779247321, "grad_norm": 0.49549762458359176, "learning_rate": 1.4955393349553933e-06, "loss": 0.5334, "step": 33329 }, { "epoch": 0.9731102741525794, "grad_norm": 0.5060772865919545, "learning_rate": 1.493917274939173e-06, "loss": 0.5251, "step": 33330 }, { "epoch": 0.9731394703804268, "grad_norm": 0.503205845120203, "learning_rate": 1.4922952149229522e-06, "loss": 0.4916, "step": 33331 }, { "epoch": 0.9731686666082742, "grad_norm": 0.5441183341578835, "learning_rate": 1.4906731549067316e-06, "loss": 0.6069, "step": 33332 }, { "epoch": 0.9731978628361215, "grad_norm": 0.5431079813880891, "learning_rate": 1.489051094890511e-06, "loss": 0.6293, "step": 33333 }, { "epoch": 0.9732270590639689, "grad_norm": 0.5547391468734354, "learning_rate": 1.4874290348742903e-06, "loss": 0.5739, "step": 33334 }, { "epoch": 0.9732562552918163, "grad_norm": 0.5301826391022783, "learning_rate": 1.4858069748580697e-06, "loss": 0.5634, "step": 33335 }, { "epoch": 0.9732854515196636, "grad_norm": 0.541069608008319, "learning_rate": 1.4841849148418493e-06, "loss": 0.5795, "step": 33336 }, { "epoch": 0.973314647747511, "grad_norm": 0.49381342205441886, "learning_rate": 1.4825628548256288e-06, "loss": 0.508, "step": 33337 }, { "epoch": 0.9733438439753583, "grad_norm": 0.5567578842323622, "learning_rate": 1.480940794809408e-06, "loss": 0.5907, "step": 33338 }, { "epoch": 0.9733730402032057, "grad_norm": 0.5581306121988351, "learning_rate": 1.4793187347931874e-06, "loss": 0.5922, "step": 33339 }, { "epoch": 0.9734022364310531, "grad_norm": 0.5389316574023362, "learning_rate": 1.4776966747769669e-06, "loss": 0.5491, "step": 33340 }, { "epoch": 0.9734314326589004, "grad_norm": 0.5277427266207402, "learning_rate": 1.476074614760746e-06, "loss": 0.5684, "step": 33341 }, { "epoch": 0.9734606288867478, "grad_norm": 0.5138458289210753, "learning_rate": 1.4744525547445255e-06, "loss": 0.5395, "step": 33342 }, { "epoch": 0.9734898251145951, "grad_norm": 0.5535865794168394, "learning_rate": 1.4728304947283052e-06, "loss": 0.6171, "step": 33343 }, { "epoch": 0.9735190213424425, "grad_norm": 0.549517259119382, "learning_rate": 1.4712084347120844e-06, "loss": 0.6236, "step": 33344 }, { "epoch": 0.9735482175702899, "grad_norm": 0.564173760004409, "learning_rate": 1.4695863746958638e-06, "loss": 0.5968, "step": 33345 }, { "epoch": 0.9735774137981372, "grad_norm": 0.5429662239250349, "learning_rate": 1.4679643146796433e-06, "loss": 0.6014, "step": 33346 }, { "epoch": 0.9736066100259847, "grad_norm": 0.5118870568833402, "learning_rate": 1.4663422546634225e-06, "loss": 0.5348, "step": 33347 }, { "epoch": 0.9736358062538321, "grad_norm": 0.49385789754898346, "learning_rate": 1.464720194647202e-06, "loss": 0.5456, "step": 33348 }, { "epoch": 0.9736650024816794, "grad_norm": 0.4940143379742262, "learning_rate": 1.4630981346309813e-06, "loss": 0.4969, "step": 33349 }, { "epoch": 0.9736941987095268, "grad_norm": 0.5008919933633582, "learning_rate": 1.461476074614761e-06, "loss": 0.5385, "step": 33350 }, { "epoch": 0.9737233949373741, "grad_norm": 0.5097117896734703, "learning_rate": 1.4598540145985402e-06, "loss": 0.5176, "step": 33351 }, { "epoch": 0.9737525911652215, "grad_norm": 0.507267493928534, "learning_rate": 1.4582319545823196e-06, "loss": 0.5487, "step": 33352 }, { "epoch": 0.9737817873930689, "grad_norm": 0.5508028271932324, "learning_rate": 1.456609894566099e-06, "loss": 0.614, "step": 33353 }, { "epoch": 0.9738109836209162, "grad_norm": 0.5210947529854097, "learning_rate": 1.4549878345498783e-06, "loss": 0.5521, "step": 33354 }, { "epoch": 0.9738401798487636, "grad_norm": 0.48675855507907223, "learning_rate": 1.4533657745336577e-06, "loss": 0.4881, "step": 33355 }, { "epoch": 0.973869376076611, "grad_norm": 0.5215528971785189, "learning_rate": 1.4517437145174372e-06, "loss": 0.5244, "step": 33356 }, { "epoch": 0.9738985723044583, "grad_norm": 0.48614285551279346, "learning_rate": 1.4501216545012166e-06, "loss": 0.516, "step": 33357 }, { "epoch": 0.9739277685323057, "grad_norm": 0.5305074759118392, "learning_rate": 1.448499594484996e-06, "loss": 0.6134, "step": 33358 }, { "epoch": 0.973956964760153, "grad_norm": 0.5365949264744554, "learning_rate": 1.4468775344687755e-06, "loss": 0.6213, "step": 33359 }, { "epoch": 0.9739861609880004, "grad_norm": 0.5123073352466669, "learning_rate": 1.4452554744525549e-06, "loss": 0.5491, "step": 33360 }, { "epoch": 0.9740153572158478, "grad_norm": 0.5155823333520412, "learning_rate": 1.4436334144363341e-06, "loss": 0.5244, "step": 33361 }, { "epoch": 0.9740445534436951, "grad_norm": 0.5323405240349793, "learning_rate": 1.4420113544201135e-06, "loss": 0.5444, "step": 33362 }, { "epoch": 0.9740737496715425, "grad_norm": 0.4968428611117542, "learning_rate": 1.4403892944038932e-06, "loss": 0.5046, "step": 33363 }, { "epoch": 0.9741029458993898, "grad_norm": 0.4964817987227177, "learning_rate": 1.4387672343876724e-06, "loss": 0.5008, "step": 33364 }, { "epoch": 0.9741321421272372, "grad_norm": 0.5444109383621929, "learning_rate": 1.4371451743714518e-06, "loss": 0.6098, "step": 33365 }, { "epoch": 0.9741613383550846, "grad_norm": 0.5345904232783335, "learning_rate": 1.4355231143552313e-06, "loss": 0.5576, "step": 33366 }, { "epoch": 0.9741905345829319, "grad_norm": 0.5178110816702154, "learning_rate": 1.4339010543390105e-06, "loss": 0.5329, "step": 33367 }, { "epoch": 0.9742197308107793, "grad_norm": 0.5478175368336009, "learning_rate": 1.43227899432279e-06, "loss": 0.5714, "step": 33368 }, { "epoch": 0.9742489270386266, "grad_norm": 0.49404448434903736, "learning_rate": 1.4306569343065694e-06, "loss": 0.5156, "step": 33369 }, { "epoch": 0.974278123266474, "grad_norm": 0.5156388169100892, "learning_rate": 1.4290348742903488e-06, "loss": 0.53, "step": 33370 }, { "epoch": 0.9743073194943214, "grad_norm": 0.5074927219704103, "learning_rate": 1.4274128142741282e-06, "loss": 0.5264, "step": 33371 }, { "epoch": 0.9743365157221687, "grad_norm": 0.5475787751400644, "learning_rate": 1.4257907542579077e-06, "loss": 0.6041, "step": 33372 }, { "epoch": 0.9743657119500161, "grad_norm": 0.4966564718440118, "learning_rate": 1.424168694241687e-06, "loss": 0.5209, "step": 33373 }, { "epoch": 0.9743949081778635, "grad_norm": 0.4957391483487214, "learning_rate": 1.4225466342254663e-06, "loss": 0.5119, "step": 33374 }, { "epoch": 0.9744241044057108, "grad_norm": 0.5526063615312891, "learning_rate": 1.4209245742092457e-06, "loss": 0.6054, "step": 33375 }, { "epoch": 0.9744533006335582, "grad_norm": 0.5368742011521699, "learning_rate": 1.4193025141930252e-06, "loss": 0.5925, "step": 33376 }, { "epoch": 0.9744824968614055, "grad_norm": 0.5544903131845073, "learning_rate": 1.4176804541768046e-06, "loss": 0.6151, "step": 33377 }, { "epoch": 0.9745116930892529, "grad_norm": 0.5392400606920289, "learning_rate": 1.416058394160584e-06, "loss": 0.6126, "step": 33378 }, { "epoch": 0.9745408893171003, "grad_norm": 0.5239870984382348, "learning_rate": 1.4144363341443635e-06, "loss": 0.5758, "step": 33379 }, { "epoch": 0.9745700855449476, "grad_norm": 0.5226239727923013, "learning_rate": 1.4128142741281427e-06, "loss": 0.5579, "step": 33380 }, { "epoch": 0.974599281772795, "grad_norm": 0.5236302385513445, "learning_rate": 1.4111922141119221e-06, "loss": 0.5607, "step": 33381 }, { "epoch": 0.9746284780006423, "grad_norm": 0.5130689271741992, "learning_rate": 1.4095701540957016e-06, "loss": 0.5518, "step": 33382 }, { "epoch": 0.9746576742284897, "grad_norm": 0.5498628138660716, "learning_rate": 1.407948094079481e-06, "loss": 0.6163, "step": 33383 }, { "epoch": 0.9746868704563371, "grad_norm": 0.5209690016892286, "learning_rate": 1.4063260340632604e-06, "loss": 0.5725, "step": 33384 }, { "epoch": 0.9747160666841844, "grad_norm": 0.5421469846343152, "learning_rate": 1.4047039740470399e-06, "loss": 0.6183, "step": 33385 }, { "epoch": 0.9747452629120318, "grad_norm": 0.5200015898105329, "learning_rate": 1.4030819140308193e-06, "loss": 0.5729, "step": 33386 }, { "epoch": 0.9747744591398791, "grad_norm": 0.5471891503404916, "learning_rate": 1.4014598540145985e-06, "loss": 0.5666, "step": 33387 }, { "epoch": 0.9748036553677265, "grad_norm": 0.5288926366420315, "learning_rate": 1.399837793998378e-06, "loss": 0.5712, "step": 33388 }, { "epoch": 0.9748328515955739, "grad_norm": 0.5623878458814322, "learning_rate": 1.3982157339821574e-06, "loss": 0.6657, "step": 33389 }, { "epoch": 0.9748620478234212, "grad_norm": 0.56649516019556, "learning_rate": 1.3965936739659368e-06, "loss": 0.6243, "step": 33390 }, { "epoch": 0.9748912440512686, "grad_norm": 0.539090747199964, "learning_rate": 1.3949716139497162e-06, "loss": 0.586, "step": 33391 }, { "epoch": 0.974920440279116, "grad_norm": 0.5402583518520665, "learning_rate": 1.3933495539334957e-06, "loss": 0.6246, "step": 33392 }, { "epoch": 0.9749496365069633, "grad_norm": 0.5297439298838054, "learning_rate": 1.391727493917275e-06, "loss": 0.5801, "step": 33393 }, { "epoch": 0.9749788327348107, "grad_norm": 0.5283114725798437, "learning_rate": 1.3901054339010543e-06, "loss": 0.5887, "step": 33394 }, { "epoch": 0.975008028962658, "grad_norm": 0.4914165446620325, "learning_rate": 1.3884833738848338e-06, "loss": 0.5319, "step": 33395 }, { "epoch": 0.9750372251905054, "grad_norm": 0.5170473802210434, "learning_rate": 1.3868613138686132e-06, "loss": 0.578, "step": 33396 }, { "epoch": 0.9750664214183528, "grad_norm": 0.5542108935392749, "learning_rate": 1.3852392538523926e-06, "loss": 0.6271, "step": 33397 }, { "epoch": 0.9750956176462001, "grad_norm": 0.5387964266983135, "learning_rate": 1.383617193836172e-06, "loss": 0.575, "step": 33398 }, { "epoch": 0.9751248138740475, "grad_norm": 0.547132671024914, "learning_rate": 1.3819951338199515e-06, "loss": 0.5888, "step": 33399 }, { "epoch": 0.9751540101018948, "grad_norm": 0.577789331491672, "learning_rate": 1.3803730738037307e-06, "loss": 0.6749, "step": 33400 }, { "epoch": 0.9751832063297422, "grad_norm": 0.5241273240879445, "learning_rate": 1.3787510137875101e-06, "loss": 0.5488, "step": 33401 }, { "epoch": 0.9752124025575896, "grad_norm": 0.5473949544298602, "learning_rate": 1.3771289537712896e-06, "loss": 0.6054, "step": 33402 }, { "epoch": 0.9752415987854369, "grad_norm": 0.5118296306336624, "learning_rate": 1.375506893755069e-06, "loss": 0.5083, "step": 33403 }, { "epoch": 0.9752707950132843, "grad_norm": 0.4834108373150286, "learning_rate": 1.3738848337388484e-06, "loss": 0.5005, "step": 33404 }, { "epoch": 0.9752999912411316, "grad_norm": 0.5065021445036889, "learning_rate": 1.3722627737226279e-06, "loss": 0.5328, "step": 33405 }, { "epoch": 0.975329187468979, "grad_norm": 0.55885601254371, "learning_rate": 1.3706407137064073e-06, "loss": 0.6374, "step": 33406 }, { "epoch": 0.9753583836968264, "grad_norm": 0.5490839385927645, "learning_rate": 1.3690186536901865e-06, "loss": 0.6378, "step": 33407 }, { "epoch": 0.9753875799246737, "grad_norm": 0.5562529331141955, "learning_rate": 1.367396593673966e-06, "loss": 0.5863, "step": 33408 }, { "epoch": 0.9754167761525211, "grad_norm": 0.5312412591613095, "learning_rate": 1.3657745336577454e-06, "loss": 0.5974, "step": 33409 }, { "epoch": 0.9754459723803685, "grad_norm": 0.5542935980820097, "learning_rate": 1.3641524736415248e-06, "loss": 0.5918, "step": 33410 }, { "epoch": 0.9754751686082158, "grad_norm": 0.5414210681865044, "learning_rate": 1.3625304136253043e-06, "loss": 0.5857, "step": 33411 }, { "epoch": 0.9755043648360632, "grad_norm": 0.5741981629501745, "learning_rate": 1.3609083536090837e-06, "loss": 0.6464, "step": 33412 }, { "epoch": 0.9755335610639105, "grad_norm": 0.5115686114046789, "learning_rate": 1.359286293592863e-06, "loss": 0.5328, "step": 33413 }, { "epoch": 0.9755627572917579, "grad_norm": 0.5169885012830252, "learning_rate": 1.3576642335766423e-06, "loss": 0.5875, "step": 33414 }, { "epoch": 0.9755919535196053, "grad_norm": 0.48259676355144354, "learning_rate": 1.3560421735604218e-06, "loss": 0.4825, "step": 33415 }, { "epoch": 0.9756211497474526, "grad_norm": 0.519479707828882, "learning_rate": 1.3544201135442012e-06, "loss": 0.582, "step": 33416 }, { "epoch": 0.9756503459753, "grad_norm": 0.5433173816640602, "learning_rate": 1.3527980535279806e-06, "loss": 0.5671, "step": 33417 }, { "epoch": 0.9756795422031473, "grad_norm": 0.5373823303601103, "learning_rate": 1.35117599351176e-06, "loss": 0.6094, "step": 33418 }, { "epoch": 0.9757087384309947, "grad_norm": 0.5213965149130203, "learning_rate": 1.3495539334955395e-06, "loss": 0.5313, "step": 33419 }, { "epoch": 0.9757379346588421, "grad_norm": 0.5597623934838306, "learning_rate": 1.3479318734793187e-06, "loss": 0.6368, "step": 33420 }, { "epoch": 0.9757671308866894, "grad_norm": 0.5516788475966423, "learning_rate": 1.3463098134630982e-06, "loss": 0.5977, "step": 33421 }, { "epoch": 0.9757963271145368, "grad_norm": 0.5503698087015917, "learning_rate": 1.3446877534468776e-06, "loss": 0.6417, "step": 33422 }, { "epoch": 0.9758255233423841, "grad_norm": 0.522126756500114, "learning_rate": 1.343065693430657e-06, "loss": 0.5171, "step": 33423 }, { "epoch": 0.9758547195702315, "grad_norm": 0.516681059959301, "learning_rate": 1.3414436334144365e-06, "loss": 0.5691, "step": 33424 }, { "epoch": 0.9758839157980789, "grad_norm": 0.5193306121485615, "learning_rate": 1.339821573398216e-06, "loss": 0.5743, "step": 33425 }, { "epoch": 0.9759131120259262, "grad_norm": 0.5667099776116542, "learning_rate": 1.3381995133819951e-06, "loss": 0.6561, "step": 33426 }, { "epoch": 0.9759423082537736, "grad_norm": 0.537010104289829, "learning_rate": 1.3365774533657745e-06, "loss": 0.5972, "step": 33427 }, { "epoch": 0.975971504481621, "grad_norm": 0.507979580795135, "learning_rate": 1.334955393349554e-06, "loss": 0.5347, "step": 33428 }, { "epoch": 0.9760007007094683, "grad_norm": 0.5131246813245514, "learning_rate": 1.3333333333333334e-06, "loss": 0.5248, "step": 33429 }, { "epoch": 0.9760298969373157, "grad_norm": 0.5417093586518326, "learning_rate": 1.3317112733171128e-06, "loss": 0.6106, "step": 33430 }, { "epoch": 0.976059093165163, "grad_norm": 0.757531380785681, "learning_rate": 1.3300892133008923e-06, "loss": 0.6072, "step": 33431 }, { "epoch": 0.9760882893930104, "grad_norm": 0.5417748841782866, "learning_rate": 1.3284671532846717e-06, "loss": 0.6069, "step": 33432 }, { "epoch": 0.9761174856208578, "grad_norm": 0.5287628489225071, "learning_rate": 1.326845093268451e-06, "loss": 0.5384, "step": 33433 }, { "epoch": 0.9761466818487051, "grad_norm": 0.5352398172416661, "learning_rate": 1.3252230332522304e-06, "loss": 0.6056, "step": 33434 }, { "epoch": 0.9761758780765525, "grad_norm": 0.5473646462725718, "learning_rate": 1.3236009732360098e-06, "loss": 0.6195, "step": 33435 }, { "epoch": 0.9762050743043998, "grad_norm": 0.5415776085170511, "learning_rate": 1.3219789132197892e-06, "loss": 0.5591, "step": 33436 }, { "epoch": 0.9762342705322472, "grad_norm": 0.5011543097299233, "learning_rate": 1.3203568532035687e-06, "loss": 0.5103, "step": 33437 }, { "epoch": 0.9762634667600946, "grad_norm": 0.5562020861186009, "learning_rate": 1.318734793187348e-06, "loss": 0.6083, "step": 33438 }, { "epoch": 0.9762926629879419, "grad_norm": 0.5062127917916749, "learning_rate": 1.3171127331711273e-06, "loss": 0.5445, "step": 33439 }, { "epoch": 0.9763218592157893, "grad_norm": 0.5068315551410317, "learning_rate": 1.3154906731549068e-06, "loss": 0.4994, "step": 33440 }, { "epoch": 0.9763510554436367, "grad_norm": 0.5375914433280556, "learning_rate": 1.3138686131386862e-06, "loss": 0.5952, "step": 33441 }, { "epoch": 0.976380251671484, "grad_norm": 0.5052060313036405, "learning_rate": 1.3122465531224656e-06, "loss": 0.5295, "step": 33442 }, { "epoch": 0.9764094478993314, "grad_norm": 0.5541869835588, "learning_rate": 1.310624493106245e-06, "loss": 0.6145, "step": 33443 }, { "epoch": 0.9764386441271787, "grad_norm": 0.5128849214050618, "learning_rate": 1.3090024330900245e-06, "loss": 0.4784, "step": 33444 }, { "epoch": 0.9764678403550261, "grad_norm": 0.5233082986564584, "learning_rate": 1.307380373073804e-06, "loss": 0.5549, "step": 33445 }, { "epoch": 0.9764970365828735, "grad_norm": 0.529943595081944, "learning_rate": 1.3057583130575831e-06, "loss": 0.5645, "step": 33446 }, { "epoch": 0.9765262328107208, "grad_norm": 0.5321015276370724, "learning_rate": 1.3041362530413626e-06, "loss": 0.5843, "step": 33447 }, { "epoch": 0.9765554290385682, "grad_norm": 0.5083399654104007, "learning_rate": 1.302514193025142e-06, "loss": 0.5671, "step": 33448 }, { "epoch": 0.9765846252664155, "grad_norm": 0.5500596421752098, "learning_rate": 1.3008921330089212e-06, "loss": 0.624, "step": 33449 }, { "epoch": 0.9766138214942629, "grad_norm": 0.5431364997460061, "learning_rate": 1.2992700729927009e-06, "loss": 0.6181, "step": 33450 }, { "epoch": 0.9766430177221103, "grad_norm": 0.5364637534782476, "learning_rate": 1.2976480129764803e-06, "loss": 0.6195, "step": 33451 }, { "epoch": 0.9766722139499576, "grad_norm": 0.49048837065257694, "learning_rate": 1.2960259529602595e-06, "loss": 0.5072, "step": 33452 }, { "epoch": 0.976701410177805, "grad_norm": 0.5012757878126127, "learning_rate": 1.294403892944039e-06, "loss": 0.5228, "step": 33453 }, { "epoch": 0.9767306064056523, "grad_norm": 0.5421433695618727, "learning_rate": 1.2927818329278184e-06, "loss": 0.541, "step": 33454 }, { "epoch": 0.9767598026334997, "grad_norm": 0.5478468534325576, "learning_rate": 1.2911597729115978e-06, "loss": 0.5991, "step": 33455 }, { "epoch": 0.9767889988613471, "grad_norm": 0.5850606063187647, "learning_rate": 1.289537712895377e-06, "loss": 0.661, "step": 33456 }, { "epoch": 0.9768181950891944, "grad_norm": 0.5533271547620172, "learning_rate": 1.2879156528791567e-06, "loss": 0.6163, "step": 33457 }, { "epoch": 0.9768473913170418, "grad_norm": 0.5519202528184993, "learning_rate": 1.2862935928629361e-06, "loss": 0.6582, "step": 33458 }, { "epoch": 0.9768765875448892, "grad_norm": 0.49401119418083783, "learning_rate": 1.2846715328467153e-06, "loss": 0.5331, "step": 33459 }, { "epoch": 0.9769057837727365, "grad_norm": 0.5017622270824585, "learning_rate": 1.2830494728304948e-06, "loss": 0.5028, "step": 33460 }, { "epoch": 0.9769349800005839, "grad_norm": 0.5389982316646665, "learning_rate": 1.2814274128142742e-06, "loss": 0.6017, "step": 33461 }, { "epoch": 0.9769641762284312, "grad_norm": 0.5334886444655362, "learning_rate": 1.2798053527980534e-06, "loss": 0.5473, "step": 33462 }, { "epoch": 0.9769933724562786, "grad_norm": 0.5051025295900604, "learning_rate": 1.278183292781833e-06, "loss": 0.4909, "step": 33463 }, { "epoch": 0.977022568684126, "grad_norm": 0.4949702803122873, "learning_rate": 1.2765612327656125e-06, "loss": 0.5019, "step": 33464 }, { "epoch": 0.9770517649119733, "grad_norm": 0.48336651558573196, "learning_rate": 1.274939172749392e-06, "loss": 0.5105, "step": 33465 }, { "epoch": 0.9770809611398207, "grad_norm": 0.5326720432110525, "learning_rate": 1.2733171127331712e-06, "loss": 0.6049, "step": 33466 }, { "epoch": 0.977110157367668, "grad_norm": 0.5402105599429214, "learning_rate": 1.2716950527169506e-06, "loss": 0.5893, "step": 33467 }, { "epoch": 0.9771393535955155, "grad_norm": 0.5604594769216258, "learning_rate": 1.27007299270073e-06, "loss": 0.598, "step": 33468 }, { "epoch": 0.9771685498233629, "grad_norm": 0.5287857412400027, "learning_rate": 1.2684509326845092e-06, "loss": 0.5839, "step": 33469 }, { "epoch": 0.9771977460512102, "grad_norm": 0.6730907928269537, "learning_rate": 1.2668288726682889e-06, "loss": 0.5763, "step": 33470 }, { "epoch": 0.9772269422790576, "grad_norm": 0.5700204970522348, "learning_rate": 1.2652068126520683e-06, "loss": 0.6573, "step": 33471 }, { "epoch": 0.977256138506905, "grad_norm": 0.5407444598094434, "learning_rate": 1.2635847526358475e-06, "loss": 0.5954, "step": 33472 }, { "epoch": 0.9772853347347523, "grad_norm": 0.487711443958737, "learning_rate": 1.261962692619627e-06, "loss": 0.502, "step": 33473 }, { "epoch": 0.9773145309625997, "grad_norm": 0.5335210885161032, "learning_rate": 1.2603406326034064e-06, "loss": 0.6011, "step": 33474 }, { "epoch": 0.977343727190447, "grad_norm": 0.4949462399551174, "learning_rate": 1.2587185725871856e-06, "loss": 0.4856, "step": 33475 }, { "epoch": 0.9773729234182944, "grad_norm": 0.5129778008782159, "learning_rate": 1.257096512570965e-06, "loss": 0.5192, "step": 33476 }, { "epoch": 0.9774021196461418, "grad_norm": 0.5197820001995612, "learning_rate": 1.2554744525547447e-06, "loss": 0.5601, "step": 33477 }, { "epoch": 0.9774313158739891, "grad_norm": 0.5428100002947313, "learning_rate": 1.2538523925385241e-06, "loss": 0.6128, "step": 33478 }, { "epoch": 0.9774605121018365, "grad_norm": 0.5025465040148248, "learning_rate": 1.2522303325223034e-06, "loss": 0.5149, "step": 33479 }, { "epoch": 0.9774897083296838, "grad_norm": 0.5629788590398549, "learning_rate": 1.2506082725060828e-06, "loss": 0.6396, "step": 33480 }, { "epoch": 0.9775189045575312, "grad_norm": 0.5193637784391132, "learning_rate": 1.2489862124898622e-06, "loss": 0.5564, "step": 33481 }, { "epoch": 0.9775481007853786, "grad_norm": 0.5767157411668615, "learning_rate": 1.2473641524736414e-06, "loss": 0.6391, "step": 33482 }, { "epoch": 0.9775772970132259, "grad_norm": 0.5189121991604536, "learning_rate": 1.2457420924574209e-06, "loss": 0.5466, "step": 33483 }, { "epoch": 0.9776064932410733, "grad_norm": 0.5172328982431085, "learning_rate": 1.2441200324412005e-06, "loss": 0.5398, "step": 33484 }, { "epoch": 0.9776356894689207, "grad_norm": 0.5704299816131015, "learning_rate": 1.2424979724249797e-06, "loss": 0.6567, "step": 33485 }, { "epoch": 0.977664885696768, "grad_norm": 0.5591884895189694, "learning_rate": 1.2408759124087592e-06, "loss": 0.6227, "step": 33486 }, { "epoch": 0.9776940819246154, "grad_norm": 0.5266809825602113, "learning_rate": 1.2392538523925386e-06, "loss": 0.5949, "step": 33487 }, { "epoch": 0.9777232781524627, "grad_norm": 0.5360776716909305, "learning_rate": 1.237631792376318e-06, "loss": 0.5704, "step": 33488 }, { "epoch": 0.9777524743803101, "grad_norm": 0.5341669144534222, "learning_rate": 1.2360097323600973e-06, "loss": 0.5306, "step": 33489 }, { "epoch": 0.9777816706081575, "grad_norm": 0.49722144351297215, "learning_rate": 1.2343876723438767e-06, "loss": 0.5056, "step": 33490 }, { "epoch": 0.9778108668360048, "grad_norm": 0.5598431699919467, "learning_rate": 1.2327656123276563e-06, "loss": 0.6053, "step": 33491 }, { "epoch": 0.9778400630638522, "grad_norm": 0.5749277033058005, "learning_rate": 1.2311435523114356e-06, "loss": 0.6586, "step": 33492 }, { "epoch": 0.9778692592916995, "grad_norm": 0.5442015187301624, "learning_rate": 1.229521492295215e-06, "loss": 0.5463, "step": 33493 }, { "epoch": 0.9778984555195469, "grad_norm": 0.5051599950785812, "learning_rate": 1.2278994322789944e-06, "loss": 0.5384, "step": 33494 }, { "epoch": 0.9779276517473943, "grad_norm": 0.5634800387987302, "learning_rate": 1.2262773722627736e-06, "loss": 0.5826, "step": 33495 }, { "epoch": 0.9779568479752416, "grad_norm": 0.5500506073498126, "learning_rate": 1.224655312246553e-06, "loss": 0.6075, "step": 33496 }, { "epoch": 0.977986044203089, "grad_norm": 0.538897319359736, "learning_rate": 1.2230332522303327e-06, "loss": 0.5819, "step": 33497 }, { "epoch": 0.9780152404309364, "grad_norm": 0.5456845510938184, "learning_rate": 1.221411192214112e-06, "loss": 0.6057, "step": 33498 }, { "epoch": 0.9780444366587837, "grad_norm": 0.5157075615336066, "learning_rate": 1.2197891321978914e-06, "loss": 0.5477, "step": 33499 }, { "epoch": 0.9780736328866311, "grad_norm": 0.5354196983866958, "learning_rate": 1.2181670721816708e-06, "loss": 0.5947, "step": 33500 }, { "epoch": 0.9781028291144784, "grad_norm": 0.4940161632401523, "learning_rate": 1.2165450121654502e-06, "loss": 0.5165, "step": 33501 }, { "epoch": 0.9781320253423258, "grad_norm": 0.5583103216454497, "learning_rate": 1.2149229521492295e-06, "loss": 0.6485, "step": 33502 }, { "epoch": 0.9781612215701732, "grad_norm": 0.5595140969162922, "learning_rate": 1.2133008921330089e-06, "loss": 0.6043, "step": 33503 }, { "epoch": 0.9781904177980205, "grad_norm": 0.5062383731287983, "learning_rate": 1.2116788321167885e-06, "loss": 0.563, "step": 33504 }, { "epoch": 0.9782196140258679, "grad_norm": 0.5489100997134605, "learning_rate": 1.2100567721005678e-06, "loss": 0.6423, "step": 33505 }, { "epoch": 0.9782488102537152, "grad_norm": 0.4853950083415204, "learning_rate": 1.2084347120843472e-06, "loss": 0.4778, "step": 33506 }, { "epoch": 0.9782780064815626, "grad_norm": 0.5393175635171129, "learning_rate": 1.2068126520681266e-06, "loss": 0.6058, "step": 33507 }, { "epoch": 0.97830720270941, "grad_norm": 0.5395037112143317, "learning_rate": 1.2051905920519058e-06, "loss": 0.5979, "step": 33508 }, { "epoch": 0.9783363989372573, "grad_norm": 0.5391085850054679, "learning_rate": 1.2035685320356853e-06, "loss": 0.5909, "step": 33509 }, { "epoch": 0.9783655951651047, "grad_norm": 0.544878111517923, "learning_rate": 1.2019464720194647e-06, "loss": 0.5908, "step": 33510 }, { "epoch": 0.978394791392952, "grad_norm": 0.5588813268720898, "learning_rate": 1.2003244120032444e-06, "loss": 0.6466, "step": 33511 }, { "epoch": 0.9784239876207994, "grad_norm": 0.5213347792868698, "learning_rate": 1.1987023519870236e-06, "loss": 0.5504, "step": 33512 }, { "epoch": 0.9784531838486468, "grad_norm": 0.5080115978255466, "learning_rate": 1.197080291970803e-06, "loss": 0.5479, "step": 33513 }, { "epoch": 0.9784823800764941, "grad_norm": 0.5661450586867375, "learning_rate": 1.1954582319545824e-06, "loss": 0.6165, "step": 33514 }, { "epoch": 0.9785115763043415, "grad_norm": 0.5108886862952882, "learning_rate": 1.1938361719383617e-06, "loss": 0.5444, "step": 33515 }, { "epoch": 0.9785407725321889, "grad_norm": 0.5103684124273278, "learning_rate": 1.192214111922141e-06, "loss": 0.5338, "step": 33516 }, { "epoch": 0.9785699687600362, "grad_norm": 0.5402568365279393, "learning_rate": 1.1905920519059205e-06, "loss": 0.5919, "step": 33517 }, { "epoch": 0.9785991649878836, "grad_norm": 0.5691257812241068, "learning_rate": 1.1889699918897e-06, "loss": 0.656, "step": 33518 }, { "epoch": 0.9786283612157309, "grad_norm": 0.46445769317352525, "learning_rate": 1.1873479318734794e-06, "loss": 0.4701, "step": 33519 }, { "epoch": 0.9786575574435783, "grad_norm": 0.5078961172971548, "learning_rate": 1.1857258718572588e-06, "loss": 0.5083, "step": 33520 }, { "epoch": 0.9786867536714257, "grad_norm": 0.5004398422899867, "learning_rate": 1.184103811841038e-06, "loss": 0.5132, "step": 33521 }, { "epoch": 0.978715949899273, "grad_norm": 0.5223818409323144, "learning_rate": 1.1824817518248175e-06, "loss": 0.5808, "step": 33522 }, { "epoch": 0.9787451461271204, "grad_norm": 0.5333001555001124, "learning_rate": 1.180859691808597e-06, "loss": 0.6095, "step": 33523 }, { "epoch": 0.9787743423549677, "grad_norm": 0.5396461941683354, "learning_rate": 1.1792376317923766e-06, "loss": 0.5848, "step": 33524 }, { "epoch": 0.9788035385828151, "grad_norm": 0.5472665506354061, "learning_rate": 1.1776155717761558e-06, "loss": 0.6057, "step": 33525 }, { "epoch": 0.9788327348106625, "grad_norm": 0.5235392330930917, "learning_rate": 1.1759935117599352e-06, "loss": 0.565, "step": 33526 }, { "epoch": 0.9788619310385098, "grad_norm": 0.551944655361082, "learning_rate": 1.1743714517437146e-06, "loss": 0.6436, "step": 33527 }, { "epoch": 0.9788911272663572, "grad_norm": 0.5017812471101736, "learning_rate": 1.1727493917274939e-06, "loss": 0.5106, "step": 33528 }, { "epoch": 0.9789203234942045, "grad_norm": 0.5030398887847437, "learning_rate": 1.1711273317112733e-06, "loss": 0.5348, "step": 33529 }, { "epoch": 0.9789495197220519, "grad_norm": 0.5037622407256493, "learning_rate": 1.1695052716950527e-06, "loss": 0.5377, "step": 33530 }, { "epoch": 0.9789787159498993, "grad_norm": 0.6063195749132954, "learning_rate": 1.1678832116788322e-06, "loss": 0.7104, "step": 33531 }, { "epoch": 0.9790079121777466, "grad_norm": 0.5268928567274413, "learning_rate": 1.1662611516626116e-06, "loss": 0.5956, "step": 33532 }, { "epoch": 0.979037108405594, "grad_norm": 0.5300917205299355, "learning_rate": 1.164639091646391e-06, "loss": 0.5689, "step": 33533 }, { "epoch": 0.9790663046334414, "grad_norm": 0.5839152146788577, "learning_rate": 1.1630170316301705e-06, "loss": 0.6288, "step": 33534 }, { "epoch": 0.9790955008612887, "grad_norm": 0.5102739782831563, "learning_rate": 1.1613949716139497e-06, "loss": 0.5277, "step": 33535 }, { "epoch": 0.9791246970891361, "grad_norm": 0.493657787733459, "learning_rate": 1.1597729115977291e-06, "loss": 0.4627, "step": 33536 }, { "epoch": 0.9791538933169834, "grad_norm": 0.4964744191840548, "learning_rate": 1.1581508515815085e-06, "loss": 0.5014, "step": 33537 }, { "epoch": 0.9791830895448308, "grad_norm": 0.5497308379055516, "learning_rate": 1.156528791565288e-06, "loss": 0.63, "step": 33538 }, { "epoch": 0.9792122857726782, "grad_norm": 0.5553314938301588, "learning_rate": 1.1549067315490674e-06, "loss": 0.677, "step": 33539 }, { "epoch": 0.9792414820005255, "grad_norm": 0.5191096505604825, "learning_rate": 1.1532846715328468e-06, "loss": 0.5645, "step": 33540 }, { "epoch": 0.9792706782283729, "grad_norm": 0.5170502607905125, "learning_rate": 1.151662611516626e-06, "loss": 0.5816, "step": 33541 }, { "epoch": 0.9792998744562202, "grad_norm": 0.5369869314656381, "learning_rate": 1.1500405515004055e-06, "loss": 0.6014, "step": 33542 }, { "epoch": 0.9793290706840676, "grad_norm": 0.5533094460182412, "learning_rate": 1.148418491484185e-06, "loss": 0.597, "step": 33543 }, { "epoch": 0.979358266911915, "grad_norm": 0.5200424426626882, "learning_rate": 1.1467964314679644e-06, "loss": 0.5313, "step": 33544 }, { "epoch": 0.9793874631397623, "grad_norm": 0.5310489040738715, "learning_rate": 1.1451743714517438e-06, "loss": 0.5568, "step": 33545 }, { "epoch": 0.9794166593676097, "grad_norm": 0.5378414947915451, "learning_rate": 1.1435523114355232e-06, "loss": 0.5806, "step": 33546 }, { "epoch": 0.979445855595457, "grad_norm": 0.5369823948461894, "learning_rate": 1.1419302514193027e-06, "loss": 0.5667, "step": 33547 }, { "epoch": 0.9794750518233044, "grad_norm": 0.5552654549894241, "learning_rate": 1.1403081914030819e-06, "loss": 0.6095, "step": 33548 }, { "epoch": 0.9795042480511518, "grad_norm": 0.6035618661634627, "learning_rate": 1.1386861313868613e-06, "loss": 0.7017, "step": 33549 }, { "epoch": 0.9795334442789991, "grad_norm": 0.5654251630060549, "learning_rate": 1.1370640713706407e-06, "loss": 0.6065, "step": 33550 }, { "epoch": 0.9795626405068465, "grad_norm": 0.5014764428267611, "learning_rate": 1.1354420113544202e-06, "loss": 0.5545, "step": 33551 }, { "epoch": 0.9795918367346939, "grad_norm": 0.5401615782025215, "learning_rate": 1.1338199513381996e-06, "loss": 0.5659, "step": 33552 }, { "epoch": 0.9796210329625412, "grad_norm": 0.5321190572505923, "learning_rate": 1.132197891321979e-06, "loss": 0.5948, "step": 33553 }, { "epoch": 0.9796502291903886, "grad_norm": 0.5281524030623455, "learning_rate": 1.1305758313057583e-06, "loss": 0.5694, "step": 33554 }, { "epoch": 0.9796794254182359, "grad_norm": 0.5145405694758219, "learning_rate": 1.1289537712895377e-06, "loss": 0.5674, "step": 33555 }, { "epoch": 0.9797086216460833, "grad_norm": 0.49464963386483124, "learning_rate": 1.1273317112733171e-06, "loss": 0.5074, "step": 33556 }, { "epoch": 0.9797378178739307, "grad_norm": 0.5265993054047098, "learning_rate": 1.1257096512570966e-06, "loss": 0.5685, "step": 33557 }, { "epoch": 0.979767014101778, "grad_norm": 0.4787968710482991, "learning_rate": 1.124087591240876e-06, "loss": 0.4544, "step": 33558 }, { "epoch": 0.9797962103296254, "grad_norm": 0.5611754975968737, "learning_rate": 1.1224655312246554e-06, "loss": 0.6306, "step": 33559 }, { "epoch": 0.9798254065574727, "grad_norm": 0.5565487773535946, "learning_rate": 1.1208434712084349e-06, "loss": 0.564, "step": 33560 }, { "epoch": 0.9798546027853201, "grad_norm": 0.5286045664574233, "learning_rate": 1.119221411192214e-06, "loss": 0.5608, "step": 33561 }, { "epoch": 0.9798837990131675, "grad_norm": 0.5267515185722323, "learning_rate": 1.1175993511759935e-06, "loss": 0.5669, "step": 33562 }, { "epoch": 0.9799129952410148, "grad_norm": 0.5195948565254065, "learning_rate": 1.115977291159773e-06, "loss": 0.5453, "step": 33563 }, { "epoch": 0.9799421914688622, "grad_norm": 0.5317761022567061, "learning_rate": 1.1143552311435524e-06, "loss": 0.5825, "step": 33564 }, { "epoch": 0.9799713876967096, "grad_norm": 0.5452123255565394, "learning_rate": 1.1127331711273318e-06, "loss": 0.5948, "step": 33565 }, { "epoch": 0.9800005839245569, "grad_norm": 0.5442360485693927, "learning_rate": 1.1111111111111112e-06, "loss": 0.607, "step": 33566 }, { "epoch": 0.9800297801524043, "grad_norm": 0.5521054900960223, "learning_rate": 1.1094890510948905e-06, "loss": 0.6514, "step": 33567 }, { "epoch": 0.9800589763802516, "grad_norm": 0.5272699042352161, "learning_rate": 1.10786699107867e-06, "loss": 0.5589, "step": 33568 }, { "epoch": 0.980088172608099, "grad_norm": 0.49780722004781147, "learning_rate": 1.1062449310624493e-06, "loss": 0.5308, "step": 33569 }, { "epoch": 0.9801173688359464, "grad_norm": 0.550322847458457, "learning_rate": 1.1046228710462288e-06, "loss": 0.6113, "step": 33570 }, { "epoch": 0.9801465650637937, "grad_norm": 0.5535656242810985, "learning_rate": 1.1030008110300082e-06, "loss": 0.6264, "step": 33571 }, { "epoch": 0.9801757612916411, "grad_norm": 0.5116262834821348, "learning_rate": 1.1013787510137876e-06, "loss": 0.5142, "step": 33572 }, { "epoch": 0.9802049575194884, "grad_norm": 0.4912055736873184, "learning_rate": 1.099756690997567e-06, "loss": 0.5202, "step": 33573 }, { "epoch": 0.9802341537473358, "grad_norm": 0.525382726625475, "learning_rate": 1.0981346309813463e-06, "loss": 0.5479, "step": 33574 }, { "epoch": 0.9802633499751832, "grad_norm": 0.5066501846495625, "learning_rate": 1.0965125709651257e-06, "loss": 0.5053, "step": 33575 }, { "epoch": 0.9802925462030305, "grad_norm": 0.5395159087987952, "learning_rate": 1.0948905109489052e-06, "loss": 0.587, "step": 33576 }, { "epoch": 0.9803217424308779, "grad_norm": 0.5672531170934902, "learning_rate": 1.0932684509326846e-06, "loss": 0.6498, "step": 33577 }, { "epoch": 0.9803509386587252, "grad_norm": 0.5108619490164932, "learning_rate": 1.091646390916464e-06, "loss": 0.5019, "step": 33578 }, { "epoch": 0.9803801348865726, "grad_norm": 0.5116287643255668, "learning_rate": 1.0900243309002435e-06, "loss": 0.5231, "step": 33579 }, { "epoch": 0.98040933111442, "grad_norm": 0.5594665378778974, "learning_rate": 1.0884022708840227e-06, "loss": 0.5865, "step": 33580 }, { "epoch": 0.9804385273422673, "grad_norm": 0.5229263068878885, "learning_rate": 1.086780210867802e-06, "loss": 0.5661, "step": 33581 }, { "epoch": 0.9804677235701147, "grad_norm": 0.5173498430607553, "learning_rate": 1.0851581508515815e-06, "loss": 0.5545, "step": 33582 }, { "epoch": 0.980496919797962, "grad_norm": 0.5592466952325605, "learning_rate": 1.083536090835361e-06, "loss": 0.6547, "step": 33583 }, { "epoch": 0.9805261160258094, "grad_norm": 0.5498448487838044, "learning_rate": 1.0819140308191404e-06, "loss": 0.6011, "step": 33584 }, { "epoch": 0.9805553122536568, "grad_norm": 0.5139851884697988, "learning_rate": 1.0802919708029198e-06, "loss": 0.5662, "step": 33585 }, { "epoch": 0.9805845084815041, "grad_norm": 0.521203618490264, "learning_rate": 1.0786699107866993e-06, "loss": 0.5831, "step": 33586 }, { "epoch": 0.9806137047093515, "grad_norm": 0.5219487468577424, "learning_rate": 1.0770478507704785e-06, "loss": 0.5391, "step": 33587 }, { "epoch": 0.980642900937199, "grad_norm": 0.5085889852680067, "learning_rate": 1.075425790754258e-06, "loss": 0.5577, "step": 33588 }, { "epoch": 0.9806720971650463, "grad_norm": 0.5304465638056941, "learning_rate": 1.0738037307380374e-06, "loss": 0.5538, "step": 33589 }, { "epoch": 0.9807012933928937, "grad_norm": 0.5192651306739894, "learning_rate": 1.0721816707218168e-06, "loss": 0.5197, "step": 33590 }, { "epoch": 0.980730489620741, "grad_norm": 0.5128920215333347, "learning_rate": 1.0705596107055962e-06, "loss": 0.5312, "step": 33591 }, { "epoch": 0.9807596858485884, "grad_norm": 0.5449911799970488, "learning_rate": 1.0689375506893757e-06, "loss": 0.577, "step": 33592 }, { "epoch": 0.9807888820764358, "grad_norm": 0.5469166102638707, "learning_rate": 1.067315490673155e-06, "loss": 0.6282, "step": 33593 }, { "epoch": 0.9808180783042831, "grad_norm": 0.5531616787188699, "learning_rate": 1.0656934306569343e-06, "loss": 0.5597, "step": 33594 }, { "epoch": 0.9808472745321305, "grad_norm": 0.5216263273365962, "learning_rate": 1.0640713706407137e-06, "loss": 0.5793, "step": 33595 }, { "epoch": 0.9808764707599779, "grad_norm": 0.5210051110665913, "learning_rate": 1.0624493106244932e-06, "loss": 0.5621, "step": 33596 }, { "epoch": 0.9809056669878252, "grad_norm": 0.5331346388914316, "learning_rate": 1.0608272506082726e-06, "loss": 0.6315, "step": 33597 }, { "epoch": 0.9809348632156726, "grad_norm": 0.49335884563889054, "learning_rate": 1.059205190592052e-06, "loss": 0.5625, "step": 33598 }, { "epoch": 0.9809640594435199, "grad_norm": 0.535763549455091, "learning_rate": 1.0575831305758315e-06, "loss": 0.6191, "step": 33599 }, { "epoch": 0.9809932556713673, "grad_norm": 0.5437783197253109, "learning_rate": 1.0559610705596107e-06, "loss": 0.6039, "step": 33600 }, { "epoch": 0.9810224518992147, "grad_norm": 0.5289842876004942, "learning_rate": 1.0543390105433901e-06, "loss": 0.56, "step": 33601 }, { "epoch": 0.981051648127062, "grad_norm": 0.5014190085693662, "learning_rate": 1.0527169505271696e-06, "loss": 0.5365, "step": 33602 }, { "epoch": 0.9810808443549094, "grad_norm": 0.5357582198643549, "learning_rate": 1.0510948905109488e-06, "loss": 0.5914, "step": 33603 }, { "epoch": 0.9811100405827567, "grad_norm": 0.5275286030084829, "learning_rate": 1.0494728304947284e-06, "loss": 0.5746, "step": 33604 }, { "epoch": 0.9811392368106041, "grad_norm": 0.5702241063440131, "learning_rate": 1.0478507704785079e-06, "loss": 0.6125, "step": 33605 }, { "epoch": 0.9811684330384515, "grad_norm": 0.5857623290122609, "learning_rate": 1.0462287104622873e-06, "loss": 0.6606, "step": 33606 }, { "epoch": 0.9811976292662988, "grad_norm": 0.5120851716903057, "learning_rate": 1.0446066504460665e-06, "loss": 0.566, "step": 33607 }, { "epoch": 0.9812268254941462, "grad_norm": 0.5251183970493181, "learning_rate": 1.042984590429846e-06, "loss": 0.58, "step": 33608 }, { "epoch": 0.9812560217219936, "grad_norm": 0.49646725792695495, "learning_rate": 1.0413625304136254e-06, "loss": 0.4726, "step": 33609 }, { "epoch": 0.9812852179498409, "grad_norm": 0.5250709498424613, "learning_rate": 1.0397404703974046e-06, "loss": 0.5681, "step": 33610 }, { "epoch": 0.9813144141776883, "grad_norm": 0.519742234538783, "learning_rate": 1.0381184103811842e-06, "loss": 0.5638, "step": 33611 }, { "epoch": 0.9813436104055356, "grad_norm": 0.5245668767557288, "learning_rate": 1.0364963503649637e-06, "loss": 0.5262, "step": 33612 }, { "epoch": 0.981372806633383, "grad_norm": 0.5398851279420589, "learning_rate": 1.0348742903487429e-06, "loss": 0.5781, "step": 33613 }, { "epoch": 0.9814020028612304, "grad_norm": 0.5067878645050898, "learning_rate": 1.0332522303325223e-06, "loss": 0.5167, "step": 33614 }, { "epoch": 0.9814311990890777, "grad_norm": 0.5123291240325252, "learning_rate": 1.0316301703163018e-06, "loss": 0.5458, "step": 33615 }, { "epoch": 0.9814603953169251, "grad_norm": 0.5036728959741616, "learning_rate": 1.0300081103000812e-06, "loss": 0.5163, "step": 33616 }, { "epoch": 0.9814895915447724, "grad_norm": 0.5207193349248034, "learning_rate": 1.0283860502838604e-06, "loss": 0.5534, "step": 33617 }, { "epoch": 0.9815187877726198, "grad_norm": 0.6191165907272829, "learning_rate": 1.02676399026764e-06, "loss": 0.6615, "step": 33618 }, { "epoch": 0.9815479840004672, "grad_norm": 0.531607316198862, "learning_rate": 1.0251419302514195e-06, "loss": 0.5673, "step": 33619 }, { "epoch": 0.9815771802283145, "grad_norm": 0.5142283824595887, "learning_rate": 1.0235198702351987e-06, "loss": 0.5173, "step": 33620 }, { "epoch": 0.9816063764561619, "grad_norm": 0.5246860400178313, "learning_rate": 1.0218978102189781e-06, "loss": 0.5312, "step": 33621 }, { "epoch": 0.9816355726840092, "grad_norm": 0.5237372613652618, "learning_rate": 1.0202757502027576e-06, "loss": 0.5504, "step": 33622 }, { "epoch": 0.9816647689118566, "grad_norm": 0.5039304333444751, "learning_rate": 1.0186536901865368e-06, "loss": 0.5576, "step": 33623 }, { "epoch": 0.981693965139704, "grad_norm": 0.5592538933212591, "learning_rate": 1.0170316301703164e-06, "loss": 0.5933, "step": 33624 }, { "epoch": 0.9817231613675513, "grad_norm": 0.5501825165693874, "learning_rate": 1.0154095701540959e-06, "loss": 0.6216, "step": 33625 }, { "epoch": 0.9817523575953987, "grad_norm": 0.5386143761914697, "learning_rate": 1.013787510137875e-06, "loss": 0.5705, "step": 33626 }, { "epoch": 0.9817815538232461, "grad_norm": 0.5532388698092117, "learning_rate": 1.0121654501216545e-06, "loss": 0.6363, "step": 33627 }, { "epoch": 0.9818107500510934, "grad_norm": 0.5108660003607179, "learning_rate": 1.010543390105434e-06, "loss": 0.561, "step": 33628 }, { "epoch": 0.9818399462789408, "grad_norm": 0.5115035843785499, "learning_rate": 1.0089213300892134e-06, "loss": 0.5745, "step": 33629 }, { "epoch": 0.9818691425067881, "grad_norm": 0.5278985716683098, "learning_rate": 1.0072992700729926e-06, "loss": 0.5872, "step": 33630 }, { "epoch": 0.9818983387346355, "grad_norm": 0.5002621826525822, "learning_rate": 1.0056772100567723e-06, "loss": 0.5516, "step": 33631 }, { "epoch": 0.9819275349624829, "grad_norm": 0.5416192049511458, "learning_rate": 1.0040551500405517e-06, "loss": 0.6028, "step": 33632 }, { "epoch": 0.9819567311903302, "grad_norm": 0.5340001987525811, "learning_rate": 1.002433090024331e-06, "loss": 0.5735, "step": 33633 }, { "epoch": 0.9819859274181776, "grad_norm": 0.5299495070438824, "learning_rate": 1.0008110300081103e-06, "loss": 0.5873, "step": 33634 }, { "epoch": 0.982015123646025, "grad_norm": 0.5429114265022655, "learning_rate": 9.991889699918898e-07, "loss": 0.6179, "step": 33635 }, { "epoch": 0.9820443198738723, "grad_norm": 0.5050743936398981, "learning_rate": 9.97566909975669e-07, "loss": 0.5363, "step": 33636 }, { "epoch": 0.9820735161017197, "grad_norm": 0.472991178200758, "learning_rate": 9.959448499594484e-07, "loss": 0.4902, "step": 33637 }, { "epoch": 0.982102712329567, "grad_norm": 0.5063259752001723, "learning_rate": 9.94322789943228e-07, "loss": 0.5227, "step": 33638 }, { "epoch": 0.9821319085574144, "grad_norm": 0.5359747459446268, "learning_rate": 9.927007299270075e-07, "loss": 0.6085, "step": 33639 }, { "epoch": 0.9821611047852618, "grad_norm": 0.5103645032130147, "learning_rate": 9.910786699107867e-07, "loss": 0.5171, "step": 33640 }, { "epoch": 0.9821903010131091, "grad_norm": 0.5543666622048293, "learning_rate": 9.894566098945662e-07, "loss": 0.6099, "step": 33641 }, { "epoch": 0.9822194972409565, "grad_norm": 0.520341436708904, "learning_rate": 9.878345498783456e-07, "loss": 0.5577, "step": 33642 }, { "epoch": 0.9822486934688038, "grad_norm": 0.5568085502492415, "learning_rate": 9.862124898621248e-07, "loss": 0.6257, "step": 33643 }, { "epoch": 0.9822778896966512, "grad_norm": 0.5844277222582979, "learning_rate": 9.845904298459042e-07, "loss": 0.6702, "step": 33644 }, { "epoch": 0.9823070859244986, "grad_norm": 0.5610402200938301, "learning_rate": 9.829683698296839e-07, "loss": 0.6379, "step": 33645 }, { "epoch": 0.9823362821523459, "grad_norm": 0.5841144671025591, "learning_rate": 9.813463098134631e-07, "loss": 0.6847, "step": 33646 }, { "epoch": 0.9823654783801933, "grad_norm": 0.5103168697689883, "learning_rate": 9.797242497972425e-07, "loss": 0.5356, "step": 33647 }, { "epoch": 0.9823946746080406, "grad_norm": 0.5464917661784804, "learning_rate": 9.78102189781022e-07, "loss": 0.5868, "step": 33648 }, { "epoch": 0.982423870835888, "grad_norm": 0.5124978257285672, "learning_rate": 9.764801297648012e-07, "loss": 0.5425, "step": 33649 }, { "epoch": 0.9824530670637354, "grad_norm": 0.5653709777745205, "learning_rate": 9.748580697485806e-07, "loss": 0.6533, "step": 33650 }, { "epoch": 0.9824822632915827, "grad_norm": 0.529472904967222, "learning_rate": 9.7323600973236e-07, "loss": 0.5778, "step": 33651 }, { "epoch": 0.9825114595194301, "grad_norm": 0.5174470105845986, "learning_rate": 9.716139497161397e-07, "loss": 0.5592, "step": 33652 }, { "epoch": 0.9825406557472774, "grad_norm": 0.5431453107337131, "learning_rate": 9.69991889699919e-07, "loss": 0.5875, "step": 33653 }, { "epoch": 0.9825698519751248, "grad_norm": 0.844931380277777, "learning_rate": 9.683698296836984e-07, "loss": 0.6449, "step": 33654 }, { "epoch": 0.9825990482029722, "grad_norm": 0.5212603937938377, "learning_rate": 9.667477696674778e-07, "loss": 0.5682, "step": 33655 }, { "epoch": 0.9826282444308195, "grad_norm": 0.5655009633559778, "learning_rate": 9.65125709651257e-07, "loss": 0.6645, "step": 33656 }, { "epoch": 0.9826574406586669, "grad_norm": 0.5254423982756412, "learning_rate": 9.635036496350364e-07, "loss": 0.5476, "step": 33657 }, { "epoch": 0.9826866368865143, "grad_norm": 0.5632599199649017, "learning_rate": 9.61881589618816e-07, "loss": 0.597, "step": 33658 }, { "epoch": 0.9827158331143616, "grad_norm": 0.5215302179627328, "learning_rate": 9.602595296025953e-07, "loss": 0.5676, "step": 33659 }, { "epoch": 0.982745029342209, "grad_norm": 0.5636061981311494, "learning_rate": 9.586374695863747e-07, "loss": 0.6427, "step": 33660 }, { "epoch": 0.9827742255700563, "grad_norm": 0.5014062210917786, "learning_rate": 9.570154095701542e-07, "loss": 0.5163, "step": 33661 }, { "epoch": 0.9828034217979037, "grad_norm": 0.5771072901664865, "learning_rate": 9.553933495539334e-07, "loss": 0.6774, "step": 33662 }, { "epoch": 0.9828326180257511, "grad_norm": 0.5203687052139271, "learning_rate": 9.537712895377128e-07, "loss": 0.5686, "step": 33663 }, { "epoch": 0.9828618142535984, "grad_norm": 0.4953555466814713, "learning_rate": 9.521492295214924e-07, "loss": 0.4872, "step": 33664 }, { "epoch": 0.9828910104814458, "grad_norm": 0.5374515414670243, "learning_rate": 9.505271695052718e-07, "loss": 0.5727, "step": 33665 }, { "epoch": 0.9829202067092931, "grad_norm": 0.5558259599775728, "learning_rate": 9.489051094890511e-07, "loss": 0.6174, "step": 33666 }, { "epoch": 0.9829494029371405, "grad_norm": 0.5160668788292411, "learning_rate": 9.472830494728306e-07, "loss": 0.5404, "step": 33667 }, { "epoch": 0.9829785991649879, "grad_norm": 0.5443219325587423, "learning_rate": 9.4566098945661e-07, "loss": 0.5758, "step": 33668 }, { "epoch": 0.9830077953928352, "grad_norm": 0.5261579417538474, "learning_rate": 9.440389294403893e-07, "loss": 0.5677, "step": 33669 }, { "epoch": 0.9830369916206826, "grad_norm": 0.490783649819995, "learning_rate": 9.424168694241688e-07, "loss": 0.5205, "step": 33670 }, { "epoch": 0.98306618784853, "grad_norm": 0.548921623584258, "learning_rate": 9.407948094079482e-07, "loss": 0.5827, "step": 33671 }, { "epoch": 0.9830953840763773, "grad_norm": 0.5233339801000423, "learning_rate": 9.391727493917274e-07, "loss": 0.5687, "step": 33672 }, { "epoch": 0.9831245803042247, "grad_norm": 0.541543938891206, "learning_rate": 9.37550689375507e-07, "loss": 0.5852, "step": 33673 }, { "epoch": 0.983153776532072, "grad_norm": 0.5131438456515214, "learning_rate": 9.359286293592864e-07, "loss": 0.5313, "step": 33674 }, { "epoch": 0.9831829727599194, "grad_norm": 0.5180302819508216, "learning_rate": 9.343065693430658e-07, "loss": 0.5412, "step": 33675 }, { "epoch": 0.9832121689877668, "grad_norm": 0.5194820245869056, "learning_rate": 9.326845093268451e-07, "loss": 0.5298, "step": 33676 }, { "epoch": 0.9832413652156141, "grad_norm": 0.5422055355342684, "learning_rate": 9.310624493106246e-07, "loss": 0.6063, "step": 33677 }, { "epoch": 0.9832705614434615, "grad_norm": 0.5319603488654591, "learning_rate": 9.29440389294404e-07, "loss": 0.5657, "step": 33678 }, { "epoch": 0.9832997576713088, "grad_norm": 0.4796466912517986, "learning_rate": 9.278183292781833e-07, "loss": 0.5063, "step": 33679 }, { "epoch": 0.9833289538991562, "grad_norm": 0.5627181576013414, "learning_rate": 9.261962692619628e-07, "loss": 0.6101, "step": 33680 }, { "epoch": 0.9833581501270036, "grad_norm": 0.5425302135324721, "learning_rate": 9.245742092457422e-07, "loss": 0.5947, "step": 33681 }, { "epoch": 0.9833873463548509, "grad_norm": 0.5456090963989753, "learning_rate": 9.229521492295214e-07, "loss": 0.5574, "step": 33682 }, { "epoch": 0.9834165425826983, "grad_norm": 0.46797312807608443, "learning_rate": 9.21330089213301e-07, "loss": 0.4451, "step": 33683 }, { "epoch": 0.9834457388105456, "grad_norm": 0.5059476485446188, "learning_rate": 9.197080291970804e-07, "loss": 0.5421, "step": 33684 }, { "epoch": 0.983474935038393, "grad_norm": 0.5322980552330052, "learning_rate": 9.180859691808596e-07, "loss": 0.5548, "step": 33685 }, { "epoch": 0.9835041312662404, "grad_norm": 0.5218096679879618, "learning_rate": 9.164639091646392e-07, "loss": 0.574, "step": 33686 }, { "epoch": 0.9835333274940877, "grad_norm": 0.5608490057135065, "learning_rate": 9.148418491484186e-07, "loss": 0.5575, "step": 33687 }, { "epoch": 0.9835625237219351, "grad_norm": 0.5850567151761128, "learning_rate": 9.13219789132198e-07, "loss": 0.6797, "step": 33688 }, { "epoch": 0.9835917199497825, "grad_norm": 0.5690244933808252, "learning_rate": 9.115977291159772e-07, "loss": 0.6541, "step": 33689 }, { "epoch": 0.9836209161776298, "grad_norm": 0.5133489116875141, "learning_rate": 9.099756690997568e-07, "loss": 0.5409, "step": 33690 }, { "epoch": 0.9836501124054772, "grad_norm": 0.5307883600598, "learning_rate": 9.083536090835362e-07, "loss": 0.553, "step": 33691 }, { "epoch": 0.9836793086333245, "grad_norm": 0.4700446692676041, "learning_rate": 9.067315490673154e-07, "loss": 0.4756, "step": 33692 }, { "epoch": 0.9837085048611719, "grad_norm": 0.5579354412491483, "learning_rate": 9.05109489051095e-07, "loss": 0.6391, "step": 33693 }, { "epoch": 0.9837377010890193, "grad_norm": 0.5182772751909887, "learning_rate": 9.034874290348744e-07, "loss": 0.5675, "step": 33694 }, { "epoch": 0.9837668973168666, "grad_norm": 0.4807469671713439, "learning_rate": 9.018653690186536e-07, "loss": 0.4779, "step": 33695 }, { "epoch": 0.983796093544714, "grad_norm": 0.5224564707598879, "learning_rate": 9.002433090024332e-07, "loss": 0.5537, "step": 33696 }, { "epoch": 0.9838252897725613, "grad_norm": 0.47083611782814755, "learning_rate": 8.986212489862126e-07, "loss": 0.4311, "step": 33697 }, { "epoch": 0.9838544860004087, "grad_norm": 0.5316424942805431, "learning_rate": 8.96999188969992e-07, "loss": 0.5786, "step": 33698 }, { "epoch": 0.9838836822282561, "grad_norm": 0.518171893178638, "learning_rate": 8.953771289537712e-07, "loss": 0.5611, "step": 33699 }, { "epoch": 0.9839128784561034, "grad_norm": 0.528359764753194, "learning_rate": 8.937550689375508e-07, "loss": 0.5668, "step": 33700 }, { "epoch": 0.9839420746839508, "grad_norm": 0.523440230525219, "learning_rate": 8.921330089213302e-07, "loss": 0.546, "step": 33701 }, { "epoch": 0.9839712709117981, "grad_norm": 0.5246697327062085, "learning_rate": 8.905109489051094e-07, "loss": 0.5339, "step": 33702 }, { "epoch": 0.9840004671396455, "grad_norm": 0.48562652351588603, "learning_rate": 8.88888888888889e-07, "loss": 0.4996, "step": 33703 }, { "epoch": 0.9840296633674929, "grad_norm": 0.5819897925404452, "learning_rate": 8.872668288726684e-07, "loss": 0.6962, "step": 33704 }, { "epoch": 0.9840588595953402, "grad_norm": 0.49432360660564123, "learning_rate": 8.856447688564476e-07, "loss": 0.4943, "step": 33705 }, { "epoch": 0.9840880558231876, "grad_norm": 0.5294116720651583, "learning_rate": 8.840227088402271e-07, "loss": 0.5924, "step": 33706 }, { "epoch": 0.984117252051035, "grad_norm": 0.5301560111789995, "learning_rate": 8.824006488240066e-07, "loss": 0.5768, "step": 33707 }, { "epoch": 0.9841464482788823, "grad_norm": 0.5054441699738849, "learning_rate": 8.807785888077858e-07, "loss": 0.5361, "step": 33708 }, { "epoch": 0.9841756445067298, "grad_norm": 0.5828331640609525, "learning_rate": 8.791565287915653e-07, "loss": 0.6351, "step": 33709 }, { "epoch": 0.9842048407345771, "grad_norm": 0.49279639627661687, "learning_rate": 8.775344687753448e-07, "loss": 0.5101, "step": 33710 }, { "epoch": 0.9842340369624245, "grad_norm": 0.5365965728383268, "learning_rate": 8.759124087591242e-07, "loss": 0.5912, "step": 33711 }, { "epoch": 0.9842632331902719, "grad_norm": 0.5387306693336327, "learning_rate": 8.742903487429034e-07, "loss": 0.584, "step": 33712 }, { "epoch": 0.9842924294181192, "grad_norm": 0.5576445031099941, "learning_rate": 8.72668288726683e-07, "loss": 0.6188, "step": 33713 }, { "epoch": 0.9843216256459666, "grad_norm": 0.5380641728756017, "learning_rate": 8.710462287104624e-07, "loss": 0.5784, "step": 33714 }, { "epoch": 0.984350821873814, "grad_norm": 0.5103928297685199, "learning_rate": 8.694241686942416e-07, "loss": 0.5409, "step": 33715 }, { "epoch": 0.9843800181016613, "grad_norm": 0.5674592936385354, "learning_rate": 8.678021086780211e-07, "loss": 0.6372, "step": 33716 }, { "epoch": 0.9844092143295087, "grad_norm": 0.5407694582204516, "learning_rate": 8.661800486618006e-07, "loss": 0.6176, "step": 33717 }, { "epoch": 0.984438410557356, "grad_norm": 0.5243440139780647, "learning_rate": 8.645579886455798e-07, "loss": 0.5829, "step": 33718 }, { "epoch": 0.9844676067852034, "grad_norm": 0.5118161653250476, "learning_rate": 8.629359286293593e-07, "loss": 0.5706, "step": 33719 }, { "epoch": 0.9844968030130508, "grad_norm": 0.5404915239968708, "learning_rate": 8.613138686131388e-07, "loss": 0.585, "step": 33720 }, { "epoch": 0.9845259992408981, "grad_norm": 0.5286151992296602, "learning_rate": 8.596918085969182e-07, "loss": 0.6025, "step": 33721 }, { "epoch": 0.9845551954687455, "grad_norm": 0.549990820270702, "learning_rate": 8.580697485806975e-07, "loss": 0.5494, "step": 33722 }, { "epoch": 0.9845843916965928, "grad_norm": 0.5499941605664725, "learning_rate": 8.564476885644769e-07, "loss": 0.5937, "step": 33723 }, { "epoch": 0.9846135879244402, "grad_norm": 0.5240680097641165, "learning_rate": 8.548256285482564e-07, "loss": 0.5634, "step": 33724 }, { "epoch": 0.9846427841522876, "grad_norm": 0.5193825154333214, "learning_rate": 8.532035685320357e-07, "loss": 0.5622, "step": 33725 }, { "epoch": 0.9846719803801349, "grad_norm": 0.5439763698372809, "learning_rate": 8.515815085158151e-07, "loss": 0.5738, "step": 33726 }, { "epoch": 0.9847011766079823, "grad_norm": 0.5139953793282833, "learning_rate": 8.499594484995946e-07, "loss": 0.5332, "step": 33727 }, { "epoch": 0.9847303728358296, "grad_norm": 0.5212868365999991, "learning_rate": 8.483373884833738e-07, "loss": 0.5961, "step": 33728 }, { "epoch": 0.984759569063677, "grad_norm": 0.5061701087866854, "learning_rate": 8.467153284671533e-07, "loss": 0.5367, "step": 33729 }, { "epoch": 0.9847887652915244, "grad_norm": 0.5317328482520214, "learning_rate": 8.450932684509328e-07, "loss": 0.6306, "step": 33730 }, { "epoch": 0.9848179615193717, "grad_norm": 0.5620927399477782, "learning_rate": 8.43471208434712e-07, "loss": 0.6309, "step": 33731 }, { "epoch": 0.9848471577472191, "grad_norm": 0.49732569379700264, "learning_rate": 8.418491484184915e-07, "loss": 0.5614, "step": 33732 }, { "epoch": 0.9848763539750665, "grad_norm": 0.5712419148729693, "learning_rate": 8.402270884022709e-07, "loss": 0.6271, "step": 33733 }, { "epoch": 0.9849055502029138, "grad_norm": 0.49071337801121756, "learning_rate": 8.386050283860504e-07, "loss": 0.4885, "step": 33734 }, { "epoch": 0.9849347464307612, "grad_norm": 0.507234768446276, "learning_rate": 8.369829683698297e-07, "loss": 0.5338, "step": 33735 }, { "epoch": 0.9849639426586085, "grad_norm": 0.49700219755846087, "learning_rate": 8.353609083536091e-07, "loss": 0.5309, "step": 33736 }, { "epoch": 0.9849931388864559, "grad_norm": 0.518153542323295, "learning_rate": 8.337388483373886e-07, "loss": 0.542, "step": 33737 }, { "epoch": 0.9850223351143033, "grad_norm": 0.5487031975599754, "learning_rate": 8.321167883211679e-07, "loss": 0.5897, "step": 33738 }, { "epoch": 0.9850515313421506, "grad_norm": 0.5494299962465893, "learning_rate": 8.304947283049473e-07, "loss": 0.6149, "step": 33739 }, { "epoch": 0.985080727569998, "grad_norm": 0.5417710163679755, "learning_rate": 8.288726682887267e-07, "loss": 0.5962, "step": 33740 }, { "epoch": 0.9851099237978453, "grad_norm": 0.5140131231303076, "learning_rate": 8.27250608272506e-07, "loss": 0.5627, "step": 33741 }, { "epoch": 0.9851391200256927, "grad_norm": 0.5380500797563286, "learning_rate": 8.256285482562855e-07, "loss": 0.6034, "step": 33742 }, { "epoch": 0.9851683162535401, "grad_norm": 0.5202616327915804, "learning_rate": 8.240064882400649e-07, "loss": 0.5655, "step": 33743 }, { "epoch": 0.9851975124813874, "grad_norm": 0.5594951039345011, "learning_rate": 8.223844282238444e-07, "loss": 0.6169, "step": 33744 }, { "epoch": 0.9852267087092348, "grad_norm": 0.5177884366595583, "learning_rate": 8.207623682076237e-07, "loss": 0.5427, "step": 33745 }, { "epoch": 0.9852559049370821, "grad_norm": 0.5396253107478813, "learning_rate": 8.191403081914031e-07, "loss": 0.5782, "step": 33746 }, { "epoch": 0.9852851011649295, "grad_norm": 0.5746434909650505, "learning_rate": 8.175182481751826e-07, "loss": 0.5678, "step": 33747 }, { "epoch": 0.9853142973927769, "grad_norm": 0.49565649478246365, "learning_rate": 8.158961881589619e-07, "loss": 0.5273, "step": 33748 }, { "epoch": 0.9853434936206242, "grad_norm": 0.5513118166172444, "learning_rate": 8.142741281427413e-07, "loss": 0.5757, "step": 33749 }, { "epoch": 0.9853726898484716, "grad_norm": 0.5180231369966375, "learning_rate": 8.126520681265207e-07, "loss": 0.5427, "step": 33750 }, { "epoch": 0.985401886076319, "grad_norm": 0.5311811795104447, "learning_rate": 8.110300081103001e-07, "loss": 0.5752, "step": 33751 }, { "epoch": 0.9854310823041663, "grad_norm": 0.5274124420254912, "learning_rate": 8.094079480940795e-07, "loss": 0.5643, "step": 33752 }, { "epoch": 0.9854602785320137, "grad_norm": 0.5605358760689612, "learning_rate": 8.077858880778589e-07, "loss": 0.626, "step": 33753 }, { "epoch": 0.985489474759861, "grad_norm": 0.523100495093669, "learning_rate": 8.061638280616382e-07, "loss": 0.5404, "step": 33754 }, { "epoch": 0.9855186709877084, "grad_norm": 0.5021197778297087, "learning_rate": 8.045417680454177e-07, "loss": 0.5259, "step": 33755 }, { "epoch": 0.9855478672155558, "grad_norm": 0.5365023850880422, "learning_rate": 8.029197080291971e-07, "loss": 0.5956, "step": 33756 }, { "epoch": 0.9855770634434031, "grad_norm": 0.5161363379125922, "learning_rate": 8.012976480129765e-07, "loss": 0.5381, "step": 33757 }, { "epoch": 0.9856062596712505, "grad_norm": 0.5243282485641545, "learning_rate": 7.996755879967559e-07, "loss": 0.532, "step": 33758 }, { "epoch": 0.9856354558990978, "grad_norm": 0.5137695624722273, "learning_rate": 7.980535279805353e-07, "loss": 0.5038, "step": 33759 }, { "epoch": 0.9856646521269452, "grad_norm": 0.5295176680222132, "learning_rate": 7.964314679643147e-07, "loss": 0.5615, "step": 33760 }, { "epoch": 0.9856938483547926, "grad_norm": 0.5100092373306765, "learning_rate": 7.948094079480941e-07, "loss": 0.5744, "step": 33761 }, { "epoch": 0.9857230445826399, "grad_norm": 0.5434522377744102, "learning_rate": 7.931873479318735e-07, "loss": 0.572, "step": 33762 }, { "epoch": 0.9857522408104873, "grad_norm": 0.5321524540894902, "learning_rate": 7.915652879156529e-07, "loss": 0.5794, "step": 33763 }, { "epoch": 0.9857814370383347, "grad_norm": 0.5560535861450254, "learning_rate": 7.899432278994323e-07, "loss": 0.6073, "step": 33764 }, { "epoch": 0.985810633266182, "grad_norm": 0.543400302280479, "learning_rate": 7.883211678832117e-07, "loss": 0.5917, "step": 33765 }, { "epoch": 0.9858398294940294, "grad_norm": 0.533945803603586, "learning_rate": 7.866991078669911e-07, "loss": 0.5647, "step": 33766 }, { "epoch": 0.9858690257218767, "grad_norm": 0.5354550814068937, "learning_rate": 7.850770478507706e-07, "loss": 0.6059, "step": 33767 }, { "epoch": 0.9858982219497241, "grad_norm": 0.5328575997845733, "learning_rate": 7.834549878345499e-07, "loss": 0.5552, "step": 33768 }, { "epoch": 0.9859274181775715, "grad_norm": 0.5128242811814839, "learning_rate": 7.818329278183293e-07, "loss": 0.5577, "step": 33769 }, { "epoch": 0.9859566144054188, "grad_norm": 0.5381682878923623, "learning_rate": 7.802108678021087e-07, "loss": 0.5894, "step": 33770 }, { "epoch": 0.9859858106332662, "grad_norm": 0.5007741928464902, "learning_rate": 7.785888077858882e-07, "loss": 0.5473, "step": 33771 }, { "epoch": 0.9860150068611135, "grad_norm": 0.48758730342488305, "learning_rate": 7.769667477696675e-07, "loss": 0.5105, "step": 33772 }, { "epoch": 0.9860442030889609, "grad_norm": 0.5287650283652992, "learning_rate": 7.753446877534468e-07, "loss": 0.5897, "step": 33773 }, { "epoch": 0.9860733993168083, "grad_norm": 0.5522209267399013, "learning_rate": 7.737226277372264e-07, "loss": 0.5983, "step": 33774 }, { "epoch": 0.9861025955446556, "grad_norm": 0.5714110797008067, "learning_rate": 7.721005677210057e-07, "loss": 0.6534, "step": 33775 }, { "epoch": 0.986131791772503, "grad_norm": 0.5513949630772248, "learning_rate": 7.704785077047851e-07, "loss": 0.5757, "step": 33776 }, { "epoch": 0.9861609880003503, "grad_norm": 0.5259990574508795, "learning_rate": 7.688564476885646e-07, "loss": 0.5846, "step": 33777 }, { "epoch": 0.9861901842281977, "grad_norm": 0.5504681210209101, "learning_rate": 7.672343876723439e-07, "loss": 0.5941, "step": 33778 }, { "epoch": 0.9862193804560451, "grad_norm": 0.5082910881208769, "learning_rate": 7.656123276561233e-07, "loss": 0.5238, "step": 33779 }, { "epoch": 0.9862485766838924, "grad_norm": 0.5131501908158478, "learning_rate": 7.639902676399028e-07, "loss": 0.5435, "step": 33780 }, { "epoch": 0.9862777729117398, "grad_norm": 0.5298271364637291, "learning_rate": 7.623682076236821e-07, "loss": 0.6134, "step": 33781 }, { "epoch": 0.9863069691395872, "grad_norm": 0.5195005847767886, "learning_rate": 7.607461476074615e-07, "loss": 0.5099, "step": 33782 }, { "epoch": 0.9863361653674345, "grad_norm": 0.5237029450629737, "learning_rate": 7.591240875912408e-07, "loss": 0.5797, "step": 33783 }, { "epoch": 0.9863653615952819, "grad_norm": 0.5402971675664087, "learning_rate": 7.575020275750204e-07, "loss": 0.5994, "step": 33784 }, { "epoch": 0.9863945578231292, "grad_norm": 0.5023693549692707, "learning_rate": 7.558799675587997e-07, "loss": 0.5335, "step": 33785 }, { "epoch": 0.9864237540509766, "grad_norm": 0.5692440844073492, "learning_rate": 7.54257907542579e-07, "loss": 0.6183, "step": 33786 }, { "epoch": 0.986452950278824, "grad_norm": 0.508909700520979, "learning_rate": 7.526358475263586e-07, "loss": 0.5336, "step": 33787 }, { "epoch": 0.9864821465066713, "grad_norm": 0.5741077698105755, "learning_rate": 7.510137875101379e-07, "loss": 0.688, "step": 33788 }, { "epoch": 0.9865113427345187, "grad_norm": 0.5390150340225945, "learning_rate": 7.493917274939173e-07, "loss": 0.5906, "step": 33789 }, { "epoch": 0.986540538962366, "grad_norm": 0.5198603483742483, "learning_rate": 7.477696674776967e-07, "loss": 0.567, "step": 33790 }, { "epoch": 0.9865697351902134, "grad_norm": 0.5403715199690706, "learning_rate": 7.461476074614761e-07, "loss": 0.6045, "step": 33791 }, { "epoch": 0.9865989314180608, "grad_norm": 0.5303690459340722, "learning_rate": 7.445255474452555e-07, "loss": 0.5966, "step": 33792 }, { "epoch": 0.9866281276459081, "grad_norm": 0.48959924384277026, "learning_rate": 7.429034874290349e-07, "loss": 0.5031, "step": 33793 }, { "epoch": 0.9866573238737555, "grad_norm": 0.532397614457145, "learning_rate": 7.412814274128144e-07, "loss": 0.6192, "step": 33794 }, { "epoch": 0.9866865201016028, "grad_norm": 0.5429985132193319, "learning_rate": 7.396593673965937e-07, "loss": 0.593, "step": 33795 }, { "epoch": 0.9867157163294502, "grad_norm": 0.5061229878069429, "learning_rate": 7.38037307380373e-07, "loss": 0.5506, "step": 33796 }, { "epoch": 0.9867449125572976, "grad_norm": 0.5043016185176719, "learning_rate": 7.364152473641526e-07, "loss": 0.5189, "step": 33797 }, { "epoch": 0.9867741087851449, "grad_norm": 0.5359854365950664, "learning_rate": 7.347931873479319e-07, "loss": 0.5963, "step": 33798 }, { "epoch": 0.9868033050129923, "grad_norm": 0.5253581806276281, "learning_rate": 7.331711273317112e-07, "loss": 0.5412, "step": 33799 }, { "epoch": 0.9868325012408397, "grad_norm": 0.5446674735826733, "learning_rate": 7.315490673154907e-07, "loss": 0.5905, "step": 33800 }, { "epoch": 0.986861697468687, "grad_norm": 0.5440528246498951, "learning_rate": 7.299270072992701e-07, "loss": 0.5603, "step": 33801 }, { "epoch": 0.9868908936965344, "grad_norm": 0.5484499322506322, "learning_rate": 7.283049472830495e-07, "loss": 0.5764, "step": 33802 }, { "epoch": 0.9869200899243817, "grad_norm": 0.5300800381465348, "learning_rate": 7.266828872668289e-07, "loss": 0.5706, "step": 33803 }, { "epoch": 0.9869492861522291, "grad_norm": 0.5249192959073683, "learning_rate": 7.250608272506083e-07, "loss": 0.6073, "step": 33804 }, { "epoch": 0.9869784823800765, "grad_norm": 0.517814836801884, "learning_rate": 7.234387672343877e-07, "loss": 0.562, "step": 33805 }, { "epoch": 0.9870076786079238, "grad_norm": 0.5387902718014691, "learning_rate": 7.218167072181671e-07, "loss": 0.6185, "step": 33806 }, { "epoch": 0.9870368748357712, "grad_norm": 0.5054206374140616, "learning_rate": 7.201946472019466e-07, "loss": 0.5302, "step": 33807 }, { "epoch": 0.9870660710636185, "grad_norm": 0.5235498023844342, "learning_rate": 7.185725871857259e-07, "loss": 0.562, "step": 33808 }, { "epoch": 0.9870952672914659, "grad_norm": 0.4920772871569357, "learning_rate": 7.169505271695052e-07, "loss": 0.5042, "step": 33809 }, { "epoch": 0.9871244635193133, "grad_norm": 0.5244208943792791, "learning_rate": 7.153284671532847e-07, "loss": 0.5342, "step": 33810 }, { "epoch": 0.9871536597471606, "grad_norm": 0.545217253247877, "learning_rate": 7.137064071370641e-07, "loss": 0.6222, "step": 33811 }, { "epoch": 0.987182855975008, "grad_norm": 0.5286311218101444, "learning_rate": 7.120843471208435e-07, "loss": 0.5732, "step": 33812 }, { "epoch": 0.9872120522028554, "grad_norm": 0.5186703224372043, "learning_rate": 7.104622871046229e-07, "loss": 0.522, "step": 33813 }, { "epoch": 0.9872412484307027, "grad_norm": 0.5283045547608919, "learning_rate": 7.088402270884023e-07, "loss": 0.5327, "step": 33814 }, { "epoch": 0.9872704446585501, "grad_norm": 0.5338944594082375, "learning_rate": 7.072181670721817e-07, "loss": 0.5889, "step": 33815 }, { "epoch": 0.9872996408863974, "grad_norm": 0.5081495972467076, "learning_rate": 7.055961070559611e-07, "loss": 0.5418, "step": 33816 }, { "epoch": 0.9873288371142448, "grad_norm": 0.52806690262778, "learning_rate": 7.039740470397405e-07, "loss": 0.5277, "step": 33817 }, { "epoch": 0.9873580333420922, "grad_norm": 0.5291137834233739, "learning_rate": 7.023519870235199e-07, "loss": 0.569, "step": 33818 }, { "epoch": 0.9873872295699395, "grad_norm": 0.5101947871620817, "learning_rate": 7.007299270072993e-07, "loss": 0.5673, "step": 33819 }, { "epoch": 0.9874164257977869, "grad_norm": 0.5415528866508899, "learning_rate": 6.991078669910787e-07, "loss": 0.6053, "step": 33820 }, { "epoch": 0.9874456220256342, "grad_norm": 0.514247918238607, "learning_rate": 6.974858069748581e-07, "loss": 0.543, "step": 33821 }, { "epoch": 0.9874748182534816, "grad_norm": 0.5182660000383991, "learning_rate": 6.958637469586374e-07, "loss": 0.5349, "step": 33822 }, { "epoch": 0.987504014481329, "grad_norm": 0.5415925808272265, "learning_rate": 6.942416869424169e-07, "loss": 0.5823, "step": 33823 }, { "epoch": 0.9875332107091763, "grad_norm": 0.4926198268473414, "learning_rate": 6.926196269261963e-07, "loss": 0.5091, "step": 33824 }, { "epoch": 0.9875624069370237, "grad_norm": 0.5056280741998799, "learning_rate": 6.909975669099757e-07, "loss": 0.5194, "step": 33825 }, { "epoch": 0.987591603164871, "grad_norm": 0.5015187480995008, "learning_rate": 6.893755068937551e-07, "loss": 0.5275, "step": 33826 }, { "epoch": 0.9876207993927184, "grad_norm": 0.5339028326523593, "learning_rate": 6.877534468775345e-07, "loss": 0.5562, "step": 33827 }, { "epoch": 0.9876499956205658, "grad_norm": 0.4967600238495701, "learning_rate": 6.861313868613139e-07, "loss": 0.5406, "step": 33828 }, { "epoch": 0.9876791918484132, "grad_norm": 0.5078942294720614, "learning_rate": 6.845093268450933e-07, "loss": 0.5633, "step": 33829 }, { "epoch": 0.9877083880762606, "grad_norm": 0.5251443935561041, "learning_rate": 6.828872668288727e-07, "loss": 0.59, "step": 33830 }, { "epoch": 0.987737584304108, "grad_norm": 0.5405938051511524, "learning_rate": 6.812652068126521e-07, "loss": 0.6295, "step": 33831 }, { "epoch": 0.9877667805319553, "grad_norm": 0.5334815509959573, "learning_rate": 6.796431467964315e-07, "loss": 0.5908, "step": 33832 }, { "epoch": 0.9877959767598027, "grad_norm": 0.5172396023523966, "learning_rate": 6.780210867802109e-07, "loss": 0.5291, "step": 33833 }, { "epoch": 0.98782517298765, "grad_norm": 0.5437404404953856, "learning_rate": 6.763990267639903e-07, "loss": 0.5923, "step": 33834 }, { "epoch": 0.9878543692154974, "grad_norm": 0.5821086846900685, "learning_rate": 6.747769667477698e-07, "loss": 0.6777, "step": 33835 }, { "epoch": 0.9878835654433448, "grad_norm": 0.5252973005674205, "learning_rate": 6.731549067315491e-07, "loss": 0.5703, "step": 33836 }, { "epoch": 0.9879127616711921, "grad_norm": 0.5093564488317956, "learning_rate": 6.715328467153285e-07, "loss": 0.538, "step": 33837 }, { "epoch": 0.9879419578990395, "grad_norm": 0.5574266704400194, "learning_rate": 6.69910786699108e-07, "loss": 0.6033, "step": 33838 }, { "epoch": 0.9879711541268869, "grad_norm": 0.5316814406035894, "learning_rate": 6.682887266828873e-07, "loss": 0.5643, "step": 33839 }, { "epoch": 0.9880003503547342, "grad_norm": 0.5032171839439631, "learning_rate": 6.666666666666667e-07, "loss": 0.5158, "step": 33840 }, { "epoch": 0.9880295465825816, "grad_norm": 0.5499115061966044, "learning_rate": 6.650446066504461e-07, "loss": 0.6255, "step": 33841 }, { "epoch": 0.9880587428104289, "grad_norm": 0.5113239186083292, "learning_rate": 6.634225466342255e-07, "loss": 0.5615, "step": 33842 }, { "epoch": 0.9880879390382763, "grad_norm": 0.550474654094276, "learning_rate": 6.618004866180049e-07, "loss": 0.6253, "step": 33843 }, { "epoch": 0.9881171352661237, "grad_norm": 0.5374015473527263, "learning_rate": 6.601784266017843e-07, "loss": 0.5524, "step": 33844 }, { "epoch": 0.988146331493971, "grad_norm": 0.5646002159069734, "learning_rate": 6.585563665855637e-07, "loss": 0.6359, "step": 33845 }, { "epoch": 0.9881755277218184, "grad_norm": 0.5242883958600071, "learning_rate": 6.569343065693431e-07, "loss": 0.5288, "step": 33846 }, { "epoch": 0.9882047239496657, "grad_norm": 0.5127827356977755, "learning_rate": 6.553122465531225e-07, "loss": 0.5524, "step": 33847 }, { "epoch": 0.9882339201775131, "grad_norm": 0.5187486549746153, "learning_rate": 6.53690186536902e-07, "loss": 0.5424, "step": 33848 }, { "epoch": 0.9882631164053605, "grad_norm": 0.5416736256894498, "learning_rate": 6.520681265206813e-07, "loss": 0.5352, "step": 33849 }, { "epoch": 0.9882923126332078, "grad_norm": 0.542795376744512, "learning_rate": 6.504460665044606e-07, "loss": 0.6129, "step": 33850 }, { "epoch": 0.9883215088610552, "grad_norm": 0.5173135071180248, "learning_rate": 6.488240064882401e-07, "loss": 0.5702, "step": 33851 }, { "epoch": 0.9883507050889025, "grad_norm": 0.47808900336203647, "learning_rate": 6.472019464720195e-07, "loss": 0.4855, "step": 33852 }, { "epoch": 0.9883799013167499, "grad_norm": 0.5386096692359522, "learning_rate": 6.455798864557989e-07, "loss": 0.5874, "step": 33853 }, { "epoch": 0.9884090975445973, "grad_norm": 0.5089457045195324, "learning_rate": 6.439578264395783e-07, "loss": 0.5622, "step": 33854 }, { "epoch": 0.9884382937724446, "grad_norm": 0.5200442843500676, "learning_rate": 6.423357664233577e-07, "loss": 0.5538, "step": 33855 }, { "epoch": 0.988467490000292, "grad_norm": 0.5531381423823769, "learning_rate": 6.407137064071371e-07, "loss": 0.5949, "step": 33856 }, { "epoch": 0.9884966862281394, "grad_norm": 0.5384903917143881, "learning_rate": 6.390916463909165e-07, "loss": 0.605, "step": 33857 }, { "epoch": 0.9885258824559867, "grad_norm": 0.5171638601887469, "learning_rate": 6.37469586374696e-07, "loss": 0.5384, "step": 33858 }, { "epoch": 0.9885550786838341, "grad_norm": 0.5054849906176885, "learning_rate": 6.358475263584753e-07, "loss": 0.49, "step": 33859 }, { "epoch": 0.9885842749116814, "grad_norm": 0.49427508775951845, "learning_rate": 6.342254663422546e-07, "loss": 0.5303, "step": 33860 }, { "epoch": 0.9886134711395288, "grad_norm": 0.5341588384014518, "learning_rate": 6.326034063260342e-07, "loss": 0.5562, "step": 33861 }, { "epoch": 0.9886426673673762, "grad_norm": 0.5175698372716352, "learning_rate": 6.309813463098135e-07, "loss": 0.5294, "step": 33862 }, { "epoch": 0.9886718635952235, "grad_norm": 0.5415857150850723, "learning_rate": 6.293592862935928e-07, "loss": 0.6058, "step": 33863 }, { "epoch": 0.9887010598230709, "grad_norm": 0.5445496180128858, "learning_rate": 6.277372262773724e-07, "loss": 0.6278, "step": 33864 }, { "epoch": 0.9887302560509182, "grad_norm": 0.5262729934483503, "learning_rate": 6.261151662611517e-07, "loss": 0.5631, "step": 33865 }, { "epoch": 0.9887594522787656, "grad_norm": 0.5240033703351517, "learning_rate": 6.244931062449311e-07, "loss": 0.575, "step": 33866 }, { "epoch": 0.988788648506613, "grad_norm": 0.5204423439258895, "learning_rate": 6.228710462287104e-07, "loss": 0.5392, "step": 33867 }, { "epoch": 0.9888178447344603, "grad_norm": 0.5615548495014472, "learning_rate": 6.212489862124899e-07, "loss": 0.6112, "step": 33868 }, { "epoch": 0.9888470409623077, "grad_norm": 0.4977027004633809, "learning_rate": 6.196269261962693e-07, "loss": 0.5007, "step": 33869 }, { "epoch": 0.988876237190155, "grad_norm": 0.5459882202536851, "learning_rate": 6.180048661800486e-07, "loss": 0.5795, "step": 33870 }, { "epoch": 0.9889054334180024, "grad_norm": 0.5352488699488451, "learning_rate": 6.163828061638282e-07, "loss": 0.5797, "step": 33871 }, { "epoch": 0.9889346296458498, "grad_norm": 0.5361688036853358, "learning_rate": 6.147607461476075e-07, "loss": 0.5849, "step": 33872 }, { "epoch": 0.9889638258736971, "grad_norm": 0.5520967649566147, "learning_rate": 6.131386861313868e-07, "loss": 0.6231, "step": 33873 }, { "epoch": 0.9889930221015445, "grad_norm": 0.5336450057921944, "learning_rate": 6.115166261151664e-07, "loss": 0.6266, "step": 33874 }, { "epoch": 0.9890222183293919, "grad_norm": 0.5106637721194113, "learning_rate": 6.098945660989457e-07, "loss": 0.5417, "step": 33875 }, { "epoch": 0.9890514145572392, "grad_norm": 0.505873205890898, "learning_rate": 6.082725060827251e-07, "loss": 0.5027, "step": 33876 }, { "epoch": 0.9890806107850866, "grad_norm": 0.5572174000130136, "learning_rate": 6.066504460665044e-07, "loss": 0.6291, "step": 33877 }, { "epoch": 0.9891098070129339, "grad_norm": 0.5057492517932461, "learning_rate": 6.050283860502839e-07, "loss": 0.5492, "step": 33878 }, { "epoch": 0.9891390032407813, "grad_norm": 0.5296395537322188, "learning_rate": 6.034063260340633e-07, "loss": 0.5868, "step": 33879 }, { "epoch": 0.9891681994686287, "grad_norm": 0.5436734210274009, "learning_rate": 6.017842660178426e-07, "loss": 0.5859, "step": 33880 }, { "epoch": 0.989197395696476, "grad_norm": 0.5634757618016135, "learning_rate": 6.001622060016222e-07, "loss": 0.6686, "step": 33881 }, { "epoch": 0.9892265919243234, "grad_norm": 0.5160977470716696, "learning_rate": 5.985401459854015e-07, "loss": 0.5222, "step": 33882 }, { "epoch": 0.9892557881521707, "grad_norm": 0.49506401328256616, "learning_rate": 5.969180859691808e-07, "loss": 0.4938, "step": 33883 }, { "epoch": 0.9892849843800181, "grad_norm": 0.536269159357939, "learning_rate": 5.952960259529603e-07, "loss": 0.5961, "step": 33884 }, { "epoch": 0.9893141806078655, "grad_norm": 0.5114153325424935, "learning_rate": 5.936739659367397e-07, "loss": 0.5338, "step": 33885 }, { "epoch": 0.9893433768357128, "grad_norm": 0.5269322669881378, "learning_rate": 5.92051905920519e-07, "loss": 0.5913, "step": 33886 }, { "epoch": 0.9893725730635602, "grad_norm": 0.5181182773988586, "learning_rate": 5.904298459042985e-07, "loss": 0.5706, "step": 33887 }, { "epoch": 0.9894017692914076, "grad_norm": 0.507651304012824, "learning_rate": 5.888077858880779e-07, "loss": 0.538, "step": 33888 }, { "epoch": 0.9894309655192549, "grad_norm": 0.5601059726071592, "learning_rate": 5.871857258718573e-07, "loss": 0.6603, "step": 33889 }, { "epoch": 0.9894601617471023, "grad_norm": 0.5399960300075936, "learning_rate": 5.855636658556366e-07, "loss": 0.5876, "step": 33890 }, { "epoch": 0.9894893579749496, "grad_norm": 0.5361223558487028, "learning_rate": 5.839416058394161e-07, "loss": 0.5975, "step": 33891 }, { "epoch": 0.989518554202797, "grad_norm": 0.5262727714171769, "learning_rate": 5.823195458231955e-07, "loss": 0.5826, "step": 33892 }, { "epoch": 0.9895477504306444, "grad_norm": 0.5265491156484465, "learning_rate": 5.806974858069748e-07, "loss": 0.5627, "step": 33893 }, { "epoch": 0.9895769466584917, "grad_norm": 0.5844415018186785, "learning_rate": 5.790754257907543e-07, "loss": 0.7185, "step": 33894 }, { "epoch": 0.9896061428863391, "grad_norm": 0.6015188984907525, "learning_rate": 5.774533657745337e-07, "loss": 0.6805, "step": 33895 }, { "epoch": 0.9896353391141864, "grad_norm": 0.5351516715248642, "learning_rate": 5.75831305758313e-07, "loss": 0.6132, "step": 33896 }, { "epoch": 0.9896645353420338, "grad_norm": 0.5260853457670943, "learning_rate": 5.742092457420925e-07, "loss": 0.5619, "step": 33897 }, { "epoch": 0.9896937315698812, "grad_norm": 0.538786454895313, "learning_rate": 5.725871857258719e-07, "loss": 0.5346, "step": 33898 }, { "epoch": 0.9897229277977285, "grad_norm": 0.5204179021827462, "learning_rate": 5.709651257096513e-07, "loss": 0.5339, "step": 33899 }, { "epoch": 0.9897521240255759, "grad_norm": 0.5147606569053464, "learning_rate": 5.693430656934307e-07, "loss": 0.5367, "step": 33900 }, { "epoch": 0.9897813202534232, "grad_norm": 0.8975353971669663, "learning_rate": 5.677210056772101e-07, "loss": 0.7307, "step": 33901 }, { "epoch": 0.9898105164812706, "grad_norm": 0.5277525639724184, "learning_rate": 5.660989456609895e-07, "loss": 0.541, "step": 33902 }, { "epoch": 0.989839712709118, "grad_norm": 0.5416933052308134, "learning_rate": 5.644768856447689e-07, "loss": 0.6221, "step": 33903 }, { "epoch": 0.9898689089369653, "grad_norm": 0.5568110052922102, "learning_rate": 5.628548256285483e-07, "loss": 0.6283, "step": 33904 }, { "epoch": 0.9898981051648127, "grad_norm": 0.5644750193605248, "learning_rate": 5.612327656123277e-07, "loss": 0.5875, "step": 33905 }, { "epoch": 0.98992730139266, "grad_norm": 0.566876845454864, "learning_rate": 5.59610705596107e-07, "loss": 0.6324, "step": 33906 }, { "epoch": 0.9899564976205074, "grad_norm": 0.5195030244964256, "learning_rate": 5.579886455798865e-07, "loss": 0.5667, "step": 33907 }, { "epoch": 0.9899856938483548, "grad_norm": 0.5532671438944814, "learning_rate": 5.563665855636659e-07, "loss": 0.652, "step": 33908 }, { "epoch": 0.9900148900762021, "grad_norm": 0.5550851378317035, "learning_rate": 5.547445255474452e-07, "loss": 0.588, "step": 33909 }, { "epoch": 0.9900440863040495, "grad_norm": 0.5742227402885224, "learning_rate": 5.531224655312247e-07, "loss": 0.6937, "step": 33910 }, { "epoch": 0.9900732825318969, "grad_norm": 0.5211344830422391, "learning_rate": 5.515004055150041e-07, "loss": 0.5866, "step": 33911 }, { "epoch": 0.9901024787597442, "grad_norm": 0.5183272463538149, "learning_rate": 5.498783454987835e-07, "loss": 0.5531, "step": 33912 }, { "epoch": 0.9901316749875916, "grad_norm": 0.5387347628646145, "learning_rate": 5.482562854825629e-07, "loss": 0.6506, "step": 33913 }, { "epoch": 0.9901608712154389, "grad_norm": 0.5218237978491254, "learning_rate": 5.466342254663423e-07, "loss": 0.5524, "step": 33914 }, { "epoch": 0.9901900674432863, "grad_norm": 0.5352920424292088, "learning_rate": 5.450121654501217e-07, "loss": 0.5385, "step": 33915 }, { "epoch": 0.9902192636711337, "grad_norm": 0.5249631205675139, "learning_rate": 5.43390105433901e-07, "loss": 0.5322, "step": 33916 }, { "epoch": 0.990248459898981, "grad_norm": 0.5302150070399093, "learning_rate": 5.417680454176805e-07, "loss": 0.5884, "step": 33917 }, { "epoch": 0.9902776561268284, "grad_norm": 0.5452304855234417, "learning_rate": 5.401459854014599e-07, "loss": 0.594, "step": 33918 }, { "epoch": 0.9903068523546757, "grad_norm": 0.5334232997401535, "learning_rate": 5.385239253852392e-07, "loss": 0.5445, "step": 33919 }, { "epoch": 0.9903360485825231, "grad_norm": 0.5046145676583172, "learning_rate": 5.369018653690187e-07, "loss": 0.527, "step": 33920 }, { "epoch": 0.9903652448103705, "grad_norm": 0.5236623611921083, "learning_rate": 5.352798053527981e-07, "loss": 0.5386, "step": 33921 }, { "epoch": 0.9903944410382178, "grad_norm": 0.5405515462330714, "learning_rate": 5.336577453365775e-07, "loss": 0.629, "step": 33922 }, { "epoch": 0.9904236372660652, "grad_norm": 0.5009135611759666, "learning_rate": 5.320356853203569e-07, "loss": 0.524, "step": 33923 }, { "epoch": 0.9904528334939126, "grad_norm": 0.493924833198754, "learning_rate": 5.304136253041363e-07, "loss": 0.5199, "step": 33924 }, { "epoch": 0.9904820297217599, "grad_norm": 0.4898910632188397, "learning_rate": 5.287915652879157e-07, "loss": 0.5019, "step": 33925 }, { "epoch": 0.9905112259496073, "grad_norm": 0.5369856604106483, "learning_rate": 5.271695052716951e-07, "loss": 0.6026, "step": 33926 }, { "epoch": 0.9905404221774546, "grad_norm": 0.570985244881201, "learning_rate": 5.255474452554744e-07, "loss": 0.6313, "step": 33927 }, { "epoch": 0.990569618405302, "grad_norm": 0.5729785526588679, "learning_rate": 5.239253852392539e-07, "loss": 0.6246, "step": 33928 }, { "epoch": 0.9905988146331494, "grad_norm": 0.5194493340306524, "learning_rate": 5.223033252230333e-07, "loss": 0.5508, "step": 33929 }, { "epoch": 0.9906280108609967, "grad_norm": 0.5459898896144763, "learning_rate": 5.206812652068127e-07, "loss": 0.6162, "step": 33930 }, { "epoch": 0.9906572070888441, "grad_norm": 0.509157212191186, "learning_rate": 5.190592051905921e-07, "loss": 0.562, "step": 33931 }, { "epoch": 0.9906864033166914, "grad_norm": 0.5317167834264632, "learning_rate": 5.174371451743714e-07, "loss": 0.5425, "step": 33932 }, { "epoch": 0.9907155995445388, "grad_norm": 0.5362924451166575, "learning_rate": 5.158150851581509e-07, "loss": 0.6116, "step": 33933 }, { "epoch": 0.9907447957723862, "grad_norm": 0.4907974380094578, "learning_rate": 5.141930251419302e-07, "loss": 0.5171, "step": 33934 }, { "epoch": 0.9907739920002335, "grad_norm": 0.5395254107386133, "learning_rate": 5.125709651257097e-07, "loss": 0.616, "step": 33935 }, { "epoch": 0.9908031882280809, "grad_norm": 0.5461090467841174, "learning_rate": 5.109489051094891e-07, "loss": 0.5724, "step": 33936 }, { "epoch": 0.9908323844559283, "grad_norm": 0.5084405220697821, "learning_rate": 5.093268450932684e-07, "loss": 0.5339, "step": 33937 }, { "epoch": 0.9908615806837756, "grad_norm": 0.5149362880703472, "learning_rate": 5.077047850770479e-07, "loss": 0.5518, "step": 33938 }, { "epoch": 0.990890776911623, "grad_norm": 0.5057178509141083, "learning_rate": 5.060827250608273e-07, "loss": 0.5278, "step": 33939 }, { "epoch": 0.9909199731394703, "grad_norm": 0.5122109277859893, "learning_rate": 5.044606650446067e-07, "loss": 0.5542, "step": 33940 }, { "epoch": 0.9909491693673177, "grad_norm": 0.53572632852756, "learning_rate": 5.028386050283861e-07, "loss": 0.5767, "step": 33941 }, { "epoch": 0.9909783655951651, "grad_norm": 0.5483691331718301, "learning_rate": 5.012165450121655e-07, "loss": 0.5765, "step": 33942 }, { "epoch": 0.9910075618230124, "grad_norm": 0.5156818120906235, "learning_rate": 4.995944849959449e-07, "loss": 0.5752, "step": 33943 }, { "epoch": 0.9910367580508598, "grad_norm": 0.4787702972478365, "learning_rate": 4.979724249797242e-07, "loss": 0.4999, "step": 33944 }, { "epoch": 0.9910659542787071, "grad_norm": 0.537295256380448, "learning_rate": 4.963503649635038e-07, "loss": 0.5996, "step": 33945 }, { "epoch": 0.9910951505065545, "grad_norm": 0.5746510919675715, "learning_rate": 4.947283049472831e-07, "loss": 0.6582, "step": 33946 }, { "epoch": 0.9911243467344019, "grad_norm": 0.5684322188075136, "learning_rate": 4.931062449310624e-07, "loss": 0.6532, "step": 33947 }, { "epoch": 0.9911535429622492, "grad_norm": 0.5803534241388753, "learning_rate": 4.914841849148419e-07, "loss": 0.6314, "step": 33948 }, { "epoch": 0.9911827391900966, "grad_norm": 0.5332279158746954, "learning_rate": 4.898621248986213e-07, "loss": 0.562, "step": 33949 }, { "epoch": 0.9912119354179441, "grad_norm": 0.5359383832027569, "learning_rate": 4.882400648824006e-07, "loss": 0.6165, "step": 33950 }, { "epoch": 0.9912411316457914, "grad_norm": 0.5190210426122756, "learning_rate": 4.8661800486618e-07, "loss": 0.4959, "step": 33951 }, { "epoch": 0.9912703278736388, "grad_norm": 0.5466474561982522, "learning_rate": 4.849959448499595e-07, "loss": 0.6349, "step": 33952 }, { "epoch": 0.9912995241014861, "grad_norm": 0.5006555316579284, "learning_rate": 4.833738848337389e-07, "loss": 0.5214, "step": 33953 }, { "epoch": 0.9913287203293335, "grad_norm": 0.5374299171474152, "learning_rate": 4.817518248175182e-07, "loss": 0.5546, "step": 33954 }, { "epoch": 0.9913579165571809, "grad_norm": 0.5600989637417095, "learning_rate": 4.801297648012977e-07, "loss": 0.6169, "step": 33955 }, { "epoch": 0.9913871127850282, "grad_norm": 0.5312829091551168, "learning_rate": 4.785077047850771e-07, "loss": 0.5694, "step": 33956 }, { "epoch": 0.9914163090128756, "grad_norm": 0.5372293356001171, "learning_rate": 4.768856447688564e-07, "loss": 0.5793, "step": 33957 }, { "epoch": 0.991445505240723, "grad_norm": 0.5290494414081847, "learning_rate": 4.752635847526359e-07, "loss": 0.5695, "step": 33958 }, { "epoch": 0.9914747014685703, "grad_norm": 0.5417664604401508, "learning_rate": 4.736415247364153e-07, "loss": 0.6028, "step": 33959 }, { "epoch": 0.9915038976964177, "grad_norm": 0.5539871001086736, "learning_rate": 4.7201946472019466e-07, "loss": 0.6203, "step": 33960 }, { "epoch": 0.991533093924265, "grad_norm": 0.49884457936837673, "learning_rate": 4.703974047039741e-07, "loss": 0.5216, "step": 33961 }, { "epoch": 0.9915622901521124, "grad_norm": 0.5101410802096596, "learning_rate": 4.687753446877535e-07, "loss": 0.5158, "step": 33962 }, { "epoch": 0.9915914863799598, "grad_norm": 0.4968038293652108, "learning_rate": 4.671532846715329e-07, "loss": 0.5399, "step": 33963 }, { "epoch": 0.9916206826078071, "grad_norm": 0.5398665744780334, "learning_rate": 4.655312246553123e-07, "loss": 0.6326, "step": 33964 }, { "epoch": 0.9916498788356545, "grad_norm": 0.5422607116412399, "learning_rate": 4.6390916463909167e-07, "loss": 0.5837, "step": 33965 }, { "epoch": 0.9916790750635018, "grad_norm": 0.5039400635488129, "learning_rate": 4.622871046228711e-07, "loss": 0.5541, "step": 33966 }, { "epoch": 0.9917082712913492, "grad_norm": 0.5874250469287217, "learning_rate": 4.606650446066505e-07, "loss": 0.6404, "step": 33967 }, { "epoch": 0.9917374675191966, "grad_norm": 0.50875218179735, "learning_rate": 4.590429845904298e-07, "loss": 0.5255, "step": 33968 }, { "epoch": 0.9917666637470439, "grad_norm": 0.5336998671222939, "learning_rate": 4.574209245742093e-07, "loss": 0.5802, "step": 33969 }, { "epoch": 0.9917958599748913, "grad_norm": 0.5164858622740234, "learning_rate": 4.557988645579886e-07, "loss": 0.5889, "step": 33970 }, { "epoch": 0.9918250562027386, "grad_norm": 0.5721482933306105, "learning_rate": 4.541768045417681e-07, "loss": 0.6304, "step": 33971 }, { "epoch": 0.991854252430586, "grad_norm": 0.5058590859822516, "learning_rate": 4.525547445255475e-07, "loss": 0.5373, "step": 33972 }, { "epoch": 0.9918834486584334, "grad_norm": 0.5088454415452517, "learning_rate": 4.509326845093268e-07, "loss": 0.5044, "step": 33973 }, { "epoch": 0.9919126448862807, "grad_norm": 0.5480566960106439, "learning_rate": 4.493106244931063e-07, "loss": 0.572, "step": 33974 }, { "epoch": 0.9919418411141281, "grad_norm": 0.6259282889154699, "learning_rate": 4.476885644768856e-07, "loss": 0.5833, "step": 33975 }, { "epoch": 0.9919710373419754, "grad_norm": 0.5250803275013507, "learning_rate": 4.460665044606651e-07, "loss": 0.5791, "step": 33976 }, { "epoch": 0.9920002335698228, "grad_norm": 0.4951947831283136, "learning_rate": 4.444444444444445e-07, "loss": 0.5048, "step": 33977 }, { "epoch": 0.9920294297976702, "grad_norm": 0.5547794956740298, "learning_rate": 4.428223844282238e-07, "loss": 0.5412, "step": 33978 }, { "epoch": 0.9920586260255175, "grad_norm": 0.5161600866502569, "learning_rate": 4.412003244120033e-07, "loss": 0.5147, "step": 33979 }, { "epoch": 0.9920878222533649, "grad_norm": 0.5429541726463644, "learning_rate": 4.3957826439578263e-07, "loss": 0.6046, "step": 33980 }, { "epoch": 0.9921170184812123, "grad_norm": 0.5142287866238212, "learning_rate": 4.379562043795621e-07, "loss": 0.5264, "step": 33981 }, { "epoch": 0.9921462147090596, "grad_norm": 0.5556710275565466, "learning_rate": 4.363341443633415e-07, "loss": 0.6044, "step": 33982 }, { "epoch": 0.992175410936907, "grad_norm": 0.5187253356420312, "learning_rate": 4.347120843471208e-07, "loss": 0.5319, "step": 33983 }, { "epoch": 0.9922046071647543, "grad_norm": 0.5228678937191836, "learning_rate": 4.330900243309003e-07, "loss": 0.5638, "step": 33984 }, { "epoch": 0.9922338033926017, "grad_norm": 0.512705454433513, "learning_rate": 4.3146796431467963e-07, "loss": 0.5422, "step": 33985 }, { "epoch": 0.9922629996204491, "grad_norm": 0.5431498186594476, "learning_rate": 4.298459042984591e-07, "loss": 0.6047, "step": 33986 }, { "epoch": 0.9922921958482964, "grad_norm": 0.5152494720886729, "learning_rate": 4.2822384428223845e-07, "loss": 0.5336, "step": 33987 }, { "epoch": 0.9923213920761438, "grad_norm": 0.47579012480370053, "learning_rate": 4.266017842660178e-07, "loss": 0.4658, "step": 33988 }, { "epoch": 0.9923505883039911, "grad_norm": 0.5131756758285214, "learning_rate": 4.249797242497973e-07, "loss": 0.5558, "step": 33989 }, { "epoch": 0.9923797845318385, "grad_norm": 0.5510045638461797, "learning_rate": 4.2335766423357664e-07, "loss": 0.6067, "step": 33990 }, { "epoch": 0.9924089807596859, "grad_norm": 0.5180100735948604, "learning_rate": 4.21735604217356e-07, "loss": 0.5785, "step": 33991 }, { "epoch": 0.9924381769875332, "grad_norm": 0.49510803075386023, "learning_rate": 4.2011354420113545e-07, "loss": 0.4897, "step": 33992 }, { "epoch": 0.9924673732153806, "grad_norm": 0.5115204486059193, "learning_rate": 4.1849148418491483e-07, "loss": 0.5366, "step": 33993 }, { "epoch": 0.992496569443228, "grad_norm": 0.5132841463026154, "learning_rate": 4.168694241686943e-07, "loss": 0.5584, "step": 33994 }, { "epoch": 0.9925257656710753, "grad_norm": 0.5453727664952054, "learning_rate": 4.1524736415247364e-07, "loss": 0.6174, "step": 33995 }, { "epoch": 0.9925549618989227, "grad_norm": 0.519263736963158, "learning_rate": 4.13625304136253e-07, "loss": 0.5527, "step": 33996 }, { "epoch": 0.99258415812677, "grad_norm": 0.5302197518848706, "learning_rate": 4.1200324412003246e-07, "loss": 0.5226, "step": 33997 }, { "epoch": 0.9926133543546174, "grad_norm": 0.5385302459476758, "learning_rate": 4.1038118410381183e-07, "loss": 0.5603, "step": 33998 }, { "epoch": 0.9926425505824648, "grad_norm": 0.5407293387873477, "learning_rate": 4.087591240875913e-07, "loss": 0.5779, "step": 33999 }, { "epoch": 0.9926717468103121, "grad_norm": 0.5508947584978664, "learning_rate": 4.0713706407137065e-07, "loss": 0.616, "step": 34000 }, { "epoch": 0.9927009430381595, "grad_norm": 0.5245870759906659, "learning_rate": 4.0551500405515003e-07, "loss": 0.5848, "step": 34001 }, { "epoch": 0.9927301392660068, "grad_norm": 0.4965435752213586, "learning_rate": 4.0389294403892946e-07, "loss": 0.497, "step": 34002 }, { "epoch": 0.9927593354938542, "grad_norm": 0.5211955597570795, "learning_rate": 4.0227088402270884e-07, "loss": 0.5792, "step": 34003 }, { "epoch": 0.9927885317217016, "grad_norm": 0.5334290500345695, "learning_rate": 4.0064882400648827e-07, "loss": 0.6062, "step": 34004 }, { "epoch": 0.9928177279495489, "grad_norm": 0.5517785762494674, "learning_rate": 3.9902676399026765e-07, "loss": 0.5979, "step": 34005 }, { "epoch": 0.9928469241773963, "grad_norm": 0.5457715487903719, "learning_rate": 3.9740470397404703e-07, "loss": 0.5959, "step": 34006 }, { "epoch": 0.9928761204052436, "grad_norm": 0.5302604354133894, "learning_rate": 3.9578264395782646e-07, "loss": 0.5692, "step": 34007 }, { "epoch": 0.992905316633091, "grad_norm": 0.5288641963362061, "learning_rate": 3.9416058394160584e-07, "loss": 0.5572, "step": 34008 }, { "epoch": 0.9929345128609384, "grad_norm": 0.5343846481495111, "learning_rate": 3.925385239253853e-07, "loss": 0.5634, "step": 34009 }, { "epoch": 0.9929637090887857, "grad_norm": 0.5207941487868877, "learning_rate": 3.9091646390916466e-07, "loss": 0.6056, "step": 34010 }, { "epoch": 0.9929929053166331, "grad_norm": 0.5123878007933745, "learning_rate": 3.892944038929441e-07, "loss": 0.5231, "step": 34011 }, { "epoch": 0.9930221015444805, "grad_norm": 0.5487437872391892, "learning_rate": 3.876723438767234e-07, "loss": 0.6368, "step": 34012 }, { "epoch": 0.9930512977723278, "grad_norm": 0.5258457107311637, "learning_rate": 3.8605028386050285e-07, "loss": 0.5534, "step": 34013 }, { "epoch": 0.9930804940001752, "grad_norm": 0.5169182449498105, "learning_rate": 3.844282238442823e-07, "loss": 0.5551, "step": 34014 }, { "epoch": 0.9931096902280225, "grad_norm": 0.5286438326559881, "learning_rate": 3.8280616382806166e-07, "loss": 0.5769, "step": 34015 }, { "epoch": 0.9931388864558699, "grad_norm": 0.5633910148569379, "learning_rate": 3.8118410381184104e-07, "loss": 0.6209, "step": 34016 }, { "epoch": 0.9931680826837173, "grad_norm": 0.5356796488061529, "learning_rate": 3.795620437956204e-07, "loss": 0.5816, "step": 34017 }, { "epoch": 0.9931972789115646, "grad_norm": 0.5011784219787118, "learning_rate": 3.7793998377939985e-07, "loss": 0.5429, "step": 34018 }, { "epoch": 0.993226475139412, "grad_norm": 0.5333932301360708, "learning_rate": 3.763179237631793e-07, "loss": 0.5353, "step": 34019 }, { "epoch": 0.9932556713672593, "grad_norm": 0.6045284049241877, "learning_rate": 3.7469586374695867e-07, "loss": 0.7083, "step": 34020 }, { "epoch": 0.9932848675951067, "grad_norm": 0.525889296217136, "learning_rate": 3.7307380373073805e-07, "loss": 0.5266, "step": 34021 }, { "epoch": 0.9933140638229541, "grad_norm": 0.5023645102492923, "learning_rate": 3.714517437145174e-07, "loss": 0.5267, "step": 34022 }, { "epoch": 0.9933432600508014, "grad_norm": 0.5239950975294656, "learning_rate": 3.6982968369829686e-07, "loss": 0.5568, "step": 34023 }, { "epoch": 0.9933724562786488, "grad_norm": 0.5506355938599279, "learning_rate": 3.682076236820763e-07, "loss": 0.638, "step": 34024 }, { "epoch": 0.9934016525064961, "grad_norm": 0.5441782700933165, "learning_rate": 3.665855636658556e-07, "loss": 0.5911, "step": 34025 }, { "epoch": 0.9934308487343435, "grad_norm": 0.5176138588734195, "learning_rate": 3.6496350364963505e-07, "loss": 0.5355, "step": 34026 }, { "epoch": 0.9934600449621909, "grad_norm": 0.5764314917086437, "learning_rate": 3.6334144363341443e-07, "loss": 0.6483, "step": 34027 }, { "epoch": 0.9934892411900382, "grad_norm": 0.5417440709778754, "learning_rate": 3.6171938361719386e-07, "loss": 0.5946, "step": 34028 }, { "epoch": 0.9935184374178856, "grad_norm": 0.5437110524379376, "learning_rate": 3.600973236009733e-07, "loss": 0.5961, "step": 34029 }, { "epoch": 0.993547633645733, "grad_norm": 0.5246762956043601, "learning_rate": 3.584752635847526e-07, "loss": 0.5681, "step": 34030 }, { "epoch": 0.9935768298735803, "grad_norm": 0.5470590700452733, "learning_rate": 3.5685320356853206e-07, "loss": 0.5756, "step": 34031 }, { "epoch": 0.9936060261014277, "grad_norm": 0.5221321223584537, "learning_rate": 3.5523114355231144e-07, "loss": 0.5561, "step": 34032 }, { "epoch": 0.993635222329275, "grad_norm": 0.517387425619394, "learning_rate": 3.5360908353609087e-07, "loss": 0.5626, "step": 34033 }, { "epoch": 0.9936644185571224, "grad_norm": 0.5058399700547407, "learning_rate": 3.5198702351987025e-07, "loss": 0.536, "step": 34034 }, { "epoch": 0.9936936147849698, "grad_norm": 0.5164687608875544, "learning_rate": 3.5036496350364963e-07, "loss": 0.5681, "step": 34035 }, { "epoch": 0.9937228110128171, "grad_norm": 0.5090294602060796, "learning_rate": 3.4874290348742906e-07, "loss": 0.5245, "step": 34036 }, { "epoch": 0.9937520072406645, "grad_norm": 0.5332353181550626, "learning_rate": 3.4712084347120844e-07, "loss": 0.5304, "step": 34037 }, { "epoch": 0.9937812034685118, "grad_norm": 0.5706327032641855, "learning_rate": 3.4549878345498787e-07, "loss": 0.6343, "step": 34038 }, { "epoch": 0.9938103996963592, "grad_norm": 0.5489870163998962, "learning_rate": 3.4387672343876725e-07, "loss": 0.6277, "step": 34039 }, { "epoch": 0.9938395959242066, "grad_norm": 0.5396059056493094, "learning_rate": 3.4225466342254663e-07, "loss": 0.6052, "step": 34040 }, { "epoch": 0.9938687921520539, "grad_norm": 0.5583095464149856, "learning_rate": 3.4063260340632607e-07, "loss": 0.5802, "step": 34041 }, { "epoch": 0.9938979883799013, "grad_norm": 0.547080771761547, "learning_rate": 3.3901054339010544e-07, "loss": 0.5994, "step": 34042 }, { "epoch": 0.9939271846077486, "grad_norm": 0.5927648669890899, "learning_rate": 3.373884833738849e-07, "loss": 0.6752, "step": 34043 }, { "epoch": 0.993956380835596, "grad_norm": 0.5280210921760681, "learning_rate": 3.3576642335766426e-07, "loss": 0.5912, "step": 34044 }, { "epoch": 0.9939855770634434, "grad_norm": 0.4874273100939237, "learning_rate": 3.3414436334144364e-07, "loss": 0.4953, "step": 34045 }, { "epoch": 0.9940147732912907, "grad_norm": 0.49570806463445044, "learning_rate": 3.3252230332522307e-07, "loss": 0.535, "step": 34046 }, { "epoch": 0.9940439695191381, "grad_norm": 0.49736760698130983, "learning_rate": 3.3090024330900245e-07, "loss": 0.5224, "step": 34047 }, { "epoch": 0.9940731657469855, "grad_norm": 0.515926517377902, "learning_rate": 3.2927818329278183e-07, "loss": 0.5344, "step": 34048 }, { "epoch": 0.9941023619748328, "grad_norm": 0.5008297554335593, "learning_rate": 3.2765612327656126e-07, "loss": 0.5254, "step": 34049 }, { "epoch": 0.9941315582026802, "grad_norm": 0.4999760887241476, "learning_rate": 3.2603406326034064e-07, "loss": 0.5362, "step": 34050 }, { "epoch": 0.9941607544305275, "grad_norm": 0.5616239867756982, "learning_rate": 3.244120032441201e-07, "loss": 0.6318, "step": 34051 }, { "epoch": 0.9941899506583749, "grad_norm": 0.5927831787766261, "learning_rate": 3.2278994322789945e-07, "loss": 0.585, "step": 34052 }, { "epoch": 0.9942191468862223, "grad_norm": 0.48539872504191056, "learning_rate": 3.2116788321167883e-07, "loss": 0.4667, "step": 34053 }, { "epoch": 0.9942483431140696, "grad_norm": 0.5198121926219812, "learning_rate": 3.1954582319545827e-07, "loss": 0.5596, "step": 34054 }, { "epoch": 0.994277539341917, "grad_norm": 0.5271306788809683, "learning_rate": 3.1792376317923765e-07, "loss": 0.5314, "step": 34055 }, { "epoch": 0.9943067355697643, "grad_norm": 0.5336058339714483, "learning_rate": 3.163017031630171e-07, "loss": 0.5772, "step": 34056 }, { "epoch": 0.9943359317976117, "grad_norm": 0.5406614156394519, "learning_rate": 3.146796431467964e-07, "loss": 0.5739, "step": 34057 }, { "epoch": 0.9943651280254591, "grad_norm": 0.53740644437689, "learning_rate": 3.1305758313057584e-07, "loss": 0.6117, "step": 34058 }, { "epoch": 0.9943943242533064, "grad_norm": 0.5463006176082078, "learning_rate": 3.114355231143552e-07, "loss": 0.6125, "step": 34059 }, { "epoch": 0.9944235204811538, "grad_norm": 0.504753011929825, "learning_rate": 3.0981346309813465e-07, "loss": 0.5309, "step": 34060 }, { "epoch": 0.9944527167090011, "grad_norm": 0.5269398249894124, "learning_rate": 3.081914030819141e-07, "loss": 0.5763, "step": 34061 }, { "epoch": 0.9944819129368485, "grad_norm": 0.5716457429314529, "learning_rate": 3.065693430656934e-07, "loss": 0.6553, "step": 34062 }, { "epoch": 0.9945111091646959, "grad_norm": 0.5044602811382404, "learning_rate": 3.0494728304947284e-07, "loss": 0.5056, "step": 34063 }, { "epoch": 0.9945403053925432, "grad_norm": 0.546794242863498, "learning_rate": 3.033252230332522e-07, "loss": 0.5866, "step": 34064 }, { "epoch": 0.9945695016203906, "grad_norm": 0.5220956152815601, "learning_rate": 3.0170316301703166e-07, "loss": 0.5515, "step": 34065 }, { "epoch": 0.994598697848238, "grad_norm": 0.5264880497365504, "learning_rate": 3.000811030008111e-07, "loss": 0.5858, "step": 34066 }, { "epoch": 0.9946278940760853, "grad_norm": 0.5038540270793133, "learning_rate": 2.984590429845904e-07, "loss": 0.5088, "step": 34067 }, { "epoch": 0.9946570903039327, "grad_norm": 0.520365453480153, "learning_rate": 2.9683698296836985e-07, "loss": 0.5535, "step": 34068 }, { "epoch": 0.99468628653178, "grad_norm": 0.5209586462179053, "learning_rate": 2.9521492295214923e-07, "loss": 0.5392, "step": 34069 }, { "epoch": 0.9947154827596274, "grad_norm": 0.5341851644393054, "learning_rate": 2.9359286293592866e-07, "loss": 0.5641, "step": 34070 }, { "epoch": 0.9947446789874749, "grad_norm": 0.5342408954136152, "learning_rate": 2.9197080291970804e-07, "loss": 0.5763, "step": 34071 }, { "epoch": 0.9947738752153222, "grad_norm": 0.5149265579589805, "learning_rate": 2.903487429034874e-07, "loss": 0.5267, "step": 34072 }, { "epoch": 0.9948030714431696, "grad_norm": 0.5245657227347956, "learning_rate": 2.8872668288726685e-07, "loss": 0.5651, "step": 34073 }, { "epoch": 0.994832267671017, "grad_norm": 0.5154300154611019, "learning_rate": 2.8710462287104623e-07, "loss": 0.5487, "step": 34074 }, { "epoch": 0.9948614638988643, "grad_norm": 0.5524951848293593, "learning_rate": 2.8548256285482567e-07, "loss": 0.6096, "step": 34075 }, { "epoch": 0.9948906601267117, "grad_norm": 0.5513471225520353, "learning_rate": 2.8386050283860505e-07, "loss": 0.6675, "step": 34076 }, { "epoch": 0.994919856354559, "grad_norm": 0.5372782185930556, "learning_rate": 2.822384428223844e-07, "loss": 0.5842, "step": 34077 }, { "epoch": 0.9949490525824064, "grad_norm": 0.5112599465015079, "learning_rate": 2.8061638280616386e-07, "loss": 0.5078, "step": 34078 }, { "epoch": 0.9949782488102538, "grad_norm": 0.5523696342201725, "learning_rate": 2.7899432278994324e-07, "loss": 0.6206, "step": 34079 }, { "epoch": 0.9950074450381011, "grad_norm": 0.5200113664523919, "learning_rate": 2.773722627737226e-07, "loss": 0.57, "step": 34080 }, { "epoch": 0.9950366412659485, "grad_norm": 0.5502284768322968, "learning_rate": 2.7575020275750205e-07, "loss": 0.6161, "step": 34081 }, { "epoch": 0.9950658374937958, "grad_norm": 0.5451042102444378, "learning_rate": 2.7412814274128143e-07, "loss": 0.6091, "step": 34082 }, { "epoch": 0.9950950337216432, "grad_norm": 0.5237001196366748, "learning_rate": 2.7250608272506086e-07, "loss": 0.5338, "step": 34083 }, { "epoch": 0.9951242299494906, "grad_norm": 0.5578130384993026, "learning_rate": 2.7088402270884024e-07, "loss": 0.6742, "step": 34084 }, { "epoch": 0.9951534261773379, "grad_norm": 0.5551970333975592, "learning_rate": 2.692619626926196e-07, "loss": 0.5893, "step": 34085 }, { "epoch": 0.9951826224051853, "grad_norm": 0.5308071465891289, "learning_rate": 2.6763990267639905e-07, "loss": 0.585, "step": 34086 }, { "epoch": 0.9952118186330327, "grad_norm": 0.5221415976284454, "learning_rate": 2.6601784266017843e-07, "loss": 0.5703, "step": 34087 }, { "epoch": 0.99524101486088, "grad_norm": 0.535758341267185, "learning_rate": 2.6439578264395787e-07, "loss": 0.5579, "step": 34088 }, { "epoch": 0.9952702110887274, "grad_norm": 0.531410062498169, "learning_rate": 2.627737226277372e-07, "loss": 0.568, "step": 34089 }, { "epoch": 0.9952994073165747, "grad_norm": 0.5295514076954199, "learning_rate": 2.6115166261151663e-07, "loss": 0.586, "step": 34090 }, { "epoch": 0.9953286035444221, "grad_norm": 0.5057254912526598, "learning_rate": 2.5952960259529606e-07, "loss": 0.5154, "step": 34091 }, { "epoch": 0.9953577997722695, "grad_norm": 0.549646458467044, "learning_rate": 2.5790754257907544e-07, "loss": 0.6235, "step": 34092 }, { "epoch": 0.9953869960001168, "grad_norm": 0.5626713444136487, "learning_rate": 2.5628548256285487e-07, "loss": 0.6497, "step": 34093 }, { "epoch": 0.9954161922279642, "grad_norm": 0.5567724965363642, "learning_rate": 2.546634225466342e-07, "loss": 0.6166, "step": 34094 }, { "epoch": 0.9954453884558115, "grad_norm": 0.5512065084879844, "learning_rate": 2.5304136253041363e-07, "loss": 0.609, "step": 34095 }, { "epoch": 0.9954745846836589, "grad_norm": 0.5375268286507572, "learning_rate": 2.5141930251419306e-07, "loss": 0.5775, "step": 34096 }, { "epoch": 0.9955037809115063, "grad_norm": 0.5252955400575398, "learning_rate": 2.4979724249797244e-07, "loss": 0.5821, "step": 34097 }, { "epoch": 0.9955329771393536, "grad_norm": 0.5840415403327235, "learning_rate": 2.481751824817519e-07, "loss": 0.5386, "step": 34098 }, { "epoch": 0.995562173367201, "grad_norm": 0.5296048926168412, "learning_rate": 2.465531224655312e-07, "loss": 0.5428, "step": 34099 }, { "epoch": 0.9955913695950483, "grad_norm": 0.518215049484714, "learning_rate": 2.4493106244931064e-07, "loss": 0.542, "step": 34100 }, { "epoch": 0.9956205658228957, "grad_norm": 0.5030187535092631, "learning_rate": 2.4330900243309e-07, "loss": 0.5534, "step": 34101 }, { "epoch": 0.9956497620507431, "grad_norm": 0.5570849793577611, "learning_rate": 2.4168694241686945e-07, "loss": 0.6038, "step": 34102 }, { "epoch": 0.9956789582785904, "grad_norm": 0.5207072017187495, "learning_rate": 2.4006488240064883e-07, "loss": 0.5346, "step": 34103 }, { "epoch": 0.9957081545064378, "grad_norm": 0.5097050133582006, "learning_rate": 2.384428223844282e-07, "loss": 0.5095, "step": 34104 }, { "epoch": 0.9957373507342852, "grad_norm": 0.5319399852686842, "learning_rate": 2.3682076236820764e-07, "loss": 0.5851, "step": 34105 }, { "epoch": 0.9957665469621325, "grad_norm": 0.5005077570904327, "learning_rate": 2.3519870235198705e-07, "loss": 0.558, "step": 34106 }, { "epoch": 0.9957957431899799, "grad_norm": 0.5635243248084364, "learning_rate": 2.3357664233576645e-07, "loss": 0.6471, "step": 34107 }, { "epoch": 0.9958249394178272, "grad_norm": 0.5849852446755681, "learning_rate": 2.3195458231954583e-07, "loss": 0.6318, "step": 34108 }, { "epoch": 0.9958541356456746, "grad_norm": 0.48184445302724427, "learning_rate": 2.3033252230332524e-07, "loss": 0.4741, "step": 34109 }, { "epoch": 0.995883331873522, "grad_norm": 0.5015181586824122, "learning_rate": 2.2871046228710465e-07, "loss": 0.5211, "step": 34110 }, { "epoch": 0.9959125281013693, "grad_norm": 0.5364636925290365, "learning_rate": 2.2708840227088405e-07, "loss": 0.5945, "step": 34111 }, { "epoch": 0.9959417243292167, "grad_norm": 0.5356069307863189, "learning_rate": 2.254663422546634e-07, "loss": 0.5606, "step": 34112 }, { "epoch": 0.995970920557064, "grad_norm": 0.48008771885235024, "learning_rate": 2.238442822384428e-07, "loss": 0.4833, "step": 34113 }, { "epoch": 0.9960001167849114, "grad_norm": 0.5112967783685207, "learning_rate": 2.2222222222222224e-07, "loss": 0.5736, "step": 34114 }, { "epoch": 0.9960293130127588, "grad_norm": 0.5483797202367049, "learning_rate": 2.2060016220600165e-07, "loss": 0.6345, "step": 34115 }, { "epoch": 0.9960585092406061, "grad_norm": 0.505157228300361, "learning_rate": 2.1897810218978106e-07, "loss": 0.5228, "step": 34116 }, { "epoch": 0.9960877054684535, "grad_norm": 0.5508260327939235, "learning_rate": 2.173560421735604e-07, "loss": 0.5943, "step": 34117 }, { "epoch": 0.9961169016963008, "grad_norm": 0.4815382627826161, "learning_rate": 2.1573398215733982e-07, "loss": 0.4895, "step": 34118 }, { "epoch": 0.9961460979241482, "grad_norm": 0.5334711870544063, "learning_rate": 2.1411192214111922e-07, "loss": 0.5522, "step": 34119 }, { "epoch": 0.9961752941519956, "grad_norm": 0.5086371972641649, "learning_rate": 2.1248986212489866e-07, "loss": 0.5351, "step": 34120 }, { "epoch": 0.9962044903798429, "grad_norm": 0.5157541952698318, "learning_rate": 2.10867802108678e-07, "loss": 0.5477, "step": 34121 }, { "epoch": 0.9962336866076903, "grad_norm": 0.5585911307869958, "learning_rate": 2.0924574209245742e-07, "loss": 0.6435, "step": 34122 }, { "epoch": 0.9962628828355377, "grad_norm": 0.5422715249089275, "learning_rate": 2.0762368207623682e-07, "loss": 0.6292, "step": 34123 }, { "epoch": 0.996292079063385, "grad_norm": 0.5709536375042459, "learning_rate": 2.0600162206001623e-07, "loss": 0.6129, "step": 34124 }, { "epoch": 0.9963212752912324, "grad_norm": 0.5344275470678522, "learning_rate": 2.0437956204379566e-07, "loss": 0.6028, "step": 34125 }, { "epoch": 0.9963504715190797, "grad_norm": 0.5524253210125203, "learning_rate": 2.0275750202757501e-07, "loss": 0.6508, "step": 34126 }, { "epoch": 0.9963796677469271, "grad_norm": 0.5087532843063319, "learning_rate": 2.0113544201135442e-07, "loss": 0.5794, "step": 34127 }, { "epoch": 0.9964088639747745, "grad_norm": 0.537778157090508, "learning_rate": 1.9951338199513383e-07, "loss": 0.6029, "step": 34128 }, { "epoch": 0.9964380602026218, "grad_norm": 0.5348313965722338, "learning_rate": 1.9789132197891323e-07, "loss": 0.5403, "step": 34129 }, { "epoch": 0.9964672564304692, "grad_norm": 0.5469902599241623, "learning_rate": 1.9626926196269264e-07, "loss": 0.5861, "step": 34130 }, { "epoch": 0.9964964526583165, "grad_norm": 0.5081851113589274, "learning_rate": 1.9464720194647204e-07, "loss": 0.5283, "step": 34131 }, { "epoch": 0.9965256488861639, "grad_norm": 0.5002242548934153, "learning_rate": 1.9302514193025142e-07, "loss": 0.5354, "step": 34132 }, { "epoch": 0.9965548451140113, "grad_norm": 0.5050155559332361, "learning_rate": 1.9140308191403083e-07, "loss": 0.539, "step": 34133 }, { "epoch": 0.9965840413418586, "grad_norm": 0.5320633448532185, "learning_rate": 1.897810218978102e-07, "loss": 0.5913, "step": 34134 }, { "epoch": 0.996613237569706, "grad_norm": 0.5854185737985123, "learning_rate": 1.8815896188158964e-07, "loss": 0.6018, "step": 34135 }, { "epoch": 0.9966424337975534, "grad_norm": 0.5550713431769756, "learning_rate": 1.8653690186536902e-07, "loss": 0.5942, "step": 34136 }, { "epoch": 0.9966716300254007, "grad_norm": 0.5239070340906213, "learning_rate": 1.8491484184914843e-07, "loss": 0.5619, "step": 34137 }, { "epoch": 0.9967008262532481, "grad_norm": 0.5211862928365928, "learning_rate": 1.832927818329278e-07, "loss": 0.5775, "step": 34138 }, { "epoch": 0.9967300224810954, "grad_norm": 0.5357286408772939, "learning_rate": 1.8167072181670722e-07, "loss": 0.5597, "step": 34139 }, { "epoch": 0.9967592187089428, "grad_norm": 0.5275446578595158, "learning_rate": 1.8004866180048665e-07, "loss": 0.5777, "step": 34140 }, { "epoch": 0.9967884149367902, "grad_norm": 0.4995557051552379, "learning_rate": 1.7842660178426603e-07, "loss": 0.5239, "step": 34141 }, { "epoch": 0.9968176111646375, "grad_norm": 0.5273923508349302, "learning_rate": 1.7680454176804543e-07, "loss": 0.5717, "step": 34142 }, { "epoch": 0.9968468073924849, "grad_norm": 0.5205146129907466, "learning_rate": 1.7518248175182481e-07, "loss": 0.5234, "step": 34143 }, { "epoch": 0.9968760036203322, "grad_norm": 0.5557940647492532, "learning_rate": 1.7356042173560422e-07, "loss": 0.5939, "step": 34144 }, { "epoch": 0.9969051998481796, "grad_norm": 0.5278741675767147, "learning_rate": 1.7193836171938363e-07, "loss": 0.5963, "step": 34145 }, { "epoch": 0.996934396076027, "grad_norm": 0.5211769385387116, "learning_rate": 1.7031630170316303e-07, "loss": 0.5519, "step": 34146 }, { "epoch": 0.9969635923038743, "grad_norm": 0.5227197204726153, "learning_rate": 1.6869424168694244e-07, "loss": 0.5719, "step": 34147 }, { "epoch": 0.9969927885317217, "grad_norm": 0.5407572001457136, "learning_rate": 1.6707218167072182e-07, "loss": 0.5995, "step": 34148 }, { "epoch": 0.997021984759569, "grad_norm": 0.5267795983570167, "learning_rate": 1.6545012165450122e-07, "loss": 0.5467, "step": 34149 }, { "epoch": 0.9970511809874164, "grad_norm": 0.517036395000485, "learning_rate": 1.6382806163828063e-07, "loss": 0.5428, "step": 34150 }, { "epoch": 0.9970803772152638, "grad_norm": 0.5821064510193545, "learning_rate": 1.6220600162206004e-07, "loss": 0.6736, "step": 34151 }, { "epoch": 0.9971095734431111, "grad_norm": 0.5194621299573828, "learning_rate": 1.6058394160583942e-07, "loss": 0.5689, "step": 34152 }, { "epoch": 0.9971387696709585, "grad_norm": 0.5174170526309066, "learning_rate": 1.5896188158961882e-07, "loss": 0.5524, "step": 34153 }, { "epoch": 0.9971679658988059, "grad_norm": 0.5054548469247393, "learning_rate": 1.573398215733982e-07, "loss": 0.5242, "step": 34154 }, { "epoch": 0.9971971621266532, "grad_norm": 0.5463461417357495, "learning_rate": 1.557177615571776e-07, "loss": 0.5957, "step": 34155 }, { "epoch": 0.9972263583545006, "grad_norm": 0.5335795424210831, "learning_rate": 1.5409570154095704e-07, "loss": 0.5997, "step": 34156 }, { "epoch": 0.9972555545823479, "grad_norm": 0.5095984529105987, "learning_rate": 1.5247364152473642e-07, "loss": 0.5516, "step": 34157 }, { "epoch": 0.9972847508101953, "grad_norm": 0.5483529981432727, "learning_rate": 1.5085158150851583e-07, "loss": 0.6436, "step": 34158 }, { "epoch": 0.9973139470380427, "grad_norm": 0.5236682179129428, "learning_rate": 1.492295214922952e-07, "loss": 0.6126, "step": 34159 }, { "epoch": 0.99734314326589, "grad_norm": 0.5286806305792261, "learning_rate": 1.4760746147607461e-07, "loss": 0.5875, "step": 34160 }, { "epoch": 0.9973723394937374, "grad_norm": 0.5542243949453792, "learning_rate": 1.4598540145985402e-07, "loss": 0.603, "step": 34161 }, { "epoch": 0.9974015357215847, "grad_norm": 0.5685626006137457, "learning_rate": 1.4436334144363343e-07, "loss": 0.6252, "step": 34162 }, { "epoch": 0.9974307319494321, "grad_norm": 0.5296445555211424, "learning_rate": 1.4274128142741283e-07, "loss": 0.5813, "step": 34163 }, { "epoch": 0.9974599281772795, "grad_norm": 0.5341832246054292, "learning_rate": 1.411192214111922e-07, "loss": 0.593, "step": 34164 }, { "epoch": 0.9974891244051268, "grad_norm": 0.596474204671622, "learning_rate": 1.3949716139497162e-07, "loss": 0.6817, "step": 34165 }, { "epoch": 0.9975183206329742, "grad_norm": 0.550309006230678, "learning_rate": 1.3787510137875103e-07, "loss": 0.6586, "step": 34166 }, { "epoch": 0.9975475168608215, "grad_norm": 0.5210077620463675, "learning_rate": 1.3625304136253043e-07, "loss": 0.5603, "step": 34167 }, { "epoch": 0.9975767130886689, "grad_norm": 0.5323127953551644, "learning_rate": 1.346309813463098e-07, "loss": 0.604, "step": 34168 }, { "epoch": 0.9976059093165163, "grad_norm": 0.485205899116622, "learning_rate": 1.3300892133008922e-07, "loss": 0.4578, "step": 34169 }, { "epoch": 0.9976351055443636, "grad_norm": 0.5597490258160234, "learning_rate": 1.313868613138686e-07, "loss": 0.5776, "step": 34170 }, { "epoch": 0.997664301772211, "grad_norm": 0.5627818045688517, "learning_rate": 1.2976480129764803e-07, "loss": 0.6092, "step": 34171 }, { "epoch": 0.9976934980000584, "grad_norm": 0.5723676471103621, "learning_rate": 1.2814274128142744e-07, "loss": 0.6643, "step": 34172 }, { "epoch": 0.9977226942279057, "grad_norm": 0.5481516574421946, "learning_rate": 1.2652068126520682e-07, "loss": 0.6289, "step": 34173 }, { "epoch": 0.9977518904557531, "grad_norm": 0.5522145893790503, "learning_rate": 1.2489862124898622e-07, "loss": 0.6158, "step": 34174 }, { "epoch": 0.9977810866836004, "grad_norm": 0.5345931975319063, "learning_rate": 1.232765612327656e-07, "loss": 0.5784, "step": 34175 }, { "epoch": 0.9978102829114478, "grad_norm": 0.5170264631792729, "learning_rate": 1.21654501216545e-07, "loss": 0.5355, "step": 34176 }, { "epoch": 0.9978394791392952, "grad_norm": 0.5596399868632458, "learning_rate": 1.2003244120032441e-07, "loss": 0.6259, "step": 34177 }, { "epoch": 0.9978686753671425, "grad_norm": 0.5319809318306405, "learning_rate": 1.1841038118410382e-07, "loss": 0.6064, "step": 34178 }, { "epoch": 0.9978978715949899, "grad_norm": 0.535679925657208, "learning_rate": 1.1678832116788323e-07, "loss": 0.5837, "step": 34179 }, { "epoch": 0.9979270678228372, "grad_norm": 0.507781449001054, "learning_rate": 1.1516626115166262e-07, "loss": 0.5564, "step": 34180 }, { "epoch": 0.9979562640506846, "grad_norm": 0.5260119250669482, "learning_rate": 1.1354420113544203e-07, "loss": 0.5867, "step": 34181 }, { "epoch": 0.997985460278532, "grad_norm": 0.5393261268000867, "learning_rate": 1.119221411192214e-07, "loss": 0.5153, "step": 34182 }, { "epoch": 0.9980146565063793, "grad_norm": 0.5323020965775882, "learning_rate": 1.1030008110300083e-07, "loss": 0.5628, "step": 34183 }, { "epoch": 0.9980438527342267, "grad_norm": 0.5502963332047272, "learning_rate": 1.086780210867802e-07, "loss": 0.6067, "step": 34184 }, { "epoch": 0.998073048962074, "grad_norm": 0.5318508648244329, "learning_rate": 1.0705596107055961e-07, "loss": 0.5615, "step": 34185 }, { "epoch": 0.9981022451899214, "grad_norm": 0.5126644693963319, "learning_rate": 1.05433901054339e-07, "loss": 0.5514, "step": 34186 }, { "epoch": 0.9981314414177688, "grad_norm": 0.5443953296196657, "learning_rate": 1.0381184103811841e-07, "loss": 0.5822, "step": 34187 }, { "epoch": 0.9981606376456161, "grad_norm": 0.47477039033896107, "learning_rate": 1.0218978102189783e-07, "loss": 0.479, "step": 34188 }, { "epoch": 0.9981898338734635, "grad_norm": 0.5201161959037346, "learning_rate": 1.0056772100567721e-07, "loss": 0.5591, "step": 34189 }, { "epoch": 0.9982190301013109, "grad_norm": 0.5043735850826346, "learning_rate": 9.894566098945662e-08, "loss": 0.5204, "step": 34190 }, { "epoch": 0.9982482263291583, "grad_norm": 0.5300687611525489, "learning_rate": 9.732360097323602e-08, "loss": 0.5794, "step": 34191 }, { "epoch": 0.9982774225570057, "grad_norm": 0.5537174199913726, "learning_rate": 9.570154095701542e-08, "loss": 0.6137, "step": 34192 }, { "epoch": 0.998306618784853, "grad_norm": 0.5692804723566481, "learning_rate": 9.407948094079482e-08, "loss": 0.6172, "step": 34193 }, { "epoch": 0.9983358150127004, "grad_norm": 0.5201666929565482, "learning_rate": 9.245742092457421e-08, "loss": 0.5408, "step": 34194 }, { "epoch": 0.9983650112405478, "grad_norm": 0.5209722056875367, "learning_rate": 9.083536090835361e-08, "loss": 0.5762, "step": 34195 }, { "epoch": 0.9983942074683951, "grad_norm": 0.5224341339385281, "learning_rate": 8.921330089213301e-08, "loss": 0.5489, "step": 34196 }, { "epoch": 0.9984234036962425, "grad_norm": 0.4985734687125628, "learning_rate": 8.759124087591241e-08, "loss": 0.5233, "step": 34197 }, { "epoch": 0.9984525999240899, "grad_norm": 0.5356923109716034, "learning_rate": 8.596918085969181e-08, "loss": 0.6041, "step": 34198 }, { "epoch": 0.9984817961519372, "grad_norm": 0.5939122474704892, "learning_rate": 8.434712084347122e-08, "loss": 0.6758, "step": 34199 }, { "epoch": 0.9985109923797846, "grad_norm": 0.5426617605981988, "learning_rate": 8.272506082725061e-08, "loss": 0.6216, "step": 34200 }, { "epoch": 0.9985401886076319, "grad_norm": 0.51493284400722, "learning_rate": 8.110300081103002e-08, "loss": 0.5574, "step": 34201 }, { "epoch": 0.9985693848354793, "grad_norm": 0.511271647587051, "learning_rate": 7.948094079480941e-08, "loss": 0.5404, "step": 34202 }, { "epoch": 0.9985985810633267, "grad_norm": 0.5108976969648747, "learning_rate": 7.78588807785888e-08, "loss": 0.5397, "step": 34203 }, { "epoch": 0.998627777291174, "grad_norm": 0.5370164928853589, "learning_rate": 7.623682076236821e-08, "loss": 0.5959, "step": 34204 }, { "epoch": 0.9986569735190214, "grad_norm": 0.5498164894906729, "learning_rate": 7.46147607461476e-08, "loss": 0.6009, "step": 34205 }, { "epoch": 0.9986861697468687, "grad_norm": 0.5298500534654464, "learning_rate": 7.299270072992701e-08, "loss": 0.5706, "step": 34206 }, { "epoch": 0.9987153659747161, "grad_norm": 0.5164471974863251, "learning_rate": 7.137064071370642e-08, "loss": 0.5298, "step": 34207 }, { "epoch": 0.9987445622025635, "grad_norm": 0.5417882015593067, "learning_rate": 6.974858069748581e-08, "loss": 0.5744, "step": 34208 }, { "epoch": 0.9987737584304108, "grad_norm": 0.5174850018115436, "learning_rate": 6.812652068126522e-08, "loss": 0.5351, "step": 34209 }, { "epoch": 0.9988029546582582, "grad_norm": 0.5314554289599851, "learning_rate": 6.650446066504461e-08, "loss": 0.5679, "step": 34210 }, { "epoch": 0.9988321508861056, "grad_norm": 0.6154025376628123, "learning_rate": 6.488240064882401e-08, "loss": 0.6511, "step": 34211 }, { "epoch": 0.9988613471139529, "grad_norm": 0.60026636809243, "learning_rate": 6.326034063260341e-08, "loss": 0.6936, "step": 34212 }, { "epoch": 0.9988905433418003, "grad_norm": 0.47526533027670037, "learning_rate": 6.16382806163828e-08, "loss": 0.499, "step": 34213 }, { "epoch": 0.9989197395696476, "grad_norm": 0.5466143792761072, "learning_rate": 6.001622060016221e-08, "loss": 0.6244, "step": 34214 }, { "epoch": 0.998948935797495, "grad_norm": 0.4973680888386574, "learning_rate": 5.8394160583941613e-08, "loss": 0.5151, "step": 34215 }, { "epoch": 0.9989781320253424, "grad_norm": 0.5630867560730695, "learning_rate": 5.677210056772101e-08, "loss": 0.6006, "step": 34216 }, { "epoch": 0.9990073282531897, "grad_norm": 0.5620105218048189, "learning_rate": 5.515004055150041e-08, "loss": 0.6067, "step": 34217 }, { "epoch": 0.9990365244810371, "grad_norm": 0.5560393590793722, "learning_rate": 5.3527980535279806e-08, "loss": 0.6725, "step": 34218 }, { "epoch": 0.9990657207088844, "grad_norm": 0.5328635225914202, "learning_rate": 5.1905920519059205e-08, "loss": 0.5952, "step": 34219 }, { "epoch": 0.9990949169367318, "grad_norm": 0.5653013015592522, "learning_rate": 5.0283860502838605e-08, "loss": 0.6261, "step": 34220 }, { "epoch": 0.9991241131645792, "grad_norm": 0.5009482966726518, "learning_rate": 4.866180048661801e-08, "loss": 0.4906, "step": 34221 }, { "epoch": 0.9991533093924265, "grad_norm": 0.5102760934406142, "learning_rate": 4.703974047039741e-08, "loss": 0.5582, "step": 34222 }, { "epoch": 0.9991825056202739, "grad_norm": 0.5139369532828939, "learning_rate": 4.5417680454176804e-08, "loss": 0.584, "step": 34223 }, { "epoch": 0.9992117018481212, "grad_norm": 0.5642784221079151, "learning_rate": 4.3795620437956203e-08, "loss": 0.6009, "step": 34224 }, { "epoch": 0.9992408980759686, "grad_norm": 0.5135529035967932, "learning_rate": 4.217356042173561e-08, "loss": 0.5278, "step": 34225 }, { "epoch": 0.999270094303816, "grad_norm": 0.5361172489131413, "learning_rate": 4.055150040551501e-08, "loss": 0.6027, "step": 34226 }, { "epoch": 0.9992992905316633, "grad_norm": 0.5379316745811364, "learning_rate": 3.89294403892944e-08, "loss": 0.5601, "step": 34227 }, { "epoch": 0.9993284867595107, "grad_norm": 0.540170515268606, "learning_rate": 3.73073803730738e-08, "loss": 0.5937, "step": 34228 }, { "epoch": 0.999357682987358, "grad_norm": 0.5349279735323974, "learning_rate": 3.568532035685321e-08, "loss": 0.5632, "step": 34229 }, { "epoch": 0.9993868792152054, "grad_norm": 0.5599082152902906, "learning_rate": 3.406326034063261e-08, "loss": 0.6474, "step": 34230 }, { "epoch": 0.9994160754430528, "grad_norm": 0.5128399278336012, "learning_rate": 3.244120032441201e-08, "loss": 0.5766, "step": 34231 }, { "epoch": 0.9994452716709001, "grad_norm": 0.4998489222082114, "learning_rate": 3.08191403081914e-08, "loss": 0.4933, "step": 34232 }, { "epoch": 0.9994744678987475, "grad_norm": 0.48470552024319336, "learning_rate": 2.9197080291970807e-08, "loss": 0.4845, "step": 34233 }, { "epoch": 0.9995036641265949, "grad_norm": 0.5394552105560116, "learning_rate": 2.7575020275750206e-08, "loss": 0.6054, "step": 34234 }, { "epoch": 0.9995328603544422, "grad_norm": 0.5432011776541867, "learning_rate": 2.5952960259529603e-08, "loss": 0.6012, "step": 34235 }, { "epoch": 0.9995620565822896, "grad_norm": 0.5237954040753826, "learning_rate": 2.4330900243309006e-08, "loss": 0.5717, "step": 34236 }, { "epoch": 0.9995912528101369, "grad_norm": 0.4951585288232567, "learning_rate": 2.2708840227088402e-08, "loss": 0.4938, "step": 34237 }, { "epoch": 0.9996204490379843, "grad_norm": 0.5517480928741285, "learning_rate": 2.1086780210867805e-08, "loss": 0.6072, "step": 34238 }, { "epoch": 0.9996496452658317, "grad_norm": 0.5585533183963877, "learning_rate": 1.94647201946472e-08, "loss": 0.6406, "step": 34239 }, { "epoch": 0.999678841493679, "grad_norm": 0.5607043002838983, "learning_rate": 1.7842660178426604e-08, "loss": 0.5211, "step": 34240 }, { "epoch": 0.9997080377215264, "grad_norm": 0.5440138354369283, "learning_rate": 1.6220600162206004e-08, "loss": 0.5743, "step": 34241 }, { "epoch": 0.9997372339493737, "grad_norm": 0.5259691687779517, "learning_rate": 1.4598540145985403e-08, "loss": 0.558, "step": 34242 }, { "epoch": 0.9997664301772211, "grad_norm": 0.5440334920181005, "learning_rate": 1.2976480129764801e-08, "loss": 0.609, "step": 34243 }, { "epoch": 0.9997956264050685, "grad_norm": 0.5250478963394257, "learning_rate": 1.1354420113544201e-08, "loss": 0.551, "step": 34244 }, { "epoch": 0.9998248226329158, "grad_norm": 0.53599337655369, "learning_rate": 9.7323600973236e-09, "loss": 0.6093, "step": 34245 }, { "epoch": 0.9998540188607632, "grad_norm": 0.5387306295721075, "learning_rate": 8.110300081103002e-09, "loss": 0.5945, "step": 34246 }, { "epoch": 0.9998832150886106, "grad_norm": 0.5785315482071987, "learning_rate": 6.488240064882401e-09, "loss": 0.6431, "step": 34247 }, { "epoch": 0.9999124113164579, "grad_norm": 0.5493504748146679, "learning_rate": 4.8661800486618e-09, "loss": 0.5687, "step": 34248 }, { "epoch": 0.9999416075443053, "grad_norm": 0.5160090030792226, "learning_rate": 3.2441200324412003e-09, "loss": 0.555, "step": 34249 }, { "epoch": 0.9999708037721526, "grad_norm": 0.5520459156655103, "learning_rate": 1.6220600162206002e-09, "loss": 0.6071, "step": 34250 }, { "epoch": 1.0, "grad_norm": 0.5288074379223691, "learning_rate": 0.0, "loss": 0.5754, "step": 34251 }, { "epoch": 1.0, "step": 34251, "total_flos": 487866325008384.0, "train_loss": 0.6296185475711923, "train_runtime": 53016.33, "train_samples_per_second": 10.336, "train_steps_per_second": 0.646 } ], "logging_steps": 1, "max_steps": 34251, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 487866325008384.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }